I have a Python script that fetches data from an API (specifically www.nseindia.com/api/corporate-announcements). When I run the script locally in PyCharm, it executes without any issues and fetches the data quickly. However, when I run the same script on PythonAnywhere, I encounter a timeout error:
HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=300)
My Python Code
import re
import time

import gspread
import numpy as np
import pandas as pd
import requests
import schedule
from oauth2client.service_account import ServiceAccountCredentials
# Define Google Sheets credentials
# NOTE: authorizing here runs at import time — the service-account JSON file
# must sit next to the script and the network must be reachable on startup.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'announcement-capture-7bef8a7ce6eb.json', scope)
client = gspread.authorize(credentials)
# Google Sheets document ID (the long token from the sheet's URL)
sheet_id = '1NKSFgnIUB3dW7Y0ZHCfXRd-B6QEYmCdW3km8g61X_bM'
# Browser-like headers; NSE rejects requests that look like plain scripts.
headers = {
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'en-US,en;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
}
# Home page is fetched first to obtain the anti-bot cookies the API needs.
baseurl = 'https://www.nseindia.com/'
url = 'https://www.nseindia.com/api/corporate-announcements?index=equities'
def get_announcements_nse():
    """Fetch the latest corporate announcements from the NSE API.

    Returns:
        pandas.DataFrame with columns Symbol, Name, Description,
        Announcement_Date, Announcement_Text, Attachment_File, with rows
        whose Description matches any skip keyword removed.  On any
        request or JSON-parse failure, an empty DataFrame with the same
        columns is returned instead of raising.
    """
    columns = ['Symbol', 'Name', 'Description', 'Announcement_Date',
               'Announcement_Text', 'Attachment_File']
    try:
        session = requests.Session()
        # Warm-up request: the home page sets the anti-bot cookies NSE
        # requires.  The Session persists them automatically, so there is
        # no need to copy them out and pass them back in by hand.
        session.get(baseurl, headers=headers, timeout=300)
        response = session.get(url, headers=headers, timeout=300)
        response.raise_for_status()  # raise HTTPError for 4xx/5xx
        json_data = response.json()
        if not json_data:  # API answered, but with an empty payload
            print("No announcements found.")
            return pd.DataFrame(columns=columns)
        # Keep only the fields published to the sheet, renamed to the
        # sheet's column names.
        selected_columns = ['symbol', 'sm_name', 'desc', 'an_dt',
                            'attchmntText', 'attchmntFile']
        announcements = pd.DataFrame(json_data)[selected_columns]
        announcements.columns = columns
        # Drop announcements whose Description mentions any skip keyword.
        # re.escape keeps regex metacharacters literal, and na=False
        # treats a missing Description as "no match" — without it a NaN
        # in the column breaks the boolean indexing below.
        keywords_to_skip = ['Disclosure', 'Certificate', 'Spurt', 'Loss',
                            'Duplicate', 'Share Certificate',
                            'Price Movement', 'Price']
        pattern = '|'.join(re.escape(k) for k in keywords_to_skip)
        skip_mask = announcements['Description'].str.contains(
            pattern, case=False, na=False)
        return announcements[~skip_mask]
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"An error occurred: {e}")
        return pd.DataFrame(columns=columns)
def update_google_sheet():
    """Fetch NSE announcements and prepend any new ones to the sheet.

    New rows are inserted at the top (row 2, just below the header) with
    the Symbol cell replaced by a link to the NSE quote page.  An
    announcement counts as already present when its Symbol, Name and
    Announcement_Date all match an existing row.
    """
    announcements_nse = get_announcements_nse()
    sheet = client.open_by_key(sheet_id).sheet1
    # Header row as it appears in the spreadsheet.
    # NOTE(review): the sheet header says 'Attachment_Text' while the
    # DataFrame column is 'Announcement_Text' — confirm this mismatch is
    # intentional; it means that column never participates in dedup.
    expected_headers = ['Symbol', 'Name', 'Description', 'Announcement_Date',
                        'Attachment_Text', 'Attachment_File']
    if len(sheet.get_all_values()) == 1:
        # Sheet holds only the header row: everything fetched is new.
        new_rows = announcements_nse
    else:
        existing_data = sheet.get_all_records(expected_headers=expected_headers)
        existing_df = pd.DataFrame(existing_data)
        key_cols = ['Symbol', 'Name', 'Announcement_Date']
        if existing_df.empty or not set(key_cols).issubset(existing_df.columns):
            # Nothing usable to dedupe against — treat all rows as new.
            new_rows = announcements_nse
        else:
            # Merge on the key columns ONLY.  Merging the full frames
            # would suffix overlapping non-key columns (_x/_y), changing
            # the row shape that insert_row() below depends on.
            merged = announcements_nse.merge(
                existing_df[key_cols], on=key_cols,
                how='left', indicator=True)
            new_rows = merged[merged['_merge'] == 'left_only'].drop(
                columns=['_merge'])
    # Empty strings instead of NaN so gspread serializes cells cleanly.
    new_rows = new_rows.replace({np.nan: ''})
    # Enumerate the rows rather than using the DataFrame index: after
    # filtering, the index has gaps, which would scatter inserts far
    # below the header instead of stacking them at the top.
    for offset, (_, row) in enumerate(new_rows.iterrows()):
        # Replace the Symbol cell with a link to the NSE quote page.
        quote_url = f'https://www.nseindia.com/get-quotes/equity?symbol={row["Symbol"]}'
        sheet.insert_row([quote_url] + row.tolist()[1:], offset + 2)
    print("NSE data updated successfully!")
# Schedule the job to run every 120 seconds
schedule.every(120).seconds.do(update_google_sheet)
# Blocking scheduler loop: wake once a second and run any job that is due.
# This never returns; the process must be killed (or the console closed)
# to stop it.
while True:
    schedule.run_pending()
    time.sleep(1)
The timeout occurs consistently on PythonAnywhere, even after adjusting timeout settings in the script. What could be causing this timeout issue on PythonAnywhere, and how can I troubleshoot or resolve it? Are there any specific configurations or settings I need to adjust on PythonAnywhere to prevent the timeout?
Any insights or suggestions would be greatly appreciated! Thank you.
I tried increasing the timeout to 300 seconds, but it still gives the timeout error. I expect this to run without errors, the same way it runs on my local computer.