I have a Python script that fetches data from an API (specifically www.nseindia.com/api/corporate-announcements). When I run the script locally in PyCharm, it executes without any issues and fetches the data quickly. However, when I run the same script on PythonAnywhere, I encounter a timeout error:
HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=300)
My Python Code
import re
import time

import gspread
import numpy as np
import pandas as pd
import requests
import schedule
from oauth2client.service_account import ServiceAccountCredentials
# Define Google Sheets credentials
# NOTE: authorizing here runs at import time — the service-account JSON file
# must sit next to the script and the network must be reachable on startup.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'announcement-capture-7bef8a7ce6eb.json', scope)
client = gspread.authorize(credentials)
# Google Sheets document ID (the long token from the sheet's URL)
sheet_id = '1NKSFgnIUB3dW7Y0ZHCfXRd-B6QEYmCdW3km8g61X_bM'
# Browser-like headers; NSE rejects requests that look like plain scripts.
headers = {
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'en-US,en;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
}
# Home page is fetched first to obtain the anti-bot cookies the API needs.
baseurl = 'https://www.nseindia.com/'
url = 'https://www.nseindia.com/api/corporate-announcements?index=equities'
def get_announcements_nse():
    """Fetch the latest corporate announcements from the NSE API.

    Returns:
        pandas.DataFrame with columns Symbol, Name, Description,
        Announcement_Date, Announcement_Text, Attachment_File, with rows
        whose Description matches any skip keyword removed.  On any
        request or JSON-parse failure, an empty DataFrame with the same
        columns is returned instead of raising.
    """
    columns = ['Symbol', 'Name', 'Description', 'Announcement_Date',
               'Announcement_Text', 'Attachment_File']
    try:
        session = requests.Session()
        # Warm-up request: the home page sets the anti-bot cookies NSE
        # requires.  The Session persists them automatically, so there is
        # no need to copy them out and pass them back in by hand.
        session.get(baseurl, headers=headers, timeout=300)
        response = session.get(url, headers=headers, timeout=300)
        response.raise_for_status()  # raise HTTPError for 4xx/5xx
        json_data = response.json()
        if not json_data:  # API answered, but with an empty payload
            print("No announcements found.")
            return pd.DataFrame(columns=columns)
        # Keep only the fields published to the sheet, renamed to the
        # sheet's column names.
        selected_columns = ['symbol', 'sm_name', 'desc', 'an_dt',
                            'attchmntText', 'attchmntFile']
        announcements = pd.DataFrame(json_data)[selected_columns]
        announcements.columns = columns
        # Drop announcements whose Description mentions any skip keyword.
        # re.escape keeps regex metacharacters literal, and na=False
        # treats a missing Description as "no match" — without it a NaN
        # in the column breaks the boolean indexing below.
        keywords_to_skip = ['Disclosure', 'Certificate', 'Spurt', 'Loss',
                            'Duplicate', 'Share Certificate',
                            'Price Movement', 'Price']
        pattern = '|'.join(re.escape(k) for k in keywords_to_skip)
        skip_mask = announcements['Description'].str.contains(
            pattern, case=False, na=False)
        return announcements[~skip_mask]
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"An error occurred: {e}")
        return pd.DataFrame(columns=columns)
def update_google_sheet():
    """Fetch NSE announcements and prepend any new ones to the sheet.

    New rows are inserted at the top (row 2, just below the header) with
    the Symbol cell replaced by a link to the NSE quote page.  An
    announcement counts as already present when its Symbol, Name and
    Announcement_Date all match an existing row.
    """
    announcements_nse = get_announcements_nse()
    sheet = client.open_by_key(sheet_id).sheet1
    # Header row as it appears in the spreadsheet.
    # NOTE(review): the sheet header says 'Attachment_Text' while the
    # DataFrame column is 'Announcement_Text' — confirm this mismatch is
    # intentional; it means that column never participates in dedup.
    expected_headers = ['Symbol', 'Name', 'Description', 'Announcement_Date',
                        'Attachment_Text', 'Attachment_File']
    if len(sheet.get_all_values()) == 1:
        # Sheet holds only the header row: everything fetched is new.
        new_rows = announcements_nse
    else:
        existing_data = sheet.get_all_records(expected_headers=expected_headers)
        existing_df = pd.DataFrame(existing_data)
        key_cols = ['Symbol', 'Name', 'Announcement_Date']
        if existing_df.empty or not set(key_cols).issubset(existing_df.columns):
            # Nothing usable to dedupe against — treat all rows as new.
            new_rows = announcements_nse
        else:
            # Merge on the key columns ONLY.  Merging the full frames
            # would suffix overlapping non-key columns (_x/_y), changing
            # the row shape that insert_row() below depends on.
            merged = announcements_nse.merge(
                existing_df[key_cols], on=key_cols,
                how='left', indicator=True)
            new_rows = merged[merged['_merge'] == 'left_only'].drop(
                columns=['_merge'])
    # Empty strings instead of NaN so gspread serializes cells cleanly.
    new_rows = new_rows.replace({np.nan: ''})
    # Enumerate the rows rather than using the DataFrame index: after
    # filtering, the index has gaps, which would scatter inserts far
    # below the header instead of stacking them at the top.
    for offset, (_, row) in enumerate(new_rows.iterrows()):
        # Replace the Symbol cell with a link to the NSE quote page.
        quote_url = f'https://www.nseindia.com/get-quotes/equity?symbol={row["Symbol"]}'
        sheet.insert_row([quote_url] + row.tolist()[1:], offset + 2)
    print("NSE data updated successfully!")
# Schedule the job to run every 120 seconds
schedule.every(120).seconds.do(update_google_sheet)
# Blocking scheduler loop: wake once a second and run any job that is due.
# This never returns; the process must be killed (or the console closed)
# to stop it.
while True:
    schedule.run_pending()
    time.sleep(1)
The timeout occurs consistently on PythonAnywhere, even after adjusting timeout settings in the script. What could be causing this timeout issue on PythonAnywhere, and how can I troubleshoot or resolve it? Are there any specific configurations or settings I need to adjust on PythonAnywhere to prevent the timeout?
Any insights or suggestions would be greatly appreciated! Thank you.
I tried increasing the timeout to 300 seconds, but it still gives the timeout error. I expect this to run without errors, the same way it runs on my local computer.