Thiết kế website giá rẻ

Question

I want to list only the company names from the following URL, using PowerShell or Python:

https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-07-29

Below is my python script to get the structure of the webpage:

import requests
from bs4 import BeautifulSoup

# URL of the results-calendar page to inspect
url = "https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-07-29"

# Fetch the page content; the timeout keeps the script from hanging
# indefinitely if the server never answers.
response = requests.get(url, timeout=30)
print(f"Status Code: {response.status_code}")

soup = BeautifulSoup(response.content, 'html.parser')

# Print the first 1000 characters of the HTML to check if we're getting content
print(soup.prettify()[:1000])

# Candidate CSS selectors to probe against the fetched HTML
selectors = [
    'div.PA10',  # Example selector, replace with potential correct ones
    'table.mctable1',
    'td.PR10.PT5.PB5',
    # Add more potential selectors here
]

for selector in selectors:
    elements = soup.select(selector)
    # Leading "\n" separates each selector's report with a blank line
    print(f"\nTrying selector: {selector}")
    print(f"Found {len(elements)} elements")
    for element in elements[:5]:  # Print first 5 elements for each selector
        print(element.text.strip())

# If still no results, print all unique tag names in the HTML
print("\nAll unique tags in the HTML:")
print({tag.name for tag in soup.find_all()})

Output:

PS C:\AMD> python getcompany.py
Status Code: 200
<!DOCTYPE html>
<html lang="en">
 <head>
  <link as="style" href="https://accounts.moneycontrol.com/assets/css/mclogin/bootstrap.min.css" rel="preload"/>
  <link as="style" href="https://stat2.moneycontrol.com/mccss/headfoot/mc_header.css?v=1.11" rel="preload"/>
  <meta charset="utf-8"/>
  <title>
   Results Calendar: Company Results Calendar, Quarterly Results Calendar and BSE NSE Results Calendar | Moneycontrol
  </title>
  <meta content="Results Calendar: Check quarterly results calendar of BSE NSE listed companies by the Moneycontrol. Get results announcements date of all the listed stocks and shares, stocks earnings calendar, Stocks results calendar, earnings date calendar, earnings result date, announcements, news, and more" name="description"/>
  <meta content="Results Calendar, Earnings calendar, listed company results calendar, Quarterly results calendar, Company results dates list, Company Quarterly Earnings Calendar, Company Quarterly Results Calendar, Company Quarterly Earn

I could not determine which element or column holds the company name. I then tried the Python script below, but it does not fetch the desired list of company names:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import random

def random_sleep(min_seconds, max_seconds):
    """Pause for a random duration, in seconds, drawn uniformly from the given bounds."""
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)

# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Setup the Chrome driver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Overwrite the navigator.webdriver property.
# A plain execute_script() call only patches the document that is currently
# loaded (the initial blank page) and is discarded as soon as driver.get()
# navigates. Registering the script through the DevTools protocol makes Chrome
# run it at the start of every new document instead.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)

# URL of the page
url = "https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-08-01"

try:
    # Load the page
    driver.get(url)
    print("Page loaded successfully")

    # Random sleep to mimic human behavior
    random_sleep(5, 10)

    # Wait for the table to be present in the DOM
    wait = WebDriverWait(driver, 30)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "mctable1")))

    # Get the page source
    page_source = driver.page_source

    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find the table
    table = soup.find('table', class_='mctable1')

    if table:
        # Find all rows in the table
        rows = table.find_all('tr')

        # Extract and print company names
        for row in rows[1:]:  # Skip header row
            cells = row.find_all('td')
            # Only rows with more than one <td> are treated as data rows;
            # the first cell is taken as the company name.
            if len(cells) > 1:
                company_name = cells[0].text.strip()
                print(company_name)
    else:
        print("Table not found in the page source")

except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the cleanup below.
    print(f"An error occurred: {str(e)}")

finally:
    # Close the browser
    driver.quit()

Can you please suggest a solution in PowerShell, Python, or any other language?

Thiết kế website giá rẻ

Danh mục

List company name from a webpage using powershell or python