I want to list only the company names from the following URL, using PowerShell or Python:
https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-07-29
Below is the Python script I used to inspect the structure of the webpage:
import requests
from bs4 import BeautifulSoup
# URL of the page
url = "https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-07-29"

# Fetch the page content. The timeout keeps the script from hanging
# indefinitely if the server never responds.
response = requests.get(url, timeout=30)
print(f"Status Code: {response.status_code}")

soup = BeautifulSoup(response.content, 'html.parser')

# Print the first 1000 characters of the HTML to check if we're getting content
print(soup.prettify()[:1000])

# Candidate CSS selectors to probe against the parsed document
selectors = [
    'div.PA10',  # Example selector, replace with potential correct ones
    'table.mctable1',
    'td.PR10.PT5.PB5',
    # Add more potential selectors here
]

for selector in selectors:
    elements = soup.select(selector)
    print(f"\nTrying selector: {selector}")
    print(f"Found {len(elements)} elements")
    # Only show a small sample so the output stays readable
    for element in elements[:5]:
        print(element.text.strip())

# If still no results, print all unique tag names in the HTML
print("\nAll unique tags in the HTML:")
print({tag.name for tag in soup.find_all()})
Output:
PS C:\AMD> python getcompany.py
Status Code: 200
<!DOCTYPE html>
<html lang="en">
<head>
<link as="style" href="https://accounts.moneycontrol.com/assets/css/mclogin/bootstrap.min.css" rel="preload"/>
<link as="style" href="https://stat2.moneycontrol.com/mccss/headfoot/mc_header.css?v=1.11" rel="preload"/>
<meta charset="utf-8"/>
<title>
Results Calendar: Company Results Calendar, Quarterly Results Calendar and BSE NSE Results Calendar | Moneycontrol
</title>
<meta content="Results Calendar: Check quarterly results calendar of BSE NSE listed companies by the Moneycontrol. Get results announcements date of all the listed stocks and shares, stocks earnings calendar, Stocks results calendar, earnings date calendar, earnings result date, announcements, news, and more" name="description"/>
<meta content="Results Calendar, Earnings calendar, listed company results calendar, Quarterly results calendar, Company results dates list, Company Quarterly Earnings Calendar, Company Quarterly Results Calendar, Company Quarterly Earn
From this output I could not determine which element or column holds the company name. I then tried the Python script below, but it does not fetch the desired list of company names:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import random
def random_sleep(min_seconds: float, max_seconds: float) -> None:
    """Pause execution for a random duration.

    The duration is drawn uniformly from the range spanned by
    ``min_seconds`` and ``max_seconds`` and passed to ``time.sleep``.

    Args:
        min_seconds: One bound of the sleep duration, in seconds.
        max_seconds: The other bound of the sleep duration, in seconds.
    """
    time.sleep(random.uniform(min_seconds, max_seconds))
# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Setup the Chrome driver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# URL of the page
url = "https://www.moneycontrol.com/markets/earnings/results-calendar/?activeDate=2024-08-01"

# Everything that touches the live browser sits inside try/finally so the
# browser is always closed, even if a setup step fails.
try:
    # Overwrite the navigator.webdriver property. A plain execute_script()
    # call only patches the document that is currently loaded, and that
    # patch is discarded on navigation. Registering the script through CDP
    # makes the browser run it at the start of every new document.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
    )

    # Load the page
    driver.get(url)
    print("Page loaded successfully")

    # Random sleep to mimic human behavior
    random_sleep(5, 10)

    # Wait for the table to be present in the DOM
    wait = WebDriverWait(driver, 30)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "mctable1")))

    # Get the page source
    page_source = driver.page_source

    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find the table
    table = soup.find('table', class_='mctable1')
    if table:
        # Find all rows in the table
        rows = table.find_all('tr')
        # Extract and print company names
        for row in rows[1:]:  # Skip header row
            cells = row.find_all('td')
            # Rows with fewer than two cells are skipped
            if len(cells) > 1:
                company_name = cells[0].text.strip()
                print(company_name)
    else:
        print("Table not found in the page source")
except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the cleanup below.
    print(f"An error occurred: {str(e)}")
finally:
    # Close the browser
    driver.quit()
Can you please suggest a solution in PowerShell, Python, or any other language?