I’m trying to scrape a website. I have managed to get the result I wanted, but I am limited by having to keep the cursor inside the table I want to scroll.
The end goal is to have this run in the background without me having to move the cursor or even have the page open in the foreground.
From inspecting the page, I found that this div appears when the cursor is inside the table that I want to scroll and disappears as soon as the mouse moves out of it. If I can change the “top” value in this div, I can achieve the result I want. [HTML of the target DIV]
Is there any way to get the div to appear without the cursor being present, or is there any other workaround?
As I mentioned, I’m currently relying on placing the cursor in the table manually and then sending a scroll command. I want to automate the whole thing so I can have it running without the page being open in the foreground.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.webdriver.common.action_chains import ActionChains
import pyautogui
# Start a Chrome session and load the login page.
driver = webdriver.Chrome()
driver.get('xxxxxxx')

# Both credential fields are awaited with the same 5-second presence wait.
login_wait = WebDriverWait(driver, 5)
crew_id_locator = (By.XPATH, "//input[@placeholder='Crew ID']")
password_locator = (By.XPATH, "//input[@placeholder='Password']")

# Fill in the username once its input is present in the DOM.
username_field = login_wait.until(
    EC.presence_of_element_located(crew_id_locator)
)
username_field.send_keys('xxxxx')  # Replace with your actual username

# Fill in the password, then press Return in that field to submit the form.
password_field = login_wait.until(
    EC.presence_of_element_located(password_locator)
)
password_field.send_keys('xxxxxx')  # Replace with your actual password
password_field.send_keys(Keys.RETURN)
time.sleep(3)

# Navigate to the page that is scraped below and give it a fixed 5 seconds.
driver.get('xxxxxxxxxxxxxxxxxx')
time.sleep(5)
# Open the dropdown and pick the "All Departures From" entry.
# Any failure is reported and the script carries on (best effort).
try:
    filter_wait = WebDriverWait(driver, 10)

    # The dropdown arrow must be clickable before it is clicked.
    dropdown_icon = filter_wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '.webix_input_icon.wxi-menu-down')
        )
    )
    dropdown_icon.click()
    print("Dropdown icon clicked")

    # The option is the grid cell in row 2 whose text matches exactly.
    option_xpath = "//div[@role='gridcell' and @aria-rowindex='2' and text()='All Departures From']"
    all_departures_from_option = filter_wait.until(
        EC.element_to_be_clickable((By.XPATH, option_xpath))
    )
    all_departures_from_option.click()
    print("All Departures From selected")
except Exception as e:
    print(f"Error selecting 'All Departures From': {e}")
# Airport code typed into the Airport field. It is assigned before the
# ``try`` so the name is always bound: process_flights() reads it when it
# builds the output file name, even if the field below cannot be filled in.
target_airport = "xxx"

# Enter the airport code into the Airport field.
try:
    # Locate the combobox input that follows the "Airport" label.
    airport_label = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//label[text()='Airport']/following-sibling::div//input[@role='combobox']"))
    )
    airport_label.click()
    airport_label.clear()  # Clear any existing text in the field
    time.sleep(3)  # Pause to ensure the field clears
    airport_label.send_keys(target_airport)
    print(f"'{target_airport}' entered in the Airport field")
except Exception as e:
    # Best effort: report the failure and let the script continue.
    print(f"Error finding or entering '{target_airport}': {e}")
#---------------------------------------
# Overwrite the text of the "On" date control directly from JavaScript.
new_date = "28/09/2024" # Replace with your desired date
set_date_js = """
var targetDiv = document.querySelector('div.webix_inp_static[aria-label="On"]');
if (targetDiv) {
targetDiv.innerText = arguments[0]; // Change the inner text
targetDiv.dispatchEvent(new Event('change')); // Trigger change event if necessary
} else {
return 'Target div not found';
}
"""
js_error = driver.execute_script(set_date_js, new_date)

# The script only returns a value when the target div is missing. With no
# error message, press Enter on the page body; otherwise print the message.
if not js_error:
    driver.find_element(By.CSS_SELECTOR, 'body').send_keys(Keys.ENTER)
else:
    print(js_error)
time.sleep(5)
#------------------Search for FO below--------------------
def move_cursor_to_element(element):
    """Move the OS mouse cursor to the centre of *element* on the screen.

    ``element.location`` is reported relative to the page, while
    ``pyautogui.moveTo`` takes screen coordinates. The element position is
    therefore shifted by the page scroll offset, the browser window's
    position on screen and an estimate of the browser's own borders/toolbars.

    Args:
        element: Selenium WebElement to point at.

    NOTE(review): the toolbar/border sizes are estimated from the difference
    between the outer and inner window sizes, and CSS pixels are assumed to
    equal screen pixels (no display scaling) -- confirm on the target machine.
    """
    # Element box in page coordinates.
    location = element.location
    size = element.size

    # Window placement, estimated browser chrome and current scroll offset.
    metrics = driver.execute_script("""
        var sideBorder = (window.outerWidth - window.innerWidth) / 2;
        return {
            left: window.screenX,
            top: window.screenY,
            sideBorder: sideBorder,
            topChrome: window.outerHeight - window.innerHeight - sideBorder,
            scrollX: window.pageXOffset,
            scrollY: window.pageYOffset
        };
    """)

    # Page coordinates -> viewport coordinates -> screen coordinates,
    # aiming at the middle of the element rather than its top-left corner.
    x_position = (
        metrics['left'] + metrics['sideBorder']
        + location['x'] - metrics['scrollX']
        + size['width'] / 2
    )
    y_position = (
        metrics['top'] + metrics['topChrome']
        + location['y'] - metrics['scrollY']
        + size['height'] / 2
    )

    pyautogui.moveTo(round(x_position), round(y_position))
def simulate_mouse_scroll(clicks=-50):
    """Scroll the mouse wheel at the current cursor position via PyAutoGUI.

    Args:
        clicks: Amount passed to ``pyautogui.scroll``; negative values scroll
            down. Defaults to -50, the amount this script has always used.
    """
    pyautogui.scroll(clicks)
def check_fo_in_any_div(driver):
    """Report whether a div containing 'FO' sits in row 1 or 2.

    Waits up to 5 seconds for at least one ``div`` whose text contains
    "FO", then inspects the ``aria-rowindex`` attribute of each match.

    Args:
        driver: Selenium WebDriver for the page to inspect.

    Returns:
        True if any matching div has a numeric ``aria-rowindex`` of 2 or
        less; False otherwise, including when the wait times out or any
        other error occurs (the error is printed, not raised).
    """
    try:
        # Wait for any div element containing the text "FO".
        fo_divs = WebDriverWait(driver, 5).until(
            EC.presence_of_all_elements_located((By.XPATH, '//div[contains(text(), "FO")]'))
        )
        for fo_div in fo_divs:
            aria_rowindex = fo_div.get_attribute("aria-rowindex")
            print(f"Found 'FO' in div with aria-rowindex: {aria_rowindex}")

            # Compare numerically: as strings, "10" <= "2" would be True.
            # Divs without a usable row index are skipped instead of
            # aborting the scan of the remaining matches.
            try:
                row_number = int(aria_rowindex)
            except (TypeError, ValueError):
                continue

            if row_number <= 2:
                return True

        # No matching div was in row 2 or above it.
        print("FO not found in row 2.")
        return False
    except Exception as e:
        # Best effort: a timeout or stale element counts as "no FO found".
        print(f"Error occurred: {e}")
        return False
def process_flights():
    """Walk the flight table row by row and export flights lacking an FO.

    For each ``aria-rowindex`` starting at 1, the crew icon in column 7 is
    clicked, the crew window is checked with ``check_fo_in_any_div`` and
    closed again with Escape. Flights for which the check returns False are
    collected and, if there are any, written to an Excel file whose name is
    built from ``target_airport`` and the first two characters of
    ``new_date``.

    Any exception while handling a row is printed and counted; the loop
    stops once 30 such failures have accumulated. The table is scrolled with
    ``simulate_mouse_scroll`` after every row (and once more after a failed
    row), which relies on the OS cursor being over the table.
    """
    flight_details = []
    row_index = 1
    max_empty_attempts = 30  # Failed rows tolerated before assuming the table has ended
    empty_attempts = 0

    while empty_attempts < max_empty_attempts:
        try:
            # Crew icon of the current row; its onclick attribute carries
            # the flight data.
            row_selector = f'div[aria-rowindex="{row_index}"][aria-colindex="7"] div[onclick*="ShowCrewOnFlight"]'
            crew_icon = driver.find_element(By.CSS_SELECTOR, row_selector)
            flight_data = crew_icon.get_attribute("onclick")

            # Registration: text of the last <span> in column 4, if any.
            ac_reg = f'div[aria-rowindex="{row_index}"][aria-colindex="4"]'
            target_div_reg = driver.find_element(By.CSS_SELECTOR, ac_reg)
            span_elements_reg = target_div_reg.find_elements(By.TAG_NAME, 'span')
            if span_elements_reg:
                span_value_reg = span_elements_reg[-1].text.strip()
                print(span_value_reg)
            else:
                span_value_reg = "NO REGISTRATION YET"

            # Flight number: the text between '"Flt":' and the next comma.
            flight_number = flight_data.split('"Flt":')[1].split(",")[0]

            # Hover over the crew icon, pausing longer on the very first row.
            ActionChains(driver).move_to_element(crew_icon).perform()
            if row_index == 1:
                time.sleep(2)  # Pause for a moment to simulate real user behavior
            time.sleep(1)

            # Open the crew details window and give it time to appear.
            crew_icon.click()
            time.sleep(1)

            if check_fo_in_any_div(driver):
                print("_")
            else:
                print(f"Value false for 6E{flight_number}")
                flight_details.append({"Flight Number": flight_number, "Registration": span_value_reg})

            # Close the crew details window again.
            ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        except Exception as e:
            # Log the failure, scroll and count it towards the stop limit.
            print(f"Error processing row {row_index}: {e}")
            simulate_mouse_scroll()
            empty_attempts += 1
            print(empty_attempts)

        # Scroll the table and advance to the next row index.
        simulate_mouse_scroll()
        row_index += 1

    # Save the collected flights, reporting the file name actually written.
    if flight_details:
        output_name = f"flights without_fo_from{target_airport}_on_{new_date[:2]}.xlsx"
        pd.DataFrame(flight_details).to_excel(output_name, index=False)
        print(f"Excel file '{output_name}' has been created with the flight details.")
    else:
        print("All flights have an FO.")
# Run the flight processing and always close the browser afterwards, even
# if processing raises (for example while writing the Excel file).
try:
    process_flights()
finally:
    driver.quit()
1