I am looping through Google searches of the form "gasbuddy" + "address from input file", clicking the first link, and letting it redirect me to that webpage. The script reports a stale element error right after it successfully sends the keys, even though I can see the results loaded in the driver. Because of a data-buffering issue, time.sleep does not work here, so I used WebDriverWait instead; I think this is the only difference between this failing script and my test script, which works. I have attached both scripts below. What is happening to the a[h3] elements that wrap all the Google results, and why do they disappear from the DOM?
Here is the problematic part of the script, inside a loop (it reports the error after the line that sends the keys):
try:
    # Loop through each address
    for index, row in address_chunk.iterrows():
        address = row['Address']
        proxy_success = False
        driver = None
        for _ in range(len(proxies)):
            proxy = next(proxy_cycle)
            try:
                if driver is not None:
                    driver.close()
                    driver.quit()
                # Open the website using the current proxy
                chrome_options = Options()
                #chrome_options.add_argument("--headless")
                chrome_options.add_argument("--disable-gpu")
                chrome_options.add_argument("--no-sandbox")
                chrome_options.add_argument("--disable-dev-shm-usage")
                chrome_options.add_argument(f'--proxy-server={proxy}')
                driver = webdriver.Chrome(service=service, options=chrome_options)
                # Open the website
                driver.get('https://www.google.com/')
                wait = WebDriverWait(driver, 5)
                search_box = wait.until(EC.presence_of_element_located((By.NAME, "q")))
                proxy_success = True
                break  # exit the proxy loop
            except Exception as e:
                logging.error(f"Failed to open google for Address: {address} with proxy {proxy} - {str(e)}")
                continue
        if not proxy_success:
            logging.error(f"All proxies failed for Address: {address}")
            if driver is not None:
                driver.close()
                driver.quit()
            continue  # skip to the next address
        found_results = False
        results = []
        try:
            search_query = f"gasbuddy {address}"
            logging.info(f'Searching for: {search_query} using proxy {proxy}')
            # Search for the address in Google
            #search_box = driver.find_element(By.NAME, "q")
            search_box.send_keys(search_query)
            search_box.send_keys(Keys.RETURN)
            # Use WebDriverWait instead of time.sleep
            #wait.until(EC.presence_of_element_located((By.XPATH, "//a[h3]")))
            wait = WebDriverWait(driver, 8)
            wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10)
            logging.info(f'Found search links for: {search_query} using proxy {proxy}')
            #time.sleep(5)  # Wait for search results to load
            # Find the GasBuddy link in the search results
            links = driver.find_elements(By.XPATH, "//a[h3]")
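For completeness, this is the stale-tolerant variant of that wait I have been wondering about (just a sketch, not something the script above already does): WebDriverWait accepts an ignored_exceptions argument, so the poll loop could swallow stale references while Google re-renders the results.

from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait

# Sketch: let the wait's poll loop ignore stale references instead of
# letting them escape, in case the staleness happens mid-poll
wait = WebDriverWait(driver, 8, ignored_exceptions=(StaleElementReferenceException,))
wait.until(lambda d: len(d.find_elements(By.XPATH, "//a[h3]")) >= 10)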
Here is the test script that works perfectly fine. I do not see any structural difference other than, possibly, the WebDriverWait:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import re
from selenium.webdriver.chrome.service import Service

# Setup Selenium WebDriver
service = Service("your web driver path")
driver = webdriver.Chrome(service=service)
# Ensure the chromedriver is in your PATH or specify the path to it

# List of gas stations
gas_stations = [
    "1037 SAINT JAMES AVE SPRINGFIELD, MA 01104-1309",
    # Add more gas stations here
]

# Loop through each gas station
for station in gas_stations:
    search_query = f"gasbuddy {station}"
    # Open Google
    driver.get("https://www.google.com")
    search_box = driver.find_element(By.NAME, "q")
    search_box.send_keys(search_query)
    search_box.send_keys(Keys.RETURN)
    time.sleep(5)  # Wait for search results to load

    # Find the GasBuddy link in the search results. This XPath finds all
    # anchor (<a>) elements on the Google results page that contain a child
    # <h3> element; these are typically the clickable titles of the results.
    links = driver.find_elements(By.XPATH, "//a[h3]")
    gasbuddy_link = None
    for link in links:
        if "gasbuddy.com" in link.get_attribute("href"):
            gasbuddy_link = link.get_attribute("href")
            break

    if gasbuddy_link:
        driver.get(gasbuddy_link)
        time.sleep(3)  # Wait for the GasBuddy page to load
        # Extract the station ID from the URL
        station_id_match = re.search(r'/station/(\d+)', gasbuddy_link)
        station_id = station_id_match.group(1) if station_id_match else 'N/A'
        # Scrape ratings (this part will depend on the structure of the GasBuddy page)
        try:
            rating = driver.find_element(By.XPATH, ".//span[contains(@class, 'Station-module__ratingAverage')]").text
            ratingcount = driver.find_element(By.XPATH, "//span[contains(@class, 'StationInfoBox-module__ratings___1O33R')]").text.strip("()")
            amenities_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'amenitiesPanel')]//span[contains(@class, 'text__left___1iOw3')]")
            amenities = [element.text.strip() for element in amenities_elements if element.text.strip()]
            #amenities = [element.text for element in amenities_elements]
            num_amenities = len(amenities)
            amenities_str = ', '.join(amenities)
        except Exception as e:
            print(f"Failed to get rating for {station}: {e}")
        print(f"GasBuddy Link: {gasbuddy_link}")
        print(f"Station ID: {station_id}")
        print(f"Station Rating: {rating}")
        print(f"Amount of Ratings: {ratingcount}")
        # Print or process the list of amenities
        print(f"Number of amenities: {num_amenities}")
        print(f"Amenities: {amenities_str}")
    else:
        print(f"No GasBuddy link found for {station}")
    time.sleep(2)  # Small delay before the next iteration

# Close the driver
driver.quit()
I cannot use time.sleep, otherwise I suspect it would work. I tried wait.until(driver.find_elements(By.XPATH, "//a[h3]")), and then wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10), adjusting how many links must be available to control the waiting time.
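To spell out the difference between those two calls (as far as I understand until(), only the second one actually polls):

# This evaluates find_elements once, right away, and hands the resulting
# list to until(), which expects a callable, so this form should just fail
# with a TypeError rather than wait:
wait.until(driver.find_elements(By.XPATH, "//a[h3]"))

# This hands until() a lambda, which it re-invokes every poll interval
# until it returns a truthy value or the timeout expires:
wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10)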
My guess is that while Google is loading all the results/links, the elements are constantly being replaced. Therefore, as long as I do not let the page fully load and settle, this error will keep appearing?
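If that guess is right, I would expect a wait that checks for the link count to stop changing, rather than to reach a fixed number, to behave differently (a sketch; results_settled and link_counts are names I made up for this illustration):

link_counts = []  # scratch list recording the count seen on each poll

def results_settled(driver):
    # Succeed only once two consecutive polls observe the same
    # non-zero number of result links, i.e. the DOM has settled
    link_counts.append(len(driver.find_elements(By.XPATH, "//a[h3]")))
    return len(link_counts) >= 2 and link_counts[-1] == link_counts[-2] and link_counts[-1] > 0

WebDriverWait(driver, 8).until(results_settled)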
How can I solve this issue?
Edit: never mind the page-load idea. I also waited for the page load to complete with wait.until(lambda driver: driver.execute_script('return document.readyState') == 'complete'), and it is still the same error.
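The only other pattern I can think of is re-locating the element immediately before each use, in case the stale reference is the search box itself rather than the result links (again just a sketch):

# Re-find the search box right before typing instead of reusing the
# reference returned by the earlier wait:
search_box = driver.find_element(By.NAME, "q")
search_box.send_keys(search_query)
search_box.send_keys(Keys.RETURN)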