I am looping through Google searches of the form "gasbuddy" + "address from input file", clicking the first link, and letting it redirect me to that webpage. The script reports a stale element error right after it successfully sends the keys, even though I can see the results loaded in the driver. Because of a data-buffering issue, time.sleep does not work here, so I used WebDriverWait instead; I think this is the only difference between this failing script and my test script, which works. I have attached both scripts below. What is happening to the a[h3] elements that wrap all the Google results, and why do they disappear from the DOM?
Here is the problematic part of the script, inside a loop (it reports the error after the line that sends the keys):
try:
    # Loop through each address
    for index, row in address_chunk.iterrows():
        address = row['Address']
        proxy_success = False
        driver = None
        for _ in range(len(proxies)):
            proxy = next(proxy_cycle)
            try:
                if driver is not None:
                    driver.close()
                    driver.quit()
                # Open the website using the current proxy
                chrome_options = Options()
                #chrome_options.add_argument("--headless")
                chrome_options.add_argument("--disable-gpu")
                chrome_options.add_argument("--no-sandbox")
                chrome_options.add_argument("--disable-dev-shm-usage")
                chrome_options.add_argument(f'--proxy-server={proxy}')
                driver = webdriver.Chrome(service=service, options=chrome_options)
                # Open the website
                driver.get('https://www.google.com/')
                wait = WebDriverWait(driver, 5)
                search_box = wait.until(EC.presence_of_element_located((By.NAME, "q")))
                proxy_success = True
                break  # exit the proxy loop
            except Exception as e:
                logging.error(f"Failed to open google for Address: {address} with proxy {proxy} - {str(e)}")
                continue
        if not proxy_success:
            logging.error(f"All proxies failed for Address: {address}")
            if driver is not None:
                driver.close()
                driver.quit()
            continue  # skip to the next address
        found_results = False
        results = []
        try:
            search_query = f"gasbuddy {address}"
            logging.info(f'Searching for: {search_query} using proxy {proxy}')
            # Search for the address in Google
            #search_box = driver.find_element(By.NAME, "q")
            search_box.send_keys(search_query)
            search_box.send_keys(Keys.RETURN)
            # Use WebDriverWait instead of time.sleep
            #wait.until(EC.presence_of_element_located((By.XPATH, "//a[h3]")))
            wait = WebDriverWait(driver, 8)
            wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10)
            logging.info(f'Found search links for: {search_query} using proxy {proxy}')
            #time.sleep(5)  # Wait for search results to load
            # Find the GasBuddy link in the search results
            links = driver.find_elements(By.XPATH, "//a[h3]")
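For completeness, this is the stale-tolerant variant of that wait I have been wondering about (just a sketch, not something the script above already does): WebDriverWait accepts an ignored_exceptions argument, so the poll loop could swallow stale references while Google re-renders the results.

from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait

# Sketch: let the wait's poll loop ignore stale references instead of
# letting them escape, in case the staleness happens mid-poll
wait = WebDriverWait(driver, 8, ignored_exceptions=(StaleElementReferenceException,))
wait.until(lambda d: len(d.find_elements(By.XPATH, "//a[h3]")) >= 10)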
Here is the test script that works perfectly fine. I do not see any structural difference other than, possibly, the WebDriverWait:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import re
from selenium.webdriver.chrome.service import Service

# Setup Selenium WebDriver
service = Service("your web driver path")
driver = webdriver.Chrome(service=service)
# Ensure the chromedriver is in your PATH or specify the path to it

# List of gas stations
gas_stations = [
    "1037 SAINT JAMES AVE SPRINGFIELD, MA 01104-1309",
    # Add more gas stations here
]

# Loop through each gas station
for station in gas_stations:
    search_query = f"gasbuddy {station}"
    # Open Google
    driver.get("https://www.google.com")
    search_box = driver.find_element(By.NAME, "q")
    search_box.send_keys(search_query)
    search_box.send_keys(Keys.RETURN)
    time.sleep(5)  # Wait for search results to load

    # Find the GasBuddy link in the search results. This XPath finds all
    # anchor (<a>) elements on the Google results page that contain a child
    # <h3> element; these are typically the clickable titles of the results.
    links = driver.find_elements(By.XPATH, "//a[h3]")
    gasbuddy_link = None
    for link in links:
        if "gasbuddy.com" in link.get_attribute("href"):
            gasbuddy_link = link.get_attribute("href")
            break

    if gasbuddy_link:
        driver.get(gasbuddy_link)
        time.sleep(3)  # Wait for the GasBuddy page to load
        # Extract the station ID from the URL
        station_id_match = re.search(r'/station/(\d+)', gasbuddy_link)
        station_id = station_id_match.group(1) if station_id_match else 'N/A'
        # Scrape ratings (this part will depend on the structure of the GasBuddy page)
        try:
            rating = driver.find_element(By.XPATH, ".//span[contains(@class, 'Station-module__ratingAverage')]").text
            ratingcount = driver.find_element(By.XPATH, "//span[contains(@class, 'StationInfoBox-module__ratings___1O33R')]").text.strip("()")
            amenities_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'amenitiesPanel')]//span[contains(@class, 'text__left___1iOw3')]")
            amenities = [element.text.strip() for element in amenities_elements if element.text.strip()]
            #amenities = [element.text for element in amenities_elements]
            num_amenities = len(amenities)
            amenities_str = ', '.join(amenities)
        except Exception as e:
            print(f"Failed to get rating for {station}: {e}")
        print(f"GasBuddy Link: {gasbuddy_link}")
        print(f"Station ID: {station_id}")
        print(f"Station Rating: {rating}")
        print(f"Amount of Ratings: {ratingcount}")
        # Print or process the list of amenities
        print(f"Number of amenities: {num_amenities}")
        print(f"Amenities: {amenities_str}")
    else:
        print(f"No GasBuddy link found for {station}")
    time.sleep(2)  # Small delay before the next iteration

# Close the driver
driver.quit()
I cannot use time.sleep, otherwise I suspect it would work. I tried wait.until(driver.find_elements(By.XPATH, "//a[h3]")), and then wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10), adjusting how many links must be available to control the waiting time.
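To spell out the difference between those two calls (as far as I understand until(), only the second one actually polls):

# This evaluates find_elements once, right away, and hands the resulting
# list to until(), which expects a callable, so this form should just fail
# with a TypeError rather than wait:
wait.until(driver.find_elements(By.XPATH, "//a[h3]"))

# This hands until() a lambda, which it re-invokes every poll interval
# until it returns a truthy value or the timeout expires:
wait.until(lambda driver: len(driver.find_elements(By.XPATH, "//a[h3]")) >= 10)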
My guess is that while Google is loading all the results/links, the elements are constantly being replaced. Therefore, as long as I do not let the page fully load and settle, this error will keep appearing?
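If that guess is right, I would expect a wait that checks for the link count to stop changing, rather than to reach a fixed number, to behave differently (a sketch; results_settled and link_counts are names I made up for this illustration):

link_counts = []  # scratch list recording the count seen on each poll

def results_settled(driver):
    # Succeed only once two consecutive polls observe the same
    # non-zero number of result links, i.e. the DOM has settled
    link_counts.append(len(driver.find_elements(By.XPATH, "//a[h3]")))
    return len(link_counts) >= 2 and link_counts[-1] == link_counts[-2] and link_counts[-1] > 0

WebDriverWait(driver, 8).until(results_settled)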
How can I solve this issue?
Edit: never mind the page-load idea. I also waited for the page load to complete with wait.until(lambda driver: driver.execute_script('return document.readyState') == 'complete'), and it is still the same error.
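The only other pattern I can think of is re-locating the element immediately before each use, in case the stale reference is the search box itself rather than the result links (again just a sketch):

# Re-find the search box right before typing instead of reusing the
# reference returned by the earlier wait:
search_box = driver.find_element(By.NAME, "q")
search_box.send_keys(search_query)
search_box.send_keys(Keys.RETURN)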