I’m working on a Selenium script using Python to scrape stock data from a website with paginated content. The script needs to:
Search for a specific keyword (“FreshQA”).
Click an “Add” button for each product on the page until it’s unclickable.
Collect and save product and stock data.
Navigate through up to 15 pages and repeat the process.
The script works on the first page but fails when trying to interact with elements on subsequent pages. It either doesn’t move to the next page correctly or encounters errors when clicking elements (e.g., “element click intercepted”). Here’s a simplified version of my code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
# Set up the Chrome browser
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
# URLs for different locations
locations = [
"https://www.talabat.com/qatar/grocery/613147/MESAIMEER?aid=1636",
# Add other URLs here...
]
# XPaths for page navigation
search_box_xpath = '//*[@id="__next"]/div[4]/div[1]/div/div/div[1]/div/div/div[2]/div[2]/div[2]/div/input'
pagination_xpath = '//*[@id="__next"]/div[4]/div[1]/div/div/div[2]/div/div/div/div[2]/ul'
# XPaths for products, stock, and add buttons with index differentiation
product_xpaths = [
f'//*[@id="__next"]/div[4]/div[1]/div/div/div[2]/div/div/div/div[1]/div[{i+1}]/a/div/div[2]/div[1]'
for i in range(100) # Adjust index range as needed
]
stock_xpaths = [
f'//*[@id="__next"]/div[4]/div[1]/div/div/div[2]/div/div/div/div[1]/div[{i+1}]/a/div/div[1]/div[3]/div/div[2]/div[2]'
for i in range(100) # Adjust index range as needed
]
add_button_xpaths = [
f'//*[@id="__next"]/div[4]/div[1]/div/div/div[2]/div/div/div/div[1]/div[{i+1}]/a/div/div[1]/div[3]/div/div[2]/div[1]/img'
for i in range(100) # Adjust index range as needed
]
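# Note: these absolute, index-generated XPaths are brittle; any DOM change
# breaks all three lists at once. A possible alternative (sketch only -- the
# class name below is a placeholder, not verified against the live page)
# would be a single relative locator plus find_elements, e.g.:
#   cards = driver.find_elements(By.XPATH, '//div[contains(@class, "product-card")]')
# and then resolving the name, stock, and add button relative to each card.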
def click_add_button_until_unclickable(add_button_xpath, max_clicks=50):
"""Click the 'Add' button until it becomes unclickable or until max_clicks is reached."""
clicks = 0
while clicks < max_clicks:
try:
add_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, add_button_xpath)))
add_button.click()
print(f"Clicked 'Add' button at index {clicks + 1}")
time.sleep(0) # Wait for the stock to be revealed
clicks += 1
except Exception as e:
if "element click intercepted" in str(e) or "disabled" in str(e):
print(f"'Add' button is unclickable after {clicks} clicks.")
else:
print(f"Exception encountered while clicking 'Add' button: {e}")
break
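# Possible workaround for "element click intercepted" (untested sketch; an
# overlay blocking the native click is my assumption about the cause):
# scroll the element to mid-viewport and click it through JavaScript.
def js_click(element):
    """Scroll an element to the viewport center and click it via JavaScript."""
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
    driver.execute_script("arguments[0].click();", element)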
def scroll_to_element(element):
"""Scroll to the element."""
driver.execute_script("arguments[0].scrollIntoView();", element)
def get_max_pages():
"""Get the maximum number of pages from the pagination element."""
try:
pagination_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
page_elements = pagination_element.find_elements(By.TAG_NAME, 'li')
max_page = max(int(page.text) for page in page_elements if page.text.isdigit())
return max_page
except Exception as e:
print(f"Error getting max pages: {e}")
return 1 # Default to 1 page if there's an issue
def collect_stock_data():
all_stock_data = []
for url in locations:
driver.get(url)
print(f"Processing URL: {url}")
try:
search_box = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, search_box_xpath)))
search_box.click()
search_box.clear()
search_box.send_keys("FreshQA")
time.sleep(5) # Wait for search results to update
except Exception as e:
print(f"Failed to perform search: {e}")
continue # Skip this URL and continue with the next
# Get the number of pages
max_pages = get_max_pages()
print(f"Number of pages: {max_pages}")
# Iterate through pages
for page_index in range(1, max_pages + 1):
print(f"Processing page {page_index}")
# Click the 'Add' button for each product to reveal stock quantities
for i in range(len(add_button_xpaths)):
try:
click_add_button_until_unclickable(add_button_xpaths[i])
print(f"Clicked 'Add' button for product index {i}")
# Extract stock data for the product
try:
product_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, product_xpaths[i])))
stock_text_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, stock_xpaths[i])))
stock_data = {
'Product': product_element.text.strip(),
'Stock': stock_text_element.text.strip()
}
all_stock_data.append(stock_data)
except Exception as e:
print(f"Error retrieving data for product index {i}: {e}")
except Exception as e:
print(f"Error interacting with 'Add' button for product index {i}: {e}")
# Move to the next page
if page_index < max_pages:
retries = 3
while retries > 0:
try:
next_page_button_xpath = f'{pagination_xpath}/li[{page_index + 1}]'
# Scroll to the next page button
next_page_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
scroll_to_element(next_page_button)
# Ensure the next button is clickable
next_page_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, next_page_button_xpath)))
next_page_button.click()
print(f"Moved to next page: {page_index + 1}")
time.sleep(10) # Wait for the new page to load
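                        # Possibly sturdier than the fixed sleep (untested sketch):
                        # grab any product element before clicking "next", then wait
                        # for it to go stale so we know the grid was re-rendered, e.g.:
                        #   WebDriverWait(driver, 15).until(EC.staleness_of(old_product))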
break # Exit the retry loop if successful
except Exception as e:
print(f"Error navigating to next page: {e}")
retries -= 1
if retries == 0:
print(f"Failed to move to next page after {3} retries. Exiting page navigation.")
break # Exit if retries are exhausted
# Save data to an Excel file
df = pd.DataFrame(all_stock_data)
df.to_excel('stock_data.xlsx', index=False)
print("Data saved to 'stock_data.xlsx'")
# Run the script
collect_stock_data()
# Close the browser
driver.quit()
I've tried a lot of things to fix this and could use help modifying the script so it works across all pages.
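For example, one variant I experimented with selects the pagination button by its visible page number instead of its position in the list (a sketch; whether each li exposes the number as its text is my assumption about the markup):

def go_to_page(page_number, timeout=10):
    """Click the pagination entry whose visible text equals page_number."""
    xpath = f'{pagination_xpath}/li[normalize-space()="{page_number}"]'
    button = WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
    button.click()

I'm not confident this matches what the site actually renders, though.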