Thiết kế website giá rẻ

Question

I'm building a Selenium web scraper for NFT loan data on blur.io, and it works perfectly in non-headless mode. But in headless mode, it is unable to find a scrollable element that I need to access in order to scroll and load more content. This causes the script to error out in headless mode.

I've tried the following fixes:

options.add_argument("--headless=new")
options.add_argument("--window-size=1440, 900")
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument("--start-maximized")

I also have the driver wait until the element is visible, but it still can't seem to find it and just errors out:

WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'rows')))

Here’s my full code, thank you!

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
from tkinter import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# NOTE: a `global` statement at module scope has no effect -- names assigned
# here are already module-level. The declaration that matters is the one
# inside execute_loan_checker().
global nftData

def removePRCNT(string):
    """Strip every percent sign from *string* and return what is left as a float."""
    without_percent = "".join(string.split("%"))
    return float(without_percent)

# Module-level result list. execute_loan_checker() empties it in place and
# then appends one [name, borrow amount, ltv, apy] entry per matching loan.
nftData = []
def execute_loan_checker(apyThreshold, ltvThreshold, ethThreshold):
    """Scrape the "All Loans" tables on blur.io and collect the matching loans.

    For every collection page the loan table is scrolled and read while the
    rows still report the status "AUCTION"; reading stops at the first
    "ACTIVE" row. A row is kept when its NFT name has not been recorded yet
    and all three filters pass.

    Args:
        apyThreshold: a row is kept only if its APY (percent) is greater
            than this value.
        ltvThreshold: a row is kept only if its LTV (percent) is lower than
            this value.
        ethThreshold: a row is kept only if its borrow amount is lower than
            this value.

    Returns:
        The module-level ``nftData`` list, cleared at the start of the call
        and filled with ``[nftName, borrowAmount, ltv, apy]`` entries holding
        the cell texts as scraped.
    """
    global nftData
    del nftData[:]
    path = "MYPATH/YOURPATH"
    service = Service(path)

    options = Options()

    options.add_argument("--headless=new")
    # The value must not contain a space: with "1440, 900" Chrome cannot
    # parse the size and headless mode keeps its small default viewport, so
    # the page can lay out differently than in a normal window.
    options.add_argument("--window-size=1440,900")
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument("--start-maximized")

    # Other misc options.
    options.add_experimental_option("detach", True)
    options.add_experimental_option("excludeSwitches",["enable-automation"])

    collection_slugs = [
        "wrapped-cryptopunks",
        "azuki",
        "milady",
        "degods-eth",
        "boredapeyachtclub",
        "mutant-ape-yacht-club",
        "kanpai-pandas",
        "remilio-babies",
        "pudgypenguins",
        "otherdeed",
        "bored-ape-kennel-club",
        "clonex",
        "beanzofficial",
        "azukielementalbeans",
        "azukielementals",
        "proof-moonbirds",
        "lilpudgys",
    ]
    collection_links = [
        "https://blur.io/eth/collection/{}/loans".format(slug)
        for slug in collection_slugs
    ]

    scroll_amount = 500  # Amount of pixels to scroll each time
    # Give up on a collection after this many consecutive scroll attempts
    # that did not move the list; without a bound, a table with no rows or
    # with only AUCTION rows would keep the loop spinning forever.
    max_stalled_scrolls = 40
    # Sets scrollTop and reports whether the element actually moved.
    scroll_script = (
        "var el = arguments[0];"
        "var before = el.scrollTop;"
        "el.scrollTop = before + arguments[1];"
        "return el.scrollTop !== before;"
    )

    driver = webdriver.Chrome(service=service, options=options)

    def gatherLoanData():
        # Names recorded so far, shared across all collections.
        addedNFTnames = set()

        for link in collection_links:
            driver.get(link)

            # Wait until the tab button is clickable, then click it.
            loans_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[.='All Loans']")))
            loans_button.click()
            time.sleep(.4)  # might need adjusting based on computer speed

            # element_to_be_clickable already requires the element to be
            # visible, so a separate visibility wait is not needed.
            scrollable_element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, "rows")))

            status = "AUCTION"  # assume live loans until a row says otherwise
            stalled_scrolls = 0
            # Only keep scrolling while rows are AUCTION, to get live loans.
            while status == "AUCTION" and stalled_scrolls < max_stalled_scrolls:
                for loan_row in driver.find_elements(By.XPATH, "//div[@id= 'COLLECTION_MAIN']//div[@role='rowgroup']//div[@role='row']"):
                    nftName = loan_row.find_element(By.XPATH, "div[1]").text  # nft title
                    status = loan_row.find_element(By.XPATH, "div[2]").text  # auction/active status
                    if status == "ACTIVE":
                        break
                    borrowAmount = loan_row.find_element(By.XPATH, "div[3]").text
                    ltv = loan_row.find_element(By.XPATH, "div[4]").text
                    apy = loan_row.find_element(By.XPATH, "div[5]").text
                    if nftName not in addedNFTnames and ethThreshold > float(borrowAmount) and ltvThreshold > removePRCNT(ltv) and removePRCNT(apy) > apyThreshold:
                        nftData.append([nftName, borrowAmount, ltv, apy])
                        addedNFTnames.add(nftName)  # remember it so it is not added again

                moved = driver.execute_script(scroll_script, scrollable_element, scroll_amount)
                stalled_scrolls = 0 if moved else stalled_scrolls + 1
                time.sleep(.05)  # Delay, might need to be increased based on load speed

    try:
        gatherLoanData()
    finally:
        # Always shut the browser and chromedriver down, even when a wait
        # times out; a leaked headless browser is invisible to the user.
        driver.quit()
    return nftData

# Test run with thresholds loose enough to act as "no filtering": any row with
# APY above 0, LTV below 999 and borrow amount below 999 is kept.
# NOTE: this runs on import as well as when the file is executed as a script.
execute_loan_checker(0,999,999) # calls the scraper with no real filtering, for testing

Thiết kế website giá rẻ

Danh mục

How to get headless to work for selenium scraper?