I am building a web scraper that scrapes the beatport top 100.
I am having an issue where some items are located, but others get an error.
def scrape_beatport():
user_agent = UserAgent().random
chrome_options = Options()
chrome_options.add_argument(f"user-agent={user_agent}")
driver = webdriver.Chrome(options=chrome_options)
try:
driver.get('https://www.beatport.com/top-100')
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div#__next')))
test1 = driver.find_elements(By.CSS_SELECTOR, 'div#__next')
test2 = driver.fin_elements(By.CSS_SELECTOR, 'div.TracksList-style__Wrapper-sc-3fb03d50-8 bWXYsy row')
finally:
driver.quit()
if __name__ == '__main__':
scrape_beatport()
this is my code. test1 is foud and is a div in the body tag. test2 is much deeper into the html structure. it is inside a bunch of other divs, and when I try to get it, they give me an error:
Traceback (most recent call last):
File "/Users/just/Documents/python/yt_test.py", line 84, in <module>
scrape_beatport()
^^^^^^^^^^^^^^^^^
File "/Users/just/Documents/python/yt_test.py", line 71, in scrape_beatport
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.__next div.MainLayout-style__MainWrapper-sc-9f30c253-0 div div.MainLayout-style__Main-sc-9f30c253-1 main.MainLayout-style__MainContent-sc-9f30c253-2 div.TracksList-style__Wrapper-sc-3fb03d50-8')))
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/support/wait.py", line 105, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Stacktrace:
0 chromedriver 0x0000000105272940 chromedriver + 4368704
1 chromedriver 0x000000010526add4 chromedriver + 4337108
2 chromedriver 0x0000000104e8ec04 chromedriver + 289796
3 chromedriver 0x0000000104ed0e00 chromedriver + 560640
4 chromedriver 0x0000000104f095ec chromedriver + 792044
5 chromedriver 0x0000000104ec5ab4 chromedriver + 514740
6 chromedriver 0x0000000104ec650c chromedriver + 517388
7 chromedriver 0x0000000105236e5c chromedriver + 4124252
8 chromedriver 0x000000010523bc4c chromedriver + 4144204
9 chromedriver 0x000000010521c824 chromedriver + 4016164
10 chromedriver 0x000000010523c57c chromedriver + 4146556
11 chromedriver 0x000000010520e2d8 chromedriver + 3957464
12 chromedriver 0x000000010525bec4 chromedriver + 4275908
13 chromedriver 0x000000010525c040 chromedriver + 4276288
14 chromedriver 0x000000010526aa34 chromedriver + 4336180
15 libsystem_pthread.dylib 0x000000018a0bd034 _pthread_start + 136
16 libsystem_pthread.dylib 0x000000018a0b7e3c thread_start + 8
Please check out the beatport top 100 yourself to see the html, because it is a lot.
Thanks a lot!