I am trying to scrape the top 100 crypto from CoinMarketCap.com using Playwright in Python.
The name, Symbol, Price etc. are all in a tr_list from where I am getting all the details. The code seems to be running fine and giving out some values but suddenly stops with some attribute error .
Also some of the output got before getting this error are also repeated.
The following is the code I tried:
from playwright.sync_api import sync_playwright, retry_if_exception_raised
import psycopg2
from psycopg2.extras import execute_values
def main():
with sync_playwright() as p:
### scraping data part
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto('https://coinmarketcap.com/')
page.wait_for_load_state('networkidle')
### Scrolling down
for i in range(3):
page.mouse.wheel(0,5000)
page.wait_for_timeout(100000)
trs_xpath = 'div.cmc-body-wrapper table tbody tr'
trs_list = page.query_selector_all(trs_xpath)
print(len(trs_list))
master_list = []
for tr in trs_list:
coin_dict = {}
tds = tr.query_selector_all('//td')
coin_dict['id'] = tds[1].inner_text()
coin_dict['Name'] = tds[2].query_selector("//p[@color='text']").inner_text()
#coin_dict['Symbol'] = tr.query_selector('td:nth-child(2) p[color="text3"]').inner_text()
coin_dict['Symbol'] = tds[2].query_selector("//p[@color='text3']").inner_text()
coin_dict['Price'] = tds[3].inner_text().replace('$', ' ').replace(',', '')
if len(tds) > 7:
coin_dict['Market_Cap'] = tds[7].inner_text().replace('$', '').replace(',', '')
else:
coin_dict['Market_Cap'] = "N/A"
if len(tds) > 7:
coin_dict['Volume'] = tds[8].query_selector("//p[@color='text']").inner_text().replace('$',' ').replace(',','')
else:
coin_dict['Volume'] = "N/A"
master_list.append(coin_dict)
for el in master_list:
print(el)
print(len(master_list))
browser.close()
if __name__ == '__main__':
main()
After getting some results , I get following error –
coin_dict[‘Name’] = tds[2].query_selector(“//p[@color=’text’]”).inner_text()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: ‘NoneType’ object has no attribute ‘inner_text’
Pranjal is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
1