So I’m trying to use Pyppeteer (an unofficial Python port of Puppeteer) to scrape a site and select an element.
I have been trying to wait for elements with the class “tab”, for example:
elements = await page.querySelectorAll('.tab')
But nothing returns and I get a timeout error.
In fact, I get a timeout error when I try to wait for any element with any class on the site that I loaded. I tried to troubleshoot by writing the HTML to a file and inspecting it, but not only do I not see any of the elements — when I open the file, the page is blank.
This is my code so far
import asyncio
from pyppeteer import launch
import requests
import sys
import json
# Module-level progress markers: confirm the script is actually being
# executed (and stdout is working) before any browser setup begins.
print("Test")
print("Starting script...")
def print_cookies_as_json(cookies):
    """Dump *cookies* to stdout as a pretty-printed JSON document."""
    print("Cookies in JSON format:")
    print(json.dumps(cookies, indent=4))
async def main():
    """Launch a browser, load cookies from disk, navigate, and save the page.

    Side effects: writes ``screenshot_TEST.png`` / ``screenshot_ERROR.png``
    and ``index.html`` to the current directory, and prints progress and
    diagnostics to stdout.
    """
    try:
        print("Launching browser...")
        # headless=False so the page is visible while debugging.
        browser = await launch(headless=False)
        page = await browser.newPage()
    except Exception as e:
        print(f"Error launching browser or creating new page: {e}")
        return
    try:
        print("Reading cookies from file...")
        # Load cookies from JSON file; bail out early if missing or empty.
        try:
            with open('/path/to/usrCookies.json', 'r') as f:
                cookies = json.load(f)
            if not cookies:
                raise ValueError("No valid cookies found in the file.")
            await page.setCookie(*cookies)
        except (FileNotFoundError, ValueError) as e:
            print(f"Error reading cookies: {e}")
            return
        # Print cookies in JSON format
        print_cookies_as_json(cookies)

        print("Navigating to URL...")
        url = 'https://example.com'
        # 'networkidle0' waits until there have been no network connections
        # for 500 ms, giving JavaScript-rendered content time to appear.
        await page.goto(url, {'waitUntil': 'networkidle0'})

        # BUG FIX: the original called response.json() unconditionally.
        # When the URL serves HTML (not JSON), that raises and the broad
        # except below aborts the whole scrape, so index.html was never
        # written and the screenshot/waitForSelector steps never ran.
        # Guard the JSON parse so a non-JSON response is merely reported.
        try:
            response = requests.get(
                url, cookies={c['name']: c['value'] for c in cookies})
            print(response.json())
        except ValueError as e:
            print(f"Response is not JSON (skipping): {e}")
        print("Processing JSON data...")

        await page.screenshot({'path': 'screenshot_TEST.png', 'fullPage': True})
        print("Waiting for page to load...")
        await page.waitForSelector('body', {'timeout': 10000})  # wait for the body to load
        await asyncio.sleep(1)

        # Get the HTML content of the page
        print("Getting HTML content...")
        html = await page.content()
        # Write the HTML content to a file
        with open("index.html", "w", encoding="utf-8") as file:
            file.write(html)
    except Exception as e:
        print("An error occurred:", e)
        await page.screenshot({'path': 'screenshot_ERROR.png', 'fullPage': True})
        # BUG FIX: the message previously claimed "screenshot.png" although
        # the file actually written is screenshot_ERROR.png.
        print("Screenshot saved as screenshot_ERROR.png")
    finally:
        # Close the browser even if navigation or scraping failed.
        try:
            print("Closing browser...")
            await browser.close()
        except Exception as e:
            print(f"Error closing browser: {e}")
if __name__ == "__main__":
    # asyncio.run() creates, runs, and closes its own event loop;
    # asyncio.get_event_loop().run_until_complete() is deprecated for
    # this pattern since Python 3.10.
    asyncio.run(main())
This is just a generic snippet of what my code looks like. Is there perhaps a flaw in how my code executes?