I want to setup a script to export links from a website that is generated once you enter a URL. The website in question is pagespeed.web.dev. I have 0 knowledge, so while yes I know it’s not the best option, I went with ChatGPT for help. It seemed to work pretty well all things considered with just 1 URL, but once I tried to do 5 it just crapped out. Note: from what I understand I am not data scraping, you just enter the URL in the box, click Analyze, and then copy the link with a button.
Here is the code itself:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pyperclip # For clipboard operations
# Replace with the path to your GeckoDriver
geckodriver_path = r'C:Users*****.OSHSDocumentsgeckodriver.exe'
# Replace with the URL of the website performance tool
website_url = 'https://pagespeed.web.dev/'
# Replace with the placeholder text for the input box and the export button
input_box_placeholder = 'Enter a web page URL'
analyze_button_xpath = '/html/body/c-wiz/div[2]/div/div[2]/form/div[2]/button/span' # Ensure this XPath correctly identifies the button
copy_button_xpath = '/html/body/header/span/div[1]/button/span' # Adjust this XPath if needed
# Replace with your website URL
website_to_monitor = [
'https://www.ohiostatewaterproofing.com',
'https://www.basementwaterproofing.com',
'https://www.everdrywaterproofinglouisville.com/',
'https://www.stablwall.com',
'https://www.everdrycolumbus.com'
]
# Initialize the WebDriver for Firefox
service = Service(executable_path=geckodriver_path)
driver = webdriver.Firefox(service=service)
# File to save the results
results_file = 'website_performance_reports.txt'
def get_report_link(driver, website_url):
try:
# Open the website performance tool
driver.get(website_url)
# Initialize WebDriverWait
wait = WebDriverWait(driver, 30)
# Wait for the input box by placeholder text and then find it
input_box = wait.until(EC.presence_of_element_located((By.XPATH, f'//input[@placeholder="{input_box_placeholder}"]')))
input_box.send_keys(website_to_monitor)
# Wait for the analyze button to be clickable and then find it
analyze_button = wait.until(EC.element_to_be_clickable((By.XPATH, analyze_button_xpath)))
analyze_button.click()
# Wait for the analysis to complete (adjust the sleep duration as needed)
time.sleep(30) # Adjust this as needed based on the website's performance
# Simulate the click to copy the link
copy_button_xpath = '/html/body/header/span/div[1]/button/span' # Replace with the actual XPath for the copy button
copy_button = wait.until(EC.element_to_be_clickable((By.XPATH, copy_button_xpath)))
copy_button.click()
# Get the copied link from the clipboard
report_link = pyperclip.paste()
return report_link
except Exception as e:
print(f"Error retrieving report for {website_url}: {e}")
return None
def save_results(results):
with open(results_file, 'w') as file:
for url, report_link in results:
file.write(f"Website: {url}n")
file.write(f"Report Link: {report_link}n")
file.write("----n")
def main():
# Initialize the WebDriver for Firefox
service = Service(executable_path=geckodriver_path)
driver = webdriver.Firefox(service=service)
results = []
for website in website_to_monitor:
print(f"Processing {website}...")
report_link = get_report_link(driver, website)
results.append((website, report_link))
# Save all results to a file
save_results(results)
driver.quit()
print(f"All reports saved to {results_file}")
if __name__ == "__main__":
main()
Blocked out my username but that path works
I tried to work with ChatGPT and keep using its suggestions to fix things, but it seems that every fix doesn’t help. This is the error I get from the terminal:
Error retrieving report for https://www.ohiostatewaterproofing.com: Message:
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16
Code error without try/except statements:
File "C:Users*****.OSHSOSW-Test.py", line 31, in <module>
driver = webdriver.Firefox(service=service)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:Users*****.OSHSAppDataRoamingPythonPython312site-packagesseleniumwebdriverfirefoxwebdriver.py", line 57, in __init__
if finder.get_browser_path():
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:Users*****.OSHSAppDataRoamingPythonPython312site-packagesseleniumwebdrivercommondriver_finder.py", line 47, in get_browser_path
return self._binary_paths()["browser_path"]
^^^^^^^^^^^^^^^^^^^^
File "C:Users*****.OSHSAppDataRoamingPythonPython312site-packagesseleniumwebdrivercommondriver_finder.py", line 78, in _binary_paths
raise NoSuchDriverException(msg) from err
selenium.common.exceptions.NoSuchDriverException: Message: Unable to obtain driver for firefox; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location
TheMoonIsFake is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.