import os
import subprocess

# Reuse an existing Chrome profile (optional). Replace the path with the
# location of your own "User Data" directory.
options = webdriver.ChromeOptions()
options.add_argument(
    r"user-data-dir=C:\Users\c4chi\AppData\Local\Google\Chrome\User Data"
)
options.add_argument("profile-directory=Default")  # Profile inside the user data dir

# Absolute path to the ChromeDriver executable (replace with your actual path).
chrome_driver_path = r"C:\chromedriver-win64\chromedriver.exe"

# Close any running Chrome instances (optional, Windows only). A profile that
# is already open in another Chrome process commonly prevents ChromeDriver
# from starting a session with the same user data directory.
if os.name == "nt":
    # check=False: a non-zero exit status just means no chrome.exe was running.
    subprocess.run(
        ["taskkill", "/F", "/IM", "chrome.exe"],
        check=False,
        capture_output=True,
    )

# (Optional) Temporarily disable extensions; comment this out to keep them.
options.add_argument("--disable-extensions")
# Scraped fields keyed by name; stays an empty object if scraping fails.
scraped_data = {}
# Pre-declared so the cleanup code can tell whether the browser ever started.
driver = None
try:
    # Create a new instance of Chrome with the specified options and service
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # Navigate to the OLX link
    target_url = "https://www.olx.in/post/attributes/"
    driver.get(target_url)

    # Wait for up to 100 seconds for a relevant element to appear.
    # NOTE: the ID below is a placeholder; replace it with an element that
    # actually exists on the target page, otherwise the wait times out.
    wait = WebDriverWait(driver, 100)
    wait.until(
        EC.presence_of_element_located((By.ID, "some_unique_id_on_olx_page"))
    )

    # Find elements by ID (replace with the actual IDs used on the page)
    make_element = driver.find_element(By.ID, "make")
    model_element = driver.find_element(By.ID, "model")
    variant_element = driver.find_element(By.ID, "variant")

    # Extract data (assuming these elements have text content)
    scraped_data = {
        "make": make_element.text,
        "model": model_element.text,
        "variant": variant_element.text,
    }
except Exception as e:
    # Top-level boundary of the script: report the failure and still run the
    # cleanup below instead of crashing with a traceback.
    print(f"An error occurred: {e}")
finally:
    try:
        # Save scraped data to JSON file (written even when scraping failed)
        with open("scraped_data.json", "w", encoding="utf-8") as outfile:
            json.dump(scraped_data, outfile, indent=4)  # Indentation for readability
    finally:
        # Close the browser only if it was started, and do so even when
        # writing the file fails.
        if driver is not None:
            driver.quit()

# Print success message only when something was actually scraped
if scraped_data:
    print("JSON file created successfully: scraped_data.json")
Now the problem is that the page at the given link does not load properly in the browser.
E.g. the actual link is "https://www.olx.in/post/attributes", but the browser only loads "https://www.olx.in/post".
Rahul anand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.