I have a Google Maps scraper. It should scroll down the results panel until there is nothing left to scroll, scrape the data (name, address, etc.) and save it to an Excel file.
The program does everything except part of the scrolling: the scroller works, but it doesn't scroll all the way down (at some point the program just stops). No matter how long it scrolls, it always saves 26 results, even though there are 48 of them.
This is the part of the code that is responsible for scrolling:
# Scroll to show more results
divSideBar = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, f"div[aria-label='Results for {service + ' ' + location}']")))
keepScrolling = True
while keepScrolling:
    divSideBar.send_keys(Keys.PAGE_DOWN)
    time.sleep(3)
    html = driver.find_element(By.TAG_NAME, "html").get_attribute('outerHTML')
    # Google Maps shows this message once the last result has been loaded
    if "You've reached the end of the list." in html:
        keepScrolling = False
No matter how much I increase or decrease the time.sleep(3) delay, I always get the same results.
What could be the issue with the code?
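In case it helps narrow things down, this is the kind of per-scroll check I could bolt onto the loop to watch the count grow (just a sketch, reusing the Nv2PK class that the parsing code further down already uses to find the result cards):

# Hypothetical diagnostic: print how many result cards are loaded after each scroll,
# using the same 'Nv2PK' class the BeautifulSoup part relies on
while keepScrolling:
    divSideBar.send_keys(Keys.PAGE_DOWN)
    time.sleep(3)
    loaded_cards = driver.find_elements(By.CLASS_NAME, "Nv2PK")
    print(f"Results loaded so far: {len(loaded_cards)}")
    html = driver.find_element(By.TAG_NAME, "html").get_attribute('outerHTML')
    if "You've reached the end of the list." in html:
        keepScrolling = False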
Here is the full code, so you can run it if you want:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd
URL = "https://www.google.com/maps"
service = "kaufland"
location = "hrvatska"
driver = webdriver.Chrome()
driver.maximize_window()
driver.get(URL)
# Accept cookies
try:
    accept_cookies = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="yDmH0d"]/c-wiz/div/div/div/div[2]/div[1]/div[3]/div[1]/div[1]/form[2]/div/div/button')))
    accept_cookies.click()
except NoSuchElementException:
    print("No accept cookies button found.")
# Search for results and show them
input_field = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="searchboxinput"]')))
input_field.send_keys(service + ' ' + location)
input_field.send_keys(Keys.ENTER)
# Scroll to show more results
divSideBar = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, f"div[aria-label='Results for {service + ' ' + location}']")))
keepScrolling = True
while keepScrolling:
    divSideBar.send_keys(Keys.PAGE_DOWN)
    time.sleep(3)
    html = driver.find_element(By.TAG_NAME, "html").get_attribute('outerHTML')
    if "You've reached the end of the list." in html:
        keepScrolling = False
page_source = driver.page_source
driver.quit()
soup = BeautifulSoup(page_source, "html.parser")
boxes = soup.find_all('div', class_='Nv2PK')
# Collect data
data = []
for box in boxes:
    # Business name
    try:
        business_name = box.find('div', class_='qBF1Pd').getText()
    except AttributeError:
        business_name = "N/A"
    if service.strip().lower() not in business_name.lower():
        continue
    # Address
    try:
        inner_div = box.find_all('div', class_='W4Efsd')[1].find('div', class_='W4Efsd')
        address = [span.text for span in inner_div.find_all('span') if span.text and not span.find('span')][-1]
    except (IndexError, AttributeError):
        address = "N/A"
    # Stars
    try:
        stars = box.find('span', class_='MW4etd').getText()
    except AttributeError:
        stars = "N/A"
    # Number of reviews
    try:
        number_of_reviews = box.find('span', class_='UY7F9').getText().strip('()')
    except AttributeError:
        number_of_reviews = "N/A"
    # Phone number
    try:
        phone_number = box.find('span', class_='UsdlK').getText()
    except AttributeError:
        phone_number = "N/A"
    # Website
    try:
        website = box.find('a', class_='lcr4fd').get('href')
    except AttributeError:
        website = "N/A"
    # Append to data list
    data.append({
        'Business Name': business_name,
        'Address': address,
        'Stars': stars,
        'Number of Reviews': number_of_reviews,
        'Phone Number': phone_number,
        'Website': website
    })
# Create a DataFrame and save to Excel
df = pd.DataFrame(data)
df.to_excel(f'{location}_{service}.xlsx', index=False)
print(f"Data has been saved to {location}_{service}.xlsx")