I have a scraper that I have written to fetch some data from 2 different URLS. The only difference between the two pages is that one has a dropdown list of menus while the other does not. The scraper works for the first url in urls
and fetches everything, but on the second url, it only fetches data from the first dropdown option. I thought I had catered for this but it is not working.
Here is my code:
import requests
from bs4 import BeautifulSoup
import re
import csv
urls = ['https://untappd.com/v/other-half-brewing-co/1360488',
'https://untappd.com/v/beer-witch/10272294']
def get_menu_beers(soup):
beers_all = soup.find_all('ul', {'class': 'menu-section-list'})
# Open the CSV file in write mode
with open('scraped.csv', mode='a', newline='') as file:
writer = csv.writer(file)
for beer_group in beers_all:
beers = beer_group.find_all('li')
for beer in beers:
details = beer.find('div', {'class': 'beer-details'})
a_href = details.find("a",{"class":"track-click"}).get("href")
id_num = re.findall(r'd+', a_href)
beer_id = int(id_num[-1])
name_ = details.find("a",{"class":"track-click"}).text
rating_value = details.find('div', {'class': 'caps small'})['data-rating']
writer.writerow([str(name_).strip().replace('n', ' '), rating_value])
for url in urls:
response = requests.get(url, headers = {'User-agent': 'Mozilla/5.0'})
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
try:
try:
select_options = soup.find_all('select', {'class':'menu-selector'})
if (len(select_options) > 0 ):
options_list = select_options[0].find_all('option')
menu_ids =[]
for option in options_list:
menu_ids.append(int(option['value']))
menu_urls = []
for menu_id in menu_ids:
menu_url = str(url)+ '?menu_id=' + str(menu_id)
menu_urls.append(menu_url)
for url in menu_urls:
res = requests.get(url, headers = {'User-agent': 'Mozilla/5.0'})
s = BeautifulSoup(res.text, 'html.parser')
get_menu_beers(s)
else:
get_menu_beers(soup)
except:
print("Failed HERE")
except:
print(f"Failed: {url}")
I thought this part of my code would find all the dropdown options, generate the urls for each and scrape them individually, but nothing is happening
if (len(select_options) > 0 ):
options_list = select_options[0].find_all('option')
menu_ids =[]
for option in options_list:
menu_ids.append(int(option['value']))
menu_urls = []
for menu_id in menu_ids:
menu_url = str(url)+ '?menu_id=' + str(menu_id)
menu_urls.append(menu_url)
It works for this url https://untappd.com/v/other-half-brewing-co/1360488
but not for this one https://untappd.com/v/beer-witch/10272294
Could anyone show me what I am doing wrong?