I am scraping data from the website “https://www.brickeconomy.com/sets/year/2024” using the code I wrote. I managed to extract the names of the Lego sets from the table, but I cannot access the remaining data such as “Year”, “Theme / Subtheme”, “Pieces / Minifigs”, “Availability”, “Retail”, and “Value”. I want to access the data in each item in detail and save it in JSON format. If possible, I would also like to include the image link of the Lego set. Below is an example of my code.
from bs4 import BeautifulSoup
import cloudscraper
import json
url = "https://www.brickeconomy.com/sets/year/2024"
scraper = cloudscraper.create_scraper()
response = scraper.get(url)
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
print(soup.prettify())
sets_data = []
table_rows = soup.find('table', id='ContentPlaceHolder1_ctlSets_GridViewSets').find_all('tr', align='left')
for row in table_rows:
cells = row.find_all('td')
if len(cells) >= 3:
try:
set_name = cells[2].find('div').get_text(strip=True)
except AttributeError:
set_name = "-"
try:
set_year = soup.find('div', string='Year').find_next_sibling().get_text(strip=True)
except AttributeError:
set_year = "-"
print("set-id:", set_name)
print("set-year:", set_year)
print()
set_info = {
"set-id": set_name,
"set-year": set_year
}
sets_data.append(set_info)
with open('sets_data.json', 'w', encoding='utf-8') as json_file:
json.dump(sets_data, json_file, ensure_ascii=False, indent=4)
print("saved Json file")
else:
print("HTTP Error Code:", response.status_code)