Question:
I’m trying to compare prices of a product from two different supermarkets, LaughsSuper and Glomark, using BeautifulSoup and requests in Python. The price from LaughsSuper is not displaying correctly.
What I’m Trying to Achieve:
I want to extract and compare the prices of a product from the following webpages:
LaughsSuper: https://scrape-sm1.github.io/site1/COCONUT%20market1super.html
Glomark: https://glomark.lk/coconut/p/11624
Expected Output:
Laughs COCONUT – Item#mr-2058 Rs.: 95.0
Glomark Coconut Rs.: 115.0
Laughs is cheaper Rs.: 20.0
Actual Output:
Laughs COCONUT – Item#mr-2058 Rs.: 1.0
Glomark Coconut Rs.: 115.0
Laughs is cheaper Rs.: 114.0
here is my code
import requests
import sys
sys.path.insert(0,’bs4.zip’)
from bs4 import BeautifulSoup
import re
import json
Imitate the Mozilla browser.
user_agent = {‘User-agent’: ‘Mozilla/5.0’}
def extract_price_laughs(url):
try:
response = requests.get(url, headers=user_agent)
soup = BeautifulSoup(response.content, ‘html.parser’)
# Adjust the selectors based on the actual HTML structure
product_name_tag = soup.find('h1')
price_tag = soup.find(text=re.compile(r'd+(.d+)?')) # Find any text containing numbers
if not product_name_tag or not price_tag:
raise ValueError("Error: Could not find the necessary elements on the page.")
product_name = product_name_tag.text.strip()
price_str = re.search(r'd+(.d+)?', price_tag).group() # Extract the price from the matched text
price = float(price_str)
return product_name, price
except Exception as e:
raise ValueError(f"Error fetching data from LaughsSuper: {str(e)}")
def extract_price_glomark(url):
try:
response = requests.get(url, headers=user_agent)
soup = BeautifulSoup(response.content, ‘html.parser’)
script_tag = soup.find('script', type='application/ld+json')
if not script_tag:
raise ValueError("Error: Could not find the necessary script tag on the page.")
json_data = json.loads(script_tag.string)
if 'offers' in json_data:
if isinstance(json_data['offers'], list):
price = float(json_data['offers'][0]['price'])
elif isinstance(json_data['offers'], dict):
price = float(json_data['offers']['price'])
else:
raise ValueError("Error: Unexpected structure for 'offers' in JSON data.")
else:
raise ValueError("Error: 'offers' not found in JSON data.")
product_name = json_data['name']
return product_name, price
except Exception as e:
raise ValueError(f"Error fetching data from Glomark: {str(e)}")
def compare_prices(product_laughs, product_glomark):
try:
# Extract prices from LaughsSuper and Glomark
product_name_laughs, price_laughs = extract_price_laughs(product_laughs)
product_name_glomark, price_glomark = extract_price_glomark(product_glomark)
# Print the values
print('Laughs ', product_name_laughs, 'Rs.: ', price_laughs)
print('Glomark ', product_name_glomark, 'Rs.: ', price_glomark)
# Compare prices
if price_laughs > price_glomark:
print('Glomark is cheaper Rs.:', price_laughs - price_glomark)
elif price_laughs < price_glomark:
print('Laughs is cheaper Rs.:', price_glomark - price_laughs)
else:
print('Price is the same')
except Exception as e:
print(f"An error occurred: {str(e)}")