I am performing a regular web scrape for laptop prices with Beautifulsoup. The data from these searches I want to put in a data base with Sqlalchemy. But for each search I want to add just one row, with the date of the search, and a new price for each of the 15 laptops (the laptop names are the names of the columns). I cannot figure out how to do this, your help is appreciated.
I want everything in 1 table so I can turn it into a csv and make some graphs of the data.
I have tried using a for loop, iterating over the names of the laptops that are in a dictionary (the values of the names are the url’s that I use to get the price.
import requests
from bs4 import BeautifulSoup
import datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import Mapped, mapped_column, sessionmaker, declarative_base
ACCEPT_LANGUAGE = "en-US,en;q=0.5"
USER_AGENT = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0")
def mediamarkt(url):
headers = {
"accept-language": ACCEPT_LANGUAGE,
"user-agent": USER_AGENT,
}
response = requests.get(url, headers=headers)
laptop_page = response.text
soup = BeautifulSoup(laptop_page, 'html.parser')
price_list=[]
item_prices = soup.select('span:-soup-contains("€")')
for price in item_prices:
price_list.append(price.text)
try:
laptop_price = int((price_list[-1].lstrip('€ ').rstrip('00').rstrip(',')))
return laptop_price
except (ValueError, IndexError):
return 0
def coolblue(url):
headers = {
"accept-language": ACCEPT_LANGUAGE,
"user-agent": USER_AGENT,
}
response = requests.get(url, headers=headers)
laptop_page = response.text
soup = BeautifulSoup(laptop_page, 'html.parser')
price_list = []
item_prices = soup.find(class_ = 'sales-price__current js-sales-price-current')
for price in item_prices:
price_list.append(price.text)
try:
laptop_price = int((price_list[-1].rstrip(',-').replace('.','')))
return laptop_price
except (ValueError, IndexError):
return 0
now = datetime.datetime.now()
engine = create_engine('sqlite:///laptop-prices.db', echo=True)
Session = sessionmaker(bind=engine)
Base = declarative_base()
# dictionaries of all laptop names
laptop_names_mediamarkt = {}
laptop_names_coolblue = {}
laptop_names_mediamarkt.update({'MediamarktLenovoYogaPro714IRH8':'https://www.mediamarkt.nl/nl/product/_lenovo-yoga-pro-7-14irh8-145-inch-intel-core-i7-32-gb-1-tb-1806227.html',
'MediamarktAcerSwift14SF1471T786Z':'https://www.mediamarkt.nl/nl/product/_acer-swift-14-sf14-71t-786z-14-inch-intel-core-i7-32-gb-1-tb-1806717.html',
'MediamarktAcerAspire5Spin14A5SP1451MTN73H8':'https://www.mediamarkt.nl/nl/product/_acer-aspire-5-spin-14-a5sp14-51mtn-73h8-14-inch-intel-core-i7-32-gb-1-tb-1790678.html',
'MediamarktMSICommercial14HA13MGvPRO098nl':'https://www.mediamarkt.nl/nl/product/_msi-commercial-14-h-a13mg-vpro-098nl-14-inch-intel-core-i7-32-gb-1-tb-1868294.html',
'MediamarktLenovoYogaSlim7':'https://www.mediamarkt.nl/nl/product/_lenovo-yoga-ultra-slim-7-14-inch-intel-core-ultra-7-32-gb-1-tb-1810315.html',
'MediamarktASUSZenbook14OledUX3405MAPP156W':'https://www.mediamarkt.nl/nl/product/_asus-zenbook-14-oled-ux3405ma-pp156w-14-inch-intel-core-ultra-7-32-gb-1-tb-1811355.html',
'MediamarktHPPavillionPlus14ew1045nd':'https://www.mediamarkt.nl/nl/product/_hp-pavilion-plus-14-ew1045nd-14-inch-intel-core-ultra-7-32-gb-1-tb-1870542.html',
'MediamarktASUSZenbook14OledUX3405MAPP278W':'https://www.mediamarkt.nl/nl/product/_asus-zenbook-14-oled-ux3405ma-pp278w-14-inch-intel-core-ultra-9-32-gb-1-tb-1803492.html',
})
laptop_names_coolblue.update({'CoolblueASUSZenbook14OledUX3405MAPP278W':'https://www.coolblue.nl/product/943557/asus-zenbook-14-oled-ux3405ma-pp278w.html',
'CoolblueLenovoIdeapadSlim5Oled14IMH983DA007BMH':'https://www.coolblue.nl/product/948982/lenovo-ideapad-slim-5-oled-14imh9-83da007bmh.html',
'CoolblueLenovoIdeapadPro514APH883AM000DMH':'https://www.coolblue.nl/product/926535/lenovo-ideapad-pro-5-14aph8-83am000dmh.html',
'CoolblueAcerSwiftGo14OledSFG147373FU':'https://www.coolblue.nl/product/947157/acer-swift-go-14-oled-sfg14-73-73fu.html',
'CoolblueLenovoYogaPro714APH882Y8002TMH':'https://www.coolblue.nl/product/933460/lenovo-yoga-pro-7-14aph8-82y8002tmh.html',
'CoolblueLenovoYogaSlim714IMH983CV0054MH':'https://www.coolblue.nl/product/946460/lenovo-yoga-slim-7-14imh9-83cv0054mh.html',
'CoolblueHPPavillionPlus14ew1970nd':'https://www.coolblue.nl/product/947519/hp-pavilion-plus-oled-14-ew1970nd.html'
})
class Laptops(Base):
__tablename__ = 'laptop_prices'
search_date: Mapped[str] = mapped_column(unique=False, primary_key=True)
MediamarktLenovoYogaPro714IRH8: Mapped[int] = mapped_column(unique=False)
MediamarktAcerSwift14SF1471T786Z: Mapped[int] = mapped_column(unique=False, nullable=True)
MediamarktAcerAspire5Spin14A5SP1451MTN73H8: Mapped[int] = mapped_column(unique=False)
MediamarktMSICommercial14HA13MGvPRO098nl: Mapped[int] = mapped_column(unique=False)
MediamarktLenovoYogaSlim7: Mapped[int] = mapped_column(unique=False)
MediamarktASUSZenbook14OledUX3405MAPP156W: Mapped[int] = mapped_column(unique=False)
MediamarktHPPavillionPlus14ew1045nd: Mapped[int] = mapped_column(unique=False)
MediamarktASUSZenbook14OledUX3405MAPP278W: Mapped[int] = mapped_column(unique=False)
CoolblueASUSZenbook14OledUX3405MAPP278W: Mapped[int] = mapped_column(unique=False)
CoolblueLenovoIdeapadSlim5Oled14IMH983DA007BMH: Mapped[int] = mapped_column(unique=False)
CoolblueLenovoIdeapadPro514APH883AM000DMH: Mapped[int] = mapped_column(unique=False)
CoolblueAcerSwiftGo14OledSFG147373FU: Mapped[int] = mapped_column(unique=False)
CoolblueLenovoYogaPro714APH882Y8002TMH: Mapped[int] = mapped_column(unique=False)
CoolblueLenovoYogaSlim714IMH983CV0054MH: Mapped[int] = mapped_column(unique=False)
CoolblueHPPavillionPlus14ew1970nd: Mapped[int] = mapped_column(unique=False)
def daily_search():
for laptop in laptop_names_mediamarkt:
url = laptop_names_mediamarkt[laptop]
daily_price = mediamarkt(url)
Base.metadata.create_all(engine)
new_price = Laptops(search_date=now, laptop=daily_price)
with Session() as session:
session.add(new_price)
session.commit()
for laptop in laptop_names_coolblue:
url = laptop_names_coolblue[laptop]
daily_price = coolblue(url)
Base.metadata.create_all(engine)
new_price = Laptops(search_date=now, laptop=daily_price)
with Session() as session:
session.add(new_price)
session.commit()
daily_search()
But this returns the following error:
‘TypeError: ‘laptop’ is an invalid keyword argument for Laptops’
How can I solve this?
2
In the class Laptops
you need to specify the columns that you want to insert in to.
It looks like You defined the schema with the Value of the tables itself.
here
new_price = Laptops(search_date=now, laptop=daily_price)
tries to insert columns search_date
and laptop
.But the Laptops
schema only has the search_date
field defined along with the unwanted values you provided (Laptop models).
Try this
class Laptops(Base):
__tablename__ = 'laptop_prices'
search_date: Mapped[str] = mapped_column(unique=False, primary_key=True)
laptop: str
2