I am trying to scrape this website that is build on javascript, now I want to scrape price and other data, when we select an option form a dropdown menu, price changes. This is the code i’ve written so far but I dont know how can I increment selected index in Click_action_script and then every time the index is selected and clicked run the parse method:
import scrapy
from scrapy_splash import SplashRequest
class ExcardSpider(scrapy.Spider):
name = "excard"
allowed_domains = ["www.excard.com.my"]
login_script = '''
function main(splash, args)
local url = args.url
assert(splash:go(url))
splash:wait(1)
splash:runjs("document.querySelector('#TemplatedContent1__product_txtusername').value = 'username';")
splash:runjs("document.querySelector('#TemplatedContent1__product_txtpassword').value = 'password';")
splash:wait(0.5)
splash:runjs('document.querySelector("#TemplatedContent1__product_excardLogin").click()')
splash:wait(5)
return {
html = splash:html(),
cookies = splash:get_cookies()
}
end
'''
click_action_script = '''
function main(splash, args)
splash:init_cookies(args.cookies)
splash:go(args.url)
splash:wait(1)
local dropdown_selector = "#mainContent_order_spec_controller1_order_spec_bizdoc1_ddlSize" -- Change to the appropriate selector
local option_selectors = splash:select_all(dropdown_selector .. " option")
-- Select the first option
local script = string.format("document.querySelector('%s').selectedIndex = 10; var event = new Event('change'); document.querySelector('%s').dispatchEvent(event);", dropdown_selector, dropdown_selector)
splash:runjs(script)
splash:wait(5)
return splash:html()
end
'''
def start_requests(self):
yield SplashRequest(url='https://www.excard.com.my/spec/Litho/Bill-Book', callback=self.parse_login, endpoint="execute", args={
'lua_source': self.login_script
})
def parse_login(self, response):
# Perform the clicking action after logging in
cookies = response.data['cookies']
yield SplashRequest(url=response.url, callback=self.parse, endpoint="execute", args={'lua_source': self.click_action_script, 'cookies': cookies})
def parse(self, response):
# Now you are on the page after the clicking action, extract information as needed
price = response.xpath("//td[@id='tdPriceb4Disc2']/text()").get()
print(price)
I cant think of a solution
New contributor
Abis Jafry is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.