I try to click the button with the following Lua script:
--- Splash entry point: load the requested page, click the gallery button,
-- and return the resulting page state.
--
-- @param splash Splash scripting object; reads `splash.args.url`,
--   `splash.args.headers` and `splash.args.cookies`.
-- @return table with `url`, `cookies`, `html` and `png`; an `error`
--   message is added when the button is missing or the click fails.
function main(splash)
  -- A quoted Lua string cannot span several source lines, so the selector
  -- is assembled from pieces. Descendant steps are joined with a single
  -- space; the button's class list is joined without spaces.
  local button_selector = 'html body div#__next div#root_element'
    .. ' section.styles_desktop_top-section__ielYe'
    .. ' div#gallery.styles_desktop_container__mkdMa'
    .. ' div.styles_desktop_image--big__wnpkg'
    .. ' div.styles_desktop_button__container__BMM1C'
    .. ' button.button-module_button__06uQ8'
    .. '.button-module_button-theme__Rziix'
    .. '.button-module_button-theme--secondary__-6EDU'
    .. '.button-module_button-size--small__Lg4tu'
    .. '.button-module_button-theme--secondary--with-padding__rXoQh'
    .. '.button-module_button--reduce-padding-prefix__zAU6k'

  -- Build the result table in one place; `err` is nil on success, which
  -- leaves the `error` key absent from the returned table.
  local function page_state(err)
    return {
      error = err,
      url = splash:url(),
      cookies = splash:get_cookies(),
      html = splash:html(),
      png = splash:png(), -- Screenshot is returned on failure too
    }
  end

  -- Only seed cookies when the caller actually supplied some.
  if splash.args.cookies then
    splash:init_cookies(splash.args.cookies)
  end
  splash.private_mode_enabled = false
  splash.images_enabled = true -- Ensure images are loaded

  assert(splash:go{
    splash.args.url,
    headers = splash.args.headers,
  })
  assert(splash:wait(5)) -- Give the page time to load its resources
  splash:set_viewport_full() -- Capture the entire page in the screenshot

  local button = splash:select(button_selector)
  if not button then
    return page_state("Button not found")
  end

  -- Surface a failed click instead of silently ignoring it.
  local ok, reason = button:mouse_click()
  if not ok then
    return page_state("Button click failed: " .. tostring(reason))
  end

  -- The effect of the click is only visible after waiting.
  assert(splash:wait(5))

  return page_state()
end
This is my code for extracting the images with Scrapy:
# Request the page through Splash's 'execute' endpoint with the Lua script
# passed as `lua_source`; the response is handled by parse_images, and
# `items` travels along in the request meta.
yield SplashRequest(response.url, self.parse_images, endpoint='execute',
args={"lua_source": script}, meta={"items": items}, headers=headers)
def parse_images(self, response):
    """Save the Splash screenshot and collect the property image URLs.

    Reads ``items`` from ``response.meta`` and the ``png``, ``html`` and
    optional ``error`` entries from ``response.data``.

    The decoded screenshot is written to ``screenshot.png``. When the Lua
    script reported an error it is logged and ``image_urls`` is not set;
    otherwise ``items['image_urls']`` receives every ``<img src>`` that
    matches the property-image URL pattern.

    Yields:
        The ``items`` object taken from ``response.meta``.
    """
    items = response.meta['items']

    # The screenshot is written in both the success and the error case.
    screenshot_bytes = base64.b64decode(response.data['png'])
    with open('screenshot.png', 'wb') as f:
        f.write(screenshot_bytes)

    if 'error' in response.data:
        self.logger.error(f"Error in Lua script: {response.data['error']}")
    else:
        # The '-' inside the class must be escaped: the unescaped `/-.`
        # is a reversed character range and makes re.compile raise
        # re.error. The host dots are escaped so they match literally.
        image_url_pattern = re.compile(
            r'https://www\.propertyfinder\.ae/property/[a-zA-Z0-9/\-.?=]+')
        soup = BeautifulSoup(response.data['html'], 'html.parser')
        sources = (img.get('src') for img in soup.find_all('img'))
        items['image_urls'] = [
            src for src in sources if src and image_url_pattern.match(src)
        ]
    yield items
I got the screenshot, and it shows that the click did not happen.
enter image description here
This is the screenshot that I got from my code.
Could you please help me debug my code? I spent the whole of yesterday working on it, and the result is still the same.
Thank you for your help.