I have tried every solution I could find, but the returned response is always partial.
If you open https://xl.16888.com/s/129110/ in your browser, you can see all of the data, but no matter whether I use requests, scrapy, or even selenium, the response is always partial.
Is there anything important that I missed?
import re
import requests
import time
import random
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
import scrapy
from pathlib import Path
# Module-level setup. Everything below runs at import time, including the
# launch of a Chrome browser window.

# requests.Session() is the supported constructor; requests.session() is a
# deprecated alias that returns the same object. The fetch functions visible
# in this file call requests.get() directly and do not use this session.
session = requests.Session()
print(requests.__version__)

# Selenium driver shared by fetch_url_v3(); "chromedriver.exe" is resolved
# relative to the current working directory.
service = Service(executable_path=r"chromedriver.exe")
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=options)
# Browser-like request headers passed to requests.get() by fetch_url().
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    # 'br' is deliberately not advertised: requests only decodes Brotli
    # bodies when an optional Brotli package is installed, and otherwise
    # hands back the still-compressed bytes.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # No explicit 'Host' entry: requests derives it from the request URL.
    # A hard-coded value for a different site makes the server route the
    # request to the wrong virtual host.
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
}
def parse(self, response):
    """Save a response body to ``tmp.html`` and echo it to stdout.

    Written in the shape of a Scrapy spider callback: ``self`` only needs a
    ``log(message)`` method and ``response`` only needs a ``body`` attribute
    holding bytes.

    Args:
        self: Object providing ``log``; receives the "Saved file" message.
        response: Object whose ``body`` is written to disk and printed.

    Returns:
        None.
    """
    filename = "tmp.html"
    Path(filename).write_bytes(response.body)
    # The "aaaa"/"bbbb" markers bracket the raw body so it is easy to spot
    # in the console output.
    print("aaaa")
    print(response.body)
    print("bbbb")
    self.log(f"Saved file {filename}")
def fetch_url_v4(real_url):
    """Download ``real_url`` with Scrapy and return the saved page text.

    Constructing a ``scrapy.Request`` on its own downloads nothing; a request
    is only fetched once a running crawler schedules it. This function
    therefore runs a one-page spider to completion with ``CrawlerProcess``.
    The spider's callback delegates to the module-level ``parse``, which
    writes the response body to ``tmp.html``; that file is then read back.

    Args:
        real_url: Address of the page to download.

    Returns:
        The contents of ``tmp.html`` decoded as UTF-8.

    Raises:
        FileNotFoundError: If the crawl did not produce ``tmp.html``.

    Note:
        ``CrawlerProcess.start()`` runs the Twisted reactor, which cannot be
        restarted, so this function can be called at most once per process.
    """
    # Local import: only this fetch variant needs the crawler runner.
    from scrapy.crawler import CrawlerProcess

    saved_page = Path("tmp.html")
    save_response = parse  # module-level callback that writes tmp.html

    class _SinglePageSpider(scrapy.Spider):
        """Spider that requests exactly one URL and saves its body."""

        name = "single_page"
        start_urls = [real_url]

        def parse(self, response):
            return save_response(self, response)

    # Drop any copy left by an earlier run so a failed crawl cannot silently
    # return stale content.
    try:
        saved_page.unlink()
    except FileNotFoundError:
        pass

    process = CrawlerProcess()
    process.crawl(_SinglePageSpider)
    process.start()  # blocks until the crawl has finished

    return saved_page.read_text(encoding='utf-8')
def fetch_url_v3(real_url):
    """Load ``real_url`` in the shared Selenium browser and return its HTML.

    Args:
        real_url: Address of the page to open.

    Returns:
        The browser's ``page_source`` after a fixed three-second pause.
    """
    browser = driver
    browser.get(real_url)
    browser.maximize_window()
    # Fixed pause before the page source is read.
    time.sleep(3)
    rendered_html = browser.page_source
    return rendered_html
def fetch_url_v2(real_url):
    """Fetch ``real_url`` with requests, save it to ``tmp.html``, and return it.

    The decoded response text is written to ``tmp.html`` as UTF-8 and then
    read back from that same file handle; the text read back is the return
    value.

    Args:
        real_url: Address of the page to download.

    Returns:
        The page text as read back from ``tmp.html``.
    """
    print(real_url)
    response = requests.get(real_url)
    with open("tmp.html", "w+", encoding='utf-8') as page_file:
        page_file.write(response.text)
        # Rewind so the read below starts at the beginning of the file.
        page_file.seek(0)
        return page_file.read()
def fetch_url_v1(real_url):
    """Fetch ``real_url`` with requests and return the body decoded as UTF-8.

    Args:
        real_url: Address of the page to download.

    Returns:
        The complete response body as a string, line breaks included.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    print(real_url)
    resp = requests.get(real_url)
    # Decode the whole body in one step. Response.iter_lines() strips the
    # line terminators, so concatenating its chunks glues adjacent lines
    # together and corrupts the markup.
    return resp.content.decode('utf-8')
def fetch_url(real_url):
    """Fetch ``real_url`` with the module-level ``headers`` and return its text.

    Args:
        real_url: Address of the page to download.

    Returns:
        The response body decoded as UTF-8.
    """
    print(real_url)
    # One-second pause before each request.
    time.sleep(1)
    response = requests.get(real_url, headers=headers)
    # Force UTF-8 instead of the encoding requests would pick by itself.
    response.encoding = 'utf-8'
    page_text = response.text
    return page_text
# Script entry point: fetch the sample page with the Scrapy-based variant and
# print whatever text it returns.
print(fetch_url_v4("https://xl.16888.com/s/129110/"))