I’m following a course. It’s a bit outdated so some stuff changed on the website.
website: https://www.centris.ca/
Basically, it’s a real estate website; you need to call 2 endpoints to get a result with properties.
call 1: https://www.centris.ca/property/UpdateQuery
call 2: https://www.centris.ca/Property/GetInscriptions
Unfortunately, I cannot figure out how to stop the response from being returned in French. Things I tried:
Headers: 'Accept-Language': 'en-US,en;q=0.6',
Cookies: nothing that indicates language
Any help would be appreciated
spider:
import scrapy
from scrapy.selector import Selector
import json
class ListingsSpider(scrapy.Spider):
    """Spider that requests commercial rental listings from centris.ca.

    Two POST requests are chained: ``UpdateQuery`` receives the search
    filters, and its callback then asks ``GetInscriptions`` for listings
    starting at ``position["startPosition"]``.
    """

    name = "listings"
    allowed_domains = ["www.centris.ca"]

    # JSON body sent to GetInscriptions; class-level, so shared by all
    # instances of this spider.
    position = {
        "startPosition": 0
    }

    def start_requests(self):
        """Yield the initial ``UpdateQuery`` POST carrying the search filters."""
        query = {
            "query": {
                "UseGeographyShapes": 0,
                "Filters": [
                ],
                "FieldsValues": [
                    {
                        "fieldId": "Category",
                        "value": "Commercial",
                        "fieldConditionId": "",
                        "valueConditionId": ""
                    },
                    {
                        "fieldId": "SellingType",
                        "value": "Rent",
                        "fieldConditionId": "",
                        "valueConditionId": ""
                    },
                    {
                        "fieldId": "RentPrice",
                        "value": 0,
                        "fieldConditionId": "ForRent",
                        "valueConditionId": ""
                    },
                    {
                        "fieldId": "RentPrice",
                        "value": 999999999999,
                        "fieldConditionId": "ForRent",
                        "valueConditionId": ""
                    }
                ]
            },
            "isHomePage": True
        }
        yield scrapy.Request(
            url="https://www.centris.ca/property/UpdateQuery",
            method="POST",
            body=json.dumps(query),
            headers={
                'Content-Type': 'application/json',
                'Content-Language': 'en',
                # Content-Language only labels the language of this request's
                # body; Accept-Language is the header that states which
                # response language is preferred, and it is what the
                # follow-up request already sends.
                # NOTE(review): whether the site honors this header is not
                # confirmed here.
                'Accept-Language': 'en-US,en;q=0.6'
            },
            callback=self.update_query
        )

    def update_query(self, response):
        """Yield the ``GetInscriptions`` POST once ``UpdateQuery`` has replied.

        The ``UpdateQuery`` response itself is not inspected; the request body
        is the JSON-encoded ``position`` attribute.
        """
        yield scrapy.Request(
            url="https://www.centris.ca/Property/GetInscriptions",
            method="POST",
            body=json.dumps(self.position),
            headers={
                'Content-Type': 'application/json',
                'accept-language': 'en-US,en;q=0.6',
                'referer': 'https://www.centris.ca/en/properties~for-rent?view=Thumbnail',
                'cache-control': 'no-cache'
            },
            cookies={'currency': 'USD', 'country': 'UY'},
            callback=self.parse
        )

    def parse(self, response):
        """Read the listing markup out of the JSON reply and walk its thumbnails.

        The HTML fragment is expected at ``d -> Result -> html`` in the decoded
        body. If any level is missing or empty, a warning is logged and the
        callback returns without output instead of raising ``AttributeError``
        on ``None``.
        """
        resp_dict = json.loads(response.body)

        # Walk the nested keys defensively: a missing level yields None rather
        # than an AttributeError from calling .get on None.
        html = resp_dict
        for key in ('d', 'Result', 'html'):
            html = html.get(key) if isinstance(html, dict) else None

        if not html:
            self.logger.warning(
                "No listing HTML in response from %s", response.url
            )
            return

        sel = Selector(text=html)
        listings = sel.xpath("//div[@class='property-thumbnail-item thumbnailItem col-12 col-sm-6 col-md-4 col-lg-3']")
        for listing in listings:
            # Placeholder: per-listing field extraction has not been written.
            print("not yet implemented")