I’m trying to retrieve data from an endpoint on the SofaScore website, but the data I receive in response to my requests appears to be randomized. For instance, when a player is confirmed to have made 10 passes in a game (as shown on the website), the data returned by my GET request shows a completely different number.
Below is the code I am using for context:
response = requests.get('https://www.sofascore.com/api/v1/event/11873905/lineups', headers=headers)
try:
data = response.json()
except ValueError:
print("Error: Unable to parse JSON response")
data = {}
was able to find a work around on this using selenium to mimic a chrome browser and read network response. Although I get a WebDriverException error at the end of the code, I’m still not able to get the data.
options = webdriver.ChromeOptions()
options.set_capability(
'goog:loggingPrefs',
{
'performance': 'ALL',
'browser': 'ALL'
}
)
driver = webdriver.Chrome(options = options)
driver.set_page_load_timeout(10)
headers = {
'user-agent': {placeholder}
}
matches = "germany-scotland"
match_ids = "VTbslUb"
match_df = []
base_url = 'https://www.sofascore.com'
# for match, match_id in zip(matches, match_ids):
match_url = f'{base_url}/{matches}/{match_ids}'
try:
driver.get(match_url)
except:
pass
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# gives all network responses/ all APIs being called
logs_raw = driver.get_log('performance')
logs = [json.loads(lr["message"])["message"] for lr in logs_raw]
for x in logs:
if 'lineups' in x['params'].get('headers', {}).get(':path', ''):
print(x['params'].get('headers', {}).get(':path'))
break
requestId = x["params"]["requestId"]
lineups_body = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})['body']
This is the traceback, would really appreciate ideas on how to solve this:
WebDriverException Traceback (most recent call last)
Cell In[161], line 1
----> 1 lineups_body = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})['body']
File ~/anaconda3/lib/python3.11/site-packages/selenium/webdriver/chromium/webdriver.py:136, in ChromiumDriver.execute_cdp_cmd(self, cmd, cmd_args)
118 def execute_cdp_cmd(self, cmd: str, cmd_args: dict):
119 """Execute Chrome Devtools Protocol command and get returned result The
120 command and command args should follow chrome devtools protocol
121 domains/commands, refer to link
(...)
134 {'base64Encoded': False, 'body': 'response body string'}
135 """
--> 136 return self.execute("executeCdpCommand", {"cmd": cmd, "params": cmd_args})["value"]
File ~/anaconda3/lib/python3.11/site-packages/selenium/webdriver/remote/webdriver.py:347, in WebDriver.execute(self, driver_command, params)
345 response = self.command_executor.execute(driver_command, params)
346 if response:
--> 347 self.error_handler.check_response(response)
348 response["value"] = self._unwrap_value(response.get("value", None))
349 return response
File ~/anaconda3/lib/python3.11/site-packages/selenium/webdriver/remote/errorhandler.py:229, in ErrorHandler.check_response(self, response)
227 alert_text = value["alert"].get("text")
228 raise exception_class(message, screen, stacktrace, alert_text) # type: ignore[call-arg] # mypy is not smart enough here
--> 229 raise exception_class(message, screen, stacktrace)
WebDriverException: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No data found for resource with given identifier"}
(Session info: chrome=123.0.6312.122)