I tried to make this simple crawler to crawl down the entire ICD-11 database (https://icd.who.int/browse/2024-01/foundation/en#455013390) and collect all the titles and descriptions of all diseases, but it keeps stopping at the very top level of the link tree and does not go down the tree into the various branches and their sub-branches and so forth. Where is this going wrong?
import requests
# Configuration
token_endpoint = 'https://icdaccessmanagement.who.int/connect/token'
api_base_url = 'http://id.who.int/icd/entity/1435254666'
client_id = 'XXXX' # Replace with your client_id or set it as an environment variable
client_secret = 'XXX' # Replace with your client_secret or set it as an environment variable
scope = 'icdapi_access'
grant_type = 'client_credentials'
# Get the OAUTH2 token
# Set data to post
payload = {'client_id': client_id, 'client_secret': client_secret, 'scope': scope, 'grant_type': grant_type}
# Make request
r = requests.post(token_endpoint, data=payload, verify=False).json()
token = r['access_token']
# Access ICD API
uri = 'https://id.who.int/icd/entity/455013390'
def crawler(uri):
# HTTP header fields to set
headers = {'Authorization': 'Bearer '+token, 'Accept': 'application/json', 'Accept-Language': 'en', 'API-Version': 'v2'}
# Make request
r = requests.get(uri, headers=headers, verify=False).json()
# Print the result
try: print(r['title']['@value'], r['fullySpecifiedName']['@value'])
except: pass
try: print(r['definition']['@value'])
except: pass
try: print(r['child'])
except: pass
# Return children
try: return r['child']
except: return None
url_list = crawler(uri)
for url in url_list: url_list += crawler(url)