I am trying to use Beautiful Soup to scrape data. I have a .env file with ENV, API_KEY, and SEARCH_ENGINE_API defined, but for some reason I am still running into a KeyError.
Here are my errors:
FAILED tests/test_tools.py::test_scrap_data - KeyError: 'items'
FAILED tests/test_tools.py::test_study_materials - assert (None)
Here is my corresponding code from the original functions and the tests:
def scrap_data(grade,subject,API_KEY,SEARCH_ENGINE_ID):
    """Return the link of the first Google Custom Search hit for a syllabus query.

    Sends the query ``'syllabus of {subject} {grade} level'`` to the Custom
    Search JSON API and returns the ``link`` of the first entry in the
    response's ``items``.

    Args:
        grade: Education level inserted into the query string.
        subject: Subject inserted into the query string.
        API_KEY: Value sent as the ``key`` request parameter.
        SEARCH_ENGINE_ID: Value sent as the ``cx`` request parameter.

    Returns:
        The ``link`` field of the first search result.

    Raises:
        KeyError: If the response contains no ``items`` field. The message
            includes the ``error`` payload from the response when there is
            one, so the underlying cause is visible instead of a bare
            ``KeyError: 'items'``.
    """
    url = 'https://www.googleapis.com/customsearch/v1'
    params = {
        'q': f'syllabus of {subject} {grade} level',
        'key': API_KEY,
        'cx': SEARCH_ENGINE_ID,
    }
    # A timeout keeps a stalled connection from blocking the caller forever.
    payload = requests.get(url, params=params, timeout=10).json()

    items = payload.get('items')
    if items is None:
        # Surface whatever the API said went wrong rather than failing on the
        # missing key with no context.
        error = payload.get('error')
        if isinstance(error, dict):
            reason = error.get('message', error)
        elif error:
            reason = error
        else:
            reason = 'the search returned no results'
        raise KeyError(f"'items' missing from Custom Search response: {reason}")

    # Only the first hit is needed, so there is no reason to collect every link.
    return items[0]['link']
def test_scrap_data():
    """Check that scrap_data yields a non-empty string that is_valid_url accepts."""
    found = scrap_data('university', 'Computer science', API_KEY, SEARCH_ENGINE_ID)
    # One condition per assert so a failure points at the exact check.
    assert found
    assert isinstance(found, str)
    assert is_valid_url(found)
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    fence = '```'
    cleaned = text.strip()
    if (len(cleaned) >= 2 * len(fence)
            and cleaned.startswith(fence)
            and cleaned.endswith(fence)):
        cleaned = cleaned[len(fence):-len(fence)].lstrip()
        # The opening fence may carry a language tag, e.g. ```json.
        if cleaned[:4].lower() == 'json':
            cleaned = cleaned[4:]
    return cleaned.strip()


def study_materials(grade:str,subject:str,course_outline:str,custom_info='None') -> "list | dict | None":
    """Ask the model for study materials and return its reply parsed as JSON.

    Builds the prompt from ``prompts/study_materials.txt``, pipes it into a
    ``gemini-pro`` VertexAI model and parses the reply with ``json.loads``.
    A reply that is not valid JSON is retried, up to five attempts in total.

    Args:
        grade: Value supplied to the prompt as ``grade``.
        subject: Value supplied to the prompt as ``subject``.
        course_outline: Value supplied to the prompt as ``course_outline``.
        custom_info: Value supplied to the prompt as ``custom_info``; the
            default is the literal string ``'None'``.

    Returns:
        The parsed JSON value, or ``None`` when every attempt produced
        unparseable output or when invoking the chain raised an error (the
        error is printed, not re-raised).
    """
    prompt = build_prompt('prompts/study_materials.txt')
    model = VertexAI(
        model_name='gemini-pro',
        temperature=0.3,
    )
    chain = prompt | model
    inputs = {
        "grade": grade,
        "subject": subject,
        "custom_info": custom_info,
        'course_outline': course_outline,
    }

    max_attempts = 5
    for _ in range(max_attempts):
        try:
            response = chain.invoke(inputs)
            # Models often wrap JSON in a Markdown code fence, which json.loads
            # rejects; unwrap it so such replies are not retried needlessly.
            return json.loads(_strip_code_fence(response))
        except json.JSONDecodeError:
            print("Failed to parse response to JSON. Retrying...")
        except Exception as e:
            # Anything other than a parse failure will not be fixed by
            # retrying, so report it and give up.
            print(f"An error occurred: {e}")
            break
    return None
def test_study_materials():
    """Check that study_materials returns a non-empty list whose first item is a dict with 'material' and 'purpose'."""
    outline_text = read_text_file('tests/TestExamples/outline.txt')
    materials = study_materials('university', 'Computer science', outline_text)
    # One condition per assert so a failure points at the exact check.
    assert materials
    assert isinstance(materials, list)
    first_entry = materials[0]
    assert isinstance(first_entry, dict)
    assert first_entry['material']
    assert first_entry['purpose']