- I am fetching my embeddings from Mongo Atlas
- Converting the query to embeddings using text-embedding-adaa002 api key
- Performing Similarity check
- Introduced GPT api keys and azure endpoint
os.environ["OPENAI_API_KEY_GPT"] = "xxx"
os.environ["AZURE_OPENAI_ENDPOINT_GPT"] = "https://xxx.openai.azure.com/"
mongodb_connection_string = "xxx"
client = MongoClient(mongodb_connection_string)
db = client['langchain-test']
collection = db['xx']
openai.api_key_gpt = os.getenv("OPENAI_API_KEY_GPT")
azure_openai_endpoint_gpt = os.getenv("AZURE_OPENAI_ENDPOINT_GPT")
openai_gpt_client = AzureOpenAI(
api_key=openai.api_key_gpt,
api_version="2024-02-15-preview",
azure_endpoint=azure_openai_endpoint_gpt
)
def fetch_embeddings():
cursor = collection.find({})
pdf_chunks = []
for document in cursor:
pdf_chunks.append({
"text": document["text"],
"metadata": document["metadata"],
"embedding": document["embedding"]
})
return pdf_chunks
def generate_embeddings(text, model="text-embedding-ada-002"):
response = openai_client.embeddings.create(input=[text], model=model)
return response.data[0].embedding
def cosine_similarity_search(query_embedding, embeddings, top_k=5):
similarities = cosine_similarity([query_embedding], embeddings)
similar_indices = np.argsort(similarities[0])[::-1][:top_k]
return similar_indices, similarities[0][similar_indices]
def answer_question(question, pdf_chunks, all_embeddings):
question_embedding = generate_embeddings(question)
similar_indices, similarities = cosine_similarity_search(question_embedding, all_embeddings)
results = [(pdf_chunks[i], similarities[idx]) for idx, i in enumerate(similar_indices)]
return results
def generate_response(context):
prompt = f"Based on the following context, answer the question:nn{context}"
response = openai_gpt_client.completions.create(model="gpt-3.5-turbo-1102-westus", prompt=prompt, max_tokens=150)
return response.choices[0].text.strip()
pdf_chunks_db = fetch_embeddings()
all_embeddings = [chunk["embedding"] for chunk in pdf_chunks_db]
for chunk in pdf_chunks_db:
chunk["embedding"] = generate_embeddings(chunk["text"])
question = "Your question here"
results = answer_question(question, pdf_chunks_db, all_embeddings)
combined_context = "n".join([result[0]['text'] for result in results])
response = generate_response(combined_context)
print(response)
I am getting an error in generate_response function– An error occurred: ‘AzureOpenAI’ object has no attribute ‘Completion’