I am using Google Generative AI because it is free (suggestions for better free alternatives are welcome), together with the Google Generative AI embeddings.
I am trying to split a file into chunks, embed the chunks with GoogleGenerativeAIEmbeddings, and store them in a Chroma vector DB on my local machine. With a smaller data set it works well, but when I add a bigger set of files it stops with this error:
ValueError: Batch size 279 exceeds maximum batch size 166
Help me out with it — here's the code:
# Chat model used later to answer questions from retrieved chunks.
# NOTE(review): never commit a hard-coded API key — load it from an
# environment variable (e.g. os.environ["GOOGLE_API_KEY"]) instead.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key="my prefectly working api key",
    temperature=0.5,
    convert_system_messages=True,
)
# Embedding model used to vectorize document chunks before storing in Chroma.
embeddings=GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key="prefectly working api key"
)
# books_dir and file_contents are presumably defined earlier in the full
# script (directory of source books / loaded Documents) — TODO confirm.
if not os.path.exists(books_dir):
    raise RuntimeError(f"Error loading {books_dir}: File does not exist.")
print(file_contents)
# NOTE(review): this path looks paste-mangled — the backslashes were lost
# (likely r"C:\Users\jatin\OneDrive\...\books\jaitn.txt"); doc_true is not
# used anywhere in the visible code.
doc_true=r"C:UsersjatinOneDriveshitDesktopbooksjaitn.txt"
def create_vector_store(docs, store_name, batch_size=100):
    """Embed `docs` and persist them into a Chroma store named `store_name`.

    Fix for "ValueError: Batch size 279 exceeds maximum batch size 166":
    Chroma caps how many documents may be embedded/added in a single call,
    so instead of passing all chunks to `from_documents` at once, we seed
    the store with the first `batch_size` chunks and append the rest in
    batches of at most `batch_size` (kept safely below the 166 limit).

    Args:
        docs: list of Document chunks to embed and store.
        store_name: subdirectory of `db_dir` used for persistence.
        batch_size: max documents per embedding/add call (default 100).

    Returns:
        The Chroma instance for a newly created store, or None when the
        store already exists on disk (matches the original behavior of
        falling off the end of the else-branch).
    """
    emb_dir = os.path.join(db_dir, store_name)
    if os.path.exists(emb_dir):
        print(f"Vector store {store_name} already exists. No need to initialize.")
        return None
    print(f"\n--- Creating vector store {store_name} ---")
    # Seed the store with the first batch only; a single oversized call is
    # exactly what triggered the max-batch-size ValueError.
    db = Chroma.from_documents(
        docs[:batch_size], embeddings, persist_directory=emb_dir
    )
    # Append the remaining chunks in batches that respect the limit.
    for start in range(batch_size, len(docs), batch_size):
        db.add_documents(docs[start:start + batch_size])
    print(f"--- Finished creating vector store {store_name} ---")
    return db
# Split the loaded documents into ~1000-character chunks with 100 characters
# of overlap, then embed them into the persistent Chroma store.
# NOTE(review): the "n---" here (and in similar prints below) looks like a
# paste-mangled "\n---" — confirm against the original script.
print("n--- Using Character-based Splitting ---")
text_splitter=CharacterTextSplitter(chunk_size=1000,chunk_overlap=100,)
# file_contents is assumed to be a list of loaded Documents — TODO confirm.
texts=text_splitter.split_documents(file_contents)
create_vector_store(texts, "chroma_db_char")
print(type(texts))  # sanity check: should be a list of Document chunks
def query_vector_store(store_name, query):
    """Load the persisted Chroma store `store_name`, retrieve the chunk(s)
    most similar to `query`, and ask the chat model to answer the question
    from each retrieved chunk.

    Args:
        store_name: subdirectory of `db_dir` where the store is persisted.
        query: natural-language question to search for.
    """
    emb_dir = os.path.join(db_dir, store_name)
    # Guard clause: nothing to query if the store was never created.
    if not os.path.exists(emb_dir):
        print(f"Vector store {store_name} nahi milaa ")
        return
    print(f"\n storgae pe ja rahe {store_name} ")
    db = Chroma(persist_directory=emb_dir, embedding_function=embeddings)
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        # k=1 returns only the single best chunk; 0.01 is a very permissive
        # threshold, so almost any nearest match passes.
        search_kwargs={"k": 1, "score_threshold": 0.01},
    )
    print("this is query", query)
    print(retriever)
    relevant_docs = retriever.invoke(query)
    print(relevant_docs)
    # Robustness: the retriever may legitimately return nothing.
    if not relevant_docs:
        print(f"No relevant documents found in {store_name} for this query.")
        return
    print(f"\n text--> {store_name} ")
    for i, doc in enumerate(relevant_docs, 1):
        print(model.invoke(f"question is{query} find the answer in {doc.page_content}\n "))
# Example query against the character-split store.
query3 = "who wrote mataphores we live by"
query_vector_store("chroma_db_char", query3)
I am stuck here. I also tried Pinecone, but it does not accept my API key.
Jatin is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.