I am building a RAG chatbot with Gradio. With the first PDF it works as expected: the bot answers questions about that file correctly. But when I upload a new PDF, the file loads correctly and its embeddings are created correctly, yet when the retriever built with .as_retriever() queries ChromaDB, the responses still come from the previous PDF. How is ChromaDB storing the old data, and why is it not being updated when I upload a new file?
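To illustrate, here is a minimal sketch of what I think is happening, stripped of Gradio and the QA chain (the Document import is only for this example):

from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document

embeddings = HuggingFaceEmbeddings()

# first "upload"
store1 = Chroma.from_documents([Document(page_content="text from the first pdf")], embeddings)
# second "upload": I assumed this creates a brand-new vector store
store2 = Chroma.from_documents([Document(page_content="text from the second pdf")], embeddings)

# I expect only the second document here, but results from the first one come back too
print(store2.similarity_search("pdf", k=2))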
Below is my full code:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
import logging
from langchain.chains import ConversationChain, RetrievalQA
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from prompts import template, document_template
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
import os
from dotenv import load_dotenv
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableWithMessageHistory
import gradio as gr
load_dotenv()
Here are the PDF handler and LLM init functions:
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.getenv('API'),
    model_name="llama3-groq-8b-8192-tool-use-preview"
)
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=llm, memory=memory, verbose=True, prompt=PROMPT)
def pdf_handler(file):
    try:
        file = PyPDFLoader(file).load_and_split()
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        ).split_documents(file)
        print('Chunks are:', chunks)
        # Initialize embeddings
        embeddings = HuggingFaceEmbeddings()
        # Reinitialize the Chroma vector store
        vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings)
        return vector_store
    except Exception as e:
        logging.error("An error occurred in pdf_handler function: %s", e, exc_info=True)
        return None
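For what it's worth, one workaround I am considering is giving every upload its own collection so nothing can carry over (collection_name is a documented parameter of Chroma.from_documents; the uuid-based name is just for illustration), but I would still like to understand why the default collection keeps the old vectors:

import uuid

# hypothetical variant of the store creation inside pdf_handler:
# a fresh, uniquely named collection per upload instead of the default one
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=f"pdf_{uuid.uuid4().hex}",
)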
def llm_init(file):
    try:
        template = "Context: {context}\nQuestion: {question}\nAnswer:"
        parser = StrOutputParser()
        chain = llm | parser
        PROMPT = PromptTemplate(
            input_variables=["context", "question"], template=template
        )
        chain_type_kwargs = {"prompt": PROMPT}
        qa = RetrievalQA.from_chain_type(
            llm=chain,
            chain_type="stuff",
            retriever=pdf_handler(file).as_retriever(search_type='similarity', search_kwargs={"k": 6}),
            return_source_documents=True,
            chain_type_kwargs=chain_type_kwargs,
        )
        return qa
    except Exception as e:
        logging.error("An error occurred in llm_init function: %s", e, exc_info=True)
        return None
def pdf_chat(is_file, question):
    qa = llm_init(file=is_file)
    query = qa({"query": question})["result"]
    return query

def handle_chat(file, question):
    if file:
        return pdf_chat(file, question)
    else:
        return conversation.predict(input=question)
The Gradio implementation:
ui = gr.Interface(
    fn=handle_chat,
    inputs=[gr.File(), 'text'],
    outputs=gr.Textbox(lines=14),
    title="Chatbot",
    description="Upload a PDF file and ask a question to get an answer, or ask a question directly.",
    theme=gr.themes.Default(primary_hue="violet", secondary_hue="violet")
)
print("its working", flush=True)
port = int(os.environ.get("PORT", 9449))
ui.launch(server_name="0.0.0.0", server_port=port)
The issue is in this part of the code:
vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings)
vectors = vector_store.as_retriever(search_type='similarity',search_kwargs={"k": 6})
response = vectors.invoke("what is this")
Even this minimal call responds based on the previous PDF's data; the new data does not replace it in ChromaDB.
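One way to verify is to dump what the collection actually holds after the second upload (as far as I understand, get() returns the stored ids and documents):

# diagnostic: inspect the raw contents of the underlying collection
stored = vector_store.get()
print(len(stored["ids"]), "chunks stored")  # if old data persists, this keeps growing per upload
for doc in stored["documents"][:3]:
    print(doc[:80])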