Hi, I'm using llama_index to index hundreds of docs that contain some specific details, and now I want to build an API over it that can answer queries based on the info indexed from the docs. My implementation works for a single user, but the API will be used by hundreds of different users, so how can I maintain separate chat sessions and pass each user's previous chats as context for their new queries? I looked into the docs but found nothing about this, so any guidance on what my approach should be would be appreciated. I've put a rough, untested sketch of the direction I'm considering at the end of this post.
Current Implementation
index_builder.py
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
import os

os.environ["OPENAI_API_KEY"] = ''

persist_dir = "./doc_index"

def construct_index(directory_path):
    try:
        print(f"Beginning to load data in directory: {directory_path}")
        documents = SimpleDirectoryReader(directory_path).load_data()
        print(f"***** Documents for indexing ***** : {documents}, length: {len(documents)}")
        index = VectorStoreIndex.from_documents(documents, show_progress=True)
        index.storage_context.persist(persist_dir=persist_dir)
    except Exception as e:
        print(f"Exception occurred in construct_index, reason is: {e}")

construct_index("docs")
chatbot.py
from llama_index.core import StorageContext, load_index_from_storage
import gradio as gr
import os

os.environ["OPENAI_API_KEY"] = ''

persist_dir = "./doc_index"
index = None

def load_index():
    global index
    # Load the persisted index and keep it in a module-level global
    print("Beginning to load index")
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
    print("Index loaded successfully for querying")
    if not index:
        print("Index not found. Please run the indexing script first.")

# Loading the index before the chatbot starts, to avoid a delayed first response
# and also to avoid re-loading the index on every request
load_index()
def chatbot(input_text):
    global index
    # NOTE: a fresh chat engine is created on every call, so no chat history
    # survives between turns — this is the part that needs to become per-user.
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        system_prompt=(
            "You are a chatbot who responds only as per the data available in "
            "the VectorStoreIndex in detail and nothing from the internet. "
            "If the data is not available, respond with "
            "'Sorry, I don't have the information regarding this'."
        ),
    )
    print("Input text: ", input_text)
    response = chat_engine.chat(input_text)
    print('Response is: ', response)
    return str(response)
# Launch Gradio interface
iface = gr.Interface(fn=chatbot,
                     inputs=gr.components.Textbox(lines=7, label="Enter your query"),
                     outputs="text",
                     title="AI Chatbot")
print("ChatBot is ready...")
iface.launch(share=True)  # launch() blocks in a script, so print readiness first