I’m quite new to building custom APIs, but I need to create a GPT-based chatbot with a custom knowledge base to help me write my thesis. While researching, I stumbled upon this somewhat old guide on how to create a custom chatbot here.
After updating its code, I managed to get it working, and it can access the folder where I keep my files. The issue is that, out of the 7 sample documents inside the folder (7 PDFs), the chatbot can only identify one of them… twice! I’m pretty sure it’s my code’s fault, but I don’t know what else to change. Any help solving this issue is more than welcome.
Here follows the code I used:
from llama_index.core import SimpleDirectoryReader, GPTVectorStoreIndex, PromptHelper, StorageContext, load_index_from_storage
from llama_index.llms.langchain import LangChainLLM
from langchain_openai import ChatOpenAI
import gradio as gr
import sys
import os
import openai
# Make the OpenAI key visible to the client libraries used below.
# NOTE: replace the placeholder with a real key; avoid committing a real key
# to version control.
os.environ.update({"OPENAI_API_KEY": "MY API KEY GOES HERE"})
def construct_index(directory_path):
    """Build a vector index over the files in ``directory_path`` and persist it.

    The chat model and prompt helper are registered on the global
    ``llama_index.core.Settings`` object so that they are used both here and
    by any query engine created later in the same process.

    Args:
        directory_path: Folder whose files are loaded with
            ``SimpleDirectoryReader``.

    Returns:
        The ``GPTVectorStoreIndex`` built from the loaded documents. The
        index is also persisted to the current working directory (``'./'``).
    """
    # Imported locally so the function is self-contained with respect to the
    # file-level import block.
    from llama_index.core import Settings

    max_input_size = 4896
    num_outputs = 512
    chunk_size_limit = 681

    prompt_helper = PromptHelper(
        context_window=max_input_size,
        num_output=num_outputs,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=chunk_size_limit,
    )
    chat_model = ChatOpenAI(
        temperature=0.00001,
        model_name="gpt-4o",
        max_tokens=num_outputs,
    )

    # NOTE(review): `from_documents` in llama_index.core does not appear to
    # consume `llm_predictor=` / `prompt_helper=` keyword arguments (they are
    # swallowed by **kwargs), so the configuration is applied through
    # `Settings` instead. Confirm against the installed llama-index version.
    # The LangChain chat model is wrapped so llama_index can drive it.
    Settings.llm = LangChainLLM(llm=chat_model)
    Settings.prompt_helper = prompt_helper

    documents = SimpleDirectoryReader(directory_path).load_data()
    index = GPTVectorStoreIndex.from_documents(documents)

    # Persist next to the script; `chatbot` reloads the index from this path.
    index.storage_context.persist('./')
    return index
def chatbot(input_text, *, similarity_top_k=5):
    """Answer ``input_text`` from the index persisted in the current directory.

    Args:
        input_text: The question typed by the user.
        similarity_top_k: How many of the most similar chunks are retrieved
            and handed to the LLM as context. Raise it when answers should
            draw on more documents; lower it to reduce prompt size.

    Returns:
        The text of the query engine's response.
    """
    # The index is reloaded from disk on every call, from the same directory
    # that `construct_index` persists to.
    storage_context = StorageContext.from_defaults(persist_dir="./")
    index = load_index_from_storage(storage_context)

    # Without an explicit value the query engine retrieves only its small
    # default number of chunks (2 in llama_index), which can all come from a
    # single file -- the model then only ever "sees" that one document.
    query_engine = index.as_query_engine(similarity_top_k=similarity_top_k)
    response = query_engine.query(input_text)
    return response.response
# Web UI: one multi-line text input wired to `chatbot`, plain-text output.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=7, label="Enter your text"),
    outputs="text",
    title="Custom-trained AI Chatbot",
)

# Build (and persist) the index over the "DocsTese" folder before serving.
index = construct_index("DocsTese")

# share=True asks Gradio for a public share link in addition to the local one.
iface.launch(share=True)