I was building a RAG chatbot and I encountered an error when getting a response:

BadRequestError: status_code: 400, body: {'message': 'invalid type: parameter texts is of type object but should be of type string. For proper usage, please refer to https://docs.cohere.com/embed-reference'}

The modules with the different functions are below.

from modules.prompt import create_conversation_prompt
from langchain.chains import ConversationChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
def create_conversation_chain(model, memory):
    prompt = create_conversation_prompt()
    return ConversationChain(
        prompt=prompt,
        llm=model,
        memory=memory,
        verbose=True,
    )
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
def rag_chain(retriever, prompt, llm):
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain
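For what it's worth, a chain built from that dict expects a plain string as input: both the RunnablePassthrough branch and the retriever branch receive whatever is passed to invoke, verbatim. A minimal probe (hypothetical, with a lambda standing in for the retriever) shows the data flow:

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

# Stand-in for the retriever branch: reports exactly what it is given.
probe = RunnableParallel(
    context=RunnableLambda(lambda x: f"retriever received: {x!r}"),
    question=RunnablePassthrough(),
)

print(probe.invoke("What is MQL5?"))
# {'context': "retriever received: 'What is MQL5?'", 'question': 'What is MQL5?'}

print(probe.invoke({"input": "What is MQL5?"}))
# The retriever branch now receives the whole dict, which would reach the
# Cohere embed call as a non-string `texts` value.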
from modules.prompt import get_qa_prompt,get_contextualize_q_prompt
from modules.memory_chain import memory_chain
from modules.chain import rag_chain
from modules.llm_model import initialize_cohere_llm_model
def create_full_chain(retriever):
    llm = initialize_cohere_llm_model()
    qa_prompt = get_qa_prompt()
    contextualize_q_prompt = get_contextualize_q_prompt()
    basic_chain = rag_chain(retriever, qa_prompt, llm)
    full_chain = memory_chain(llm, contextualize_q_prompt, basic_chain)
    return full_chain
def ask_question(query, rag_chain_with_history):
    response = rag_chain_with_history.invoke(
        {"input": query},
        config={"configurable": {"session_id": "4a"}},
    )
    return response
from constant import MODEL_ENDPOINTS, HUGGINGFACE_API_KEY, COHERE_API_KEY
from langchain_community.llms import HuggingFaceEndpoint
from langchain_cohere.llms import Cohere
def initialize_llm_model(model_name: str):
    model_endpoint = MODEL_ENDPOINTS[model_name]
    return HuggingFaceEndpoint(
        endpoint_url=model_endpoint,
        huggingfacehub_api_token=HUGGINGFACE_API_KEY,
        temperature=0.1,
    )

def initialize_cohere_llm_model():
    return Cohere(cohere_api_key=COHERE_API_KEY)
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
def memory_chain(llm, contextualize_q_prompt, basic_chain):
    chain = basic_chain | llm | contextualize_q_prompt
    store = {}

    def get_session_history(session_id: str) -> ChatMessageHistory:
        if session_id not in store:
            store[session_id] = ChatMessageHistory()
        return store[session_id]

    rag_chain_with_history = RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )
    return rag_chain_with_history
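RunnableWithMessageHistory hands its inner chain a dict: the original input plus the loaded chat_history. A hypothetical probe (a lambda in place of the real chain) makes visible what the wrapped chain has to accept:

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.history import RunnableWithMessageHistory

# Probe: reports exactly what the wrapper passes to the inner chain.
probe = RunnableLambda(lambda x: {"answer": f"inner chain received: {x!r}"})

store = {}
def get_session_history(session_id):
    return store.setdefault(session_id, ChatMessageHistory())

wrapped = RunnableWithMessageHistory(
    probe,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
print(wrapped.invoke({"input": "hi"}, config={"configurable": {"session_id": "4a"}}))
# The inner chain sees {'input': 'hi', 'chat_history': [...]}, so every step
# piped after it must accept that dict shape.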
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

def get_qa_prompt():
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", prompt),  # `prompt` is the QA system prompt string defined elsewhere in this module
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return qa_prompt
def get_contextualize_q_prompt():
    contextualize_q_system_prompt = """Given a chat history and the latest user question
    which might reference context in the chat history, formulate a standalone question
    which can be understood without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is."""
    return ChatPromptTemplate.from_messages([
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
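Both prompts need an "input" string and a "chat_history" list when formatted; a quick usage sketch (the message contents here are made up):

from langchain_core.messages import AIMessage, HumanMessage

contextualize_q_prompt = get_contextualize_q_prompt()
prompt_value = contextualize_q_prompt.invoke({
    "input": "What about its price?",
    "chat_history": [HumanMessage("What is MQL5?"), AIMessage("MQL5 is ...")],
})
print(prompt_value.to_messages())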
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents)
    return splits

def data_splitter(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, add_start_index=True
    )
    chunks = text_splitter.split_documents(documents)
    return chunks
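A quick sanity check of the splitter (the sample document is made up):

from langchain_core.documents import Document

sample = [Document(page_content="word " * 600)]  # about 3000 characters
chunks = data_splitter(sample)
print(len(chunks), chunks[0].metadata)
# add_start_index=True records each chunk's character offset in metadata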
from langchain_cohere import CohereEmbeddings
from langchain_community.vectorstores import Chroma
from modules.data_parser import load_url
from modules.splitter import split_documents

def get_embedding_model(cohere_api_key):
    embed_model = CohereEmbeddings(
        cohere_api_key=cohere_api_key,
        model="embed-english-v3.0"
    )
    return embed_model
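CohereEmbeddings accepts only strings: embed_query takes a single string and embed_documents takes a list of strings. A minimal check using the helper above (it spends a little quota and assumes a valid key):

embed_model = get_embedding_model(COHERE_API_KEY)

vec = embed_model.embed_query("What is MQL5?")               # OK: one string
vecs = embed_model.embed_documents(["chunk 1", "chunk 2"])   # OK: list of strings

# A dict such as {"input": "What is MQL5?"} arriving here is serialized as
# an object and is the likely trigger for the 400 "parameter texts ...
# should be of type string" error.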
# Embed splits to vector
def embed_splits_to_vector(chunks: list):
    for chunk in chunks:
        page_content = chunk.page_content
    return page_content
# Create vector store
def create_vectorstore(chunks: list, embed_model):
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embed_model
    )
    return vectorstore
def send_data_to_retriever(url_webpage, COHERE_API_KEY):
    docs = load_url(url_webpage)
    splits = split_documents(docs)
    embed_model = get_embedding_model(COHERE_API_KEY)
    vectorstore = create_vectorstore(splits, embed_model)
    retriever = vectorstore.as_retriever()
    return retriever
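The retriever returned here likewise expects a plain string query; a short usage sketch:

retriever = send_data_to_retriever("https://forexforest.com/hk/en/faq/", COHERE_API_KEY)
docs = retriever.invoke("What is MQL5?")  # a dict here would be forwarded to the embed call
print(len(docs), docs[0].page_content[:200])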
Here is the main code.
from modules.llm_model import initialize_cohere_llm_model
from modules.splitter import split_documents, data_splitter
from modules.data_parser import load_url
from modules.vectorstore import get_embedding_model, create_vectorstore, embed_splits_to_vector, send_data_to_retriever
from constant import COHERE_API_KEY
from modules.prompt import get_contextualize_q_prompt, get_qa_prompt
from modules.memory_chain import memory_chain
from modules.full_chain import create_full_chain, ask_question
from modules.chain import rag_chain

url_webpage = "https://forexforest.com/hk/en/faq/"
docs = load_url(url_webpage)
split_data = split_documents(docs)
chunks = data_splitter(docs)
embed_model = get_embedding_model(COHERE_API_KEY)
page_content = embed_splits_to_vector(chunks)
# Get embedding model and vector store
vectorstore = create_vectorstore(chunks, embed_model)
llm = initialize_cohere_llm_model()
retriever = send_data_to_retriever(url_webpage, COHERE_API_KEY)
retriever = vectorstore.as_retriever()
contextualize_q_prompt = get_contextualize_q_prompt()
qa_prompt = get_qa_prompt()
basic_chain = rag_chain(retriever, qa_prompt, llm)
full_chain = create_full_chain(retriever)
rag_chain_with_history = memory_chain(llm, contextualize_q_prompt, basic_chain)
query = "What is MQL5?"
response = ask_question(query, rag_chain_with_history)
I have tried changing embed_splits_to_vector to print out strings instead of lists, but that uses up my API key limit, and I don't know where I have put objects instead of strings in the code.
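One way to find where an object (rather than a string) reaches the embedder without spending API quota is to swap in a fake embedding class that only checks and records its inputs; a hypothetical sketch:

from langchain_core.embeddings import Embeddings

class TracingEmbeddings(Embeddings):
    """Hypothetical stand-in for CohereEmbeddings: checks what would be sent."""

    def embed_documents(self, texts):
        for t in texts:
            assert isinstance(t, str), f"non-string sent as document: {t!r}"
        return [[0.0] * 8 for _ in texts]

    def embed_query(self, text):
        assert isinstance(text, str), f"non-string sent as query: {text!r}"
        return [0.0] * 8

# Build the store with this instead of the Cohere model:
# vectorstore = create_vectorstore(chunks, TracingEmbeddings())
# A failing assert then shows exactly which value (for example
# {'input': query}) reached the embedding call.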