I’m developing an AI assistant that fetches information from various URLs and uses this information to answer user queries. I want to include citations or reference links at the end of each answer to credit the sources accurately. Currently, the assistant retrieves multiple URLs but doesn’t always use relevant ones in its responses. How can I modify my code to ensure that only relevant URLs are included in the citations at the end of the AI’s answers?
# Function to fetch URLs (sublinks) from the website and save them to urls.txt
# Function to fetch and extract text content from a website using Requests
def main():
    """Fetch knowledge-base pages, index them, and answer one chat question.

    Steps, in order:
      1. Get the list of URLs from ``fetch_urls()``; stop with a Streamlit
         error if the list is empty.
      2. Fetch every URL in a thread pool and wrap each non-empty page in a
         ``Document`` whose metadata records the page URL (``"source"``) and
         its image URLs (``"images"``).
      3. Load the vector store from ``vectorstore.pkl`` if that file exists,
         otherwise build it from the fetched documents and save it.
      4. Render the question form; on submit, retrieve documents for the
         question, pass their text to the LLM as context, and append the
         URLs of the retrieved documents as a "Sources" list.

    Returns:
        None. Results are written to ``st.session_state.messages`` (newest
        message first) and to the Streamlit page.
    """
    # Calling function
    website_urls = fetch_urls()
    if not website_urls:
        st.error("No URLs fetched from the CSUSB IT Knowledge Base.")
        return

    documents = []
    # Fetch content from each website in parallel
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(fetch_website_content, url) for url in website_urls]
        # Results are consumed in submission order, so `documents` follows
        # the order of `website_urls`.
        for future in futures:
            text_content, image_urls, fetched_url = future.result()
            if text_content:
                documents.append(
                    Document(
                        page_content=text_content,
                        metadata={"source": fetched_url, "images": image_urls},
                    )
                )

    if not documents:
        st.error("No content fetched from the websites.")
        return

    # Load, create vector store
    vector_store_path = "vectorstore.pkl"
    if os.path.exists(vector_store_path):
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only safe while vectorstore.pkl is written by this app alone.
        # NOTE(review): once this file exists the freshly fetched `documents`
        # are ignored, so the index never picks up new or changed pages
        # until the file is deleted.
        with open(vector_store_path, "rb") as f:
            vectorstore = pickle.load(f)
    else:
        document_embedder = Embeddings(model="name", model_type="passage")
        vectorstore = FAISS.from_documents(documents, document_embedder)
        with open(vector_store_path, "wb") as f:
            pickle.dump(vectorstore, f)
        st.success("Vector store created successfully.")

    # Initialize models
    llm = Chat(model="name")
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", "You are an AI Assistant..."), ("user", "{input}")]
    )
    chain = prompt_template | llm | StrOutputParser()

    # Input form
    with st.form(key='chat_form', clear_on_submit=True):
        user_input = st.text_input("Ask your question:", key="user_input")
        submitted = st.form_submit_button("Send")

    # Handle user input and generate response
    if submitted and user_input and vectorstore is not None:
        st.session_state.messages.insert(0, {"role": "user", "content": user_input})

        # Retrieve relevant documents from vectorstore
        retriever = vectorstore.as_retriever()
        relevant_docs = retriever.get_relevant_documents(user_input)

        # Combine relevant texts into context, separated by blank lines.
        context = "\n\n".join(doc.page_content for doc in relevant_docs)

        # Collect unique sources in retrieval order. dict.fromkeys keeps the
        # first occurrence of each URL, so the citation list is stable across
        # runs (a set would not be). Documents without a source are skipped.
        relevant_sources = list(
            dict.fromkeys(
                source
                for source in (doc.metadata.get("source") for doc in relevant_docs)
                if source
            )
        )

        augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}"

        # Invoke the AI assistant with augmented input
        response = chain.invoke({"input": augmented_user_input})

        if relevant_sources:
            response_content = f"{response}\n\nSources:\n" + "\n".join(relevant_sources)
        else:
            response_content = response

        st.session_state.messages.insert(0, {"role": "assistant", "content": response_content})
Liam Mason is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.