Below is my code. Everything works fine except the last line: I am unable to get an index value for each PDF chunk to save into FAISS via add_with_ids.
<code>import numpy as np
import faiss
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

pdf_path = "Some pdf path"

def read_and_load(dir_path):
    # Load every PDF in the directory
    loader = DirectoryLoader(dir_path, glob="./*.pdf", loader_cls=PyPDFLoader)
    doc_loader = loader.load()
    return doc_loader

load_documents = read_and_load(pdf_path)

def split_documents(content):
    # Split the loaded documents into 512-character chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=0)
    documents_split = text_splitter.split_documents(content)
    return documents_split

cleaned_documents = split_documents(load_documents)
list_of_texts = [chunk.page_content for chunk in cleaned_documents]

list_of_embedding = ["Some list embedding"]  # placeholder for the real embeddings
embedding_array = np.array(list_of_embedding)
dimension = embedding_array.shape[1]

db_vectors = embedding_array.copy().astype(np.float32)
faiss.normalize_L2(db_vectors)

index = faiss.IndexFlatIP(dimension)
index = faiss.IndexIDMap(index)

# Unable to get the index value of each text chunk here
db_ids = data.index.values.astype(np.int64)  # fails: `data` is not defined
index.add_with_ids(db_vectors, db_ids)
</code>
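For reference, one possible way to supply valid IDs (a minimal sketch, not necessarily what you intended: it assumes one embedding per chunk, in the same order as list_of_texts, and simply uses each chunk's position as its ID):

<code># Sketch: use the positional index of each chunk as its FAISS ID (int64 required)
db_ids = np.arange(len(cleaned_documents), dtype=np.int64)
index.add_with_ids(db_vectors, db_ids)

# Map a returned FAISS ID back to the original chunk text
id_to_text = {i: text for i, text in enumerate(list_of_texts)}
</code>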
pdf_path = "Some pdf path"
def read_and_load(dir_path):
loader = DirectoryLoader(dir_path, glob="./*.pdf", loader_cls=PyPDFLoader)
doc_loader = loader.load()
return doc_loader
load_documents = read_and_load(pdf_path)
def split_documents(content):
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=0)
documents_split = text_splitter.split_documents(content)
return documents_split
cleaned_documents = split_documents(load_documents)
list_of_texts = [chunk.page_content for chunk in cleaned_documents]
list_of_embedding = ["Some list embedding"]
embedding_array = np.array(list_of_embedding)
dimension = embedding_array.shape[1]
db_vectors = embedding_array.copy().astype(np.float32)
faiss.normalize_L2(db_vectors)
index = faiss.IndexFlatIP(dimension)
index = faiss.IndexIDMap(index)
#Unable to text index value of text
db_ids = data.index.values.astype(np.int64)
index.add_with_ids(db_vectors, db_ids)