Thiết kế website giá rẻ

Question

Description

I’m using Predibase and LlamaIndex to set up all the moving parts of a RAG system, with Predibase as the LLM provider. I’m currently trying to create the index so that any query I make will pull the relevant context from my Pinecone vector store.

The following Python code is an example of achieving this objective.

<code>index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context)

</code>

<code>index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context) </code>

index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context)

At this step, the variable ‘documents’ that I’m passing in is of type ‘list’, but the call raises the error “AttributeError: ‘Document’ object has no attribute ‘get_doc_id’”.

Is there any way to solve this issue?
This is the resource that I’m currently following: https://docs.predibase.com/user-guide/examples/rag

Colab Python Code

<code># Extract Filings Function

def get_filings(ticker):

global sec_api_key

# Finding Recent Filings with QueryAPI

queryApi = QueryApi(api_key=sec_api_key)

query = {

"query": f"ticker:{ticker} AND formType:"10-K"",

"from": "0",

"size": "1",

"sort": [{ "filedAt": { "order": "desc" } }]

}

filings = queryApi.get_filings(query)

# Getting 10-K URL

filing_url = filings["filings"][0]["linkToFilingDetails"]

# Extracting Text with ExtractorAPI

extractorApi = ExtractorApi(api_key=sec_api_key)

onea_text = extractorApi.get_section(filing_url, "1A", "text") # Section 1A - Risk Factors

seven_text = extractorApi.get_section(filing_url, "7", "text") # Section 7 - Management’s Discussion and Analysis of Financial Condition and Results of Operations

# Joining Texts

combined_text = onea_text + "nn" + seven_text

return combined_text

</code>

<code># Extract Filings Function def get_filings(ticker): global sec_api_key # Finding Recent Filings with QueryAPI queryApi = QueryApi(api_key=sec_api_key) query = { "query": f"ticker:{ticker} AND formType:"10-K"", "from": "0", "size": "1", "sort": [{ "filedAt": { "order": "desc" } }] } filings = queryApi.get_filings(query) # Getting 10-K URL filing_url = filings["filings"][0]["linkToFilingDetails"] # Extracting Text with ExtractorAPI extractorApi = ExtractorApi(api_key=sec_api_key) onea_text = extractorApi.get_section(filing_url, "1A", "text") # Section 1A - Risk Factors seven_text = extractorApi.get_section(filing_url, "7", "text") # Section 7 - Management’s Discussion and Analysis of Financial Condition and Results of Operations # Joining Texts combined_text = onea_text + "nn" + seven_text return combined_text </code>

# Extract Filings Function
def get_filings(ticker):
    """Return the text of sections 1A and 7 of the latest 10-K for ``ticker``.

    Looks up the most recently filed 10-K through ``QueryApi``, then extracts
    Section 1A (Risk Factors) and Section 7 (Management's Discussion and
    Analysis of Financial Condition and Results of Operations) through
    ``ExtractorApi``. Both clients are created with the module-level
    ``sec_api_key``.

    Args:
        ticker: Ticker symbol interpolated into the search query, e.g. "AAPL".

    Returns:
        The two section texts joined by a blank line.

    Raises:
        IndexError: If the query returns no filings for ``ticker``.
    """
    # Finding Recent Filings with QueryAPI
    query_api = QueryApi(api_key=sec_api_key)
    query = {
        # Single-quoted f-string so the double quotes around 10-K are part of
        # the query text instead of terminating the literal.
        "query": f'ticker:{ticker} AND formType:"10-K"',
        "from": "0",
        "size": "1",  # only the newest filing is needed
        "sort": [{"filedAt": {"order": "desc"}}],
    }
    filings = query_api.get_filings(query)

    # Getting 10-K URL
    results = filings["filings"]
    if not results:
        # Same exception type as indexing an empty list, but with a message
        # that says what was actually missing.
        raise IndexError(f"No 10-K filings found for ticker {ticker!r}")
    filing_url = results[0]["linkToFilingDetails"]

    # Extracting Text with ExtractorAPI
    extractor_api = ExtractorApi(api_key=sec_api_key)
    # Section 1A - Risk Factors
    onea_text = extractor_api.get_section(filing_url, "1A", "text")
    # Section 7 - Management's Discussion and Analysis of Financial Condition
    # and Results of Operations
    seven_text = extractor_api.get_section(filing_url, "7", "text")

    # Joining Texts: a blank line keeps the two sections visually separate.
    combined_text = onea_text + "\n\n" + seven_text

    return combined_text

<code># construct vector store and custom storage context

pinecone_index = pc.Index("predibase-demo-hf")

pincone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

pinecone_storage_context = StorageContext.from_defaults(vector_store=pincone_vector_store)

# Prompt the user to input the stock ticker they want to analyze

ticker = input("What Ticker Would you Like to Analyze? ex. AAPL: ")

print("-----")

print("Getting Filing Data")

# Retrieve the filing data for the specified ticker

filing_data = get_filings(ticker)

print("-----")

print("Initializing Vector Database")

# Initialize a text splitter to divide the filing data into chunks

text_splitter = RecursiveCharacterTextSplitter(

chunk_size = 1000, # Maximum size of each chunk

chunk_overlap = 500, # Number of characters to overlap between chunks

length_function = len, # Function to determine the length of the chunks

is_separator_regex = False # Whether the separator is a regex pattern

)

# Split the filing data into smaller, manageable chunks

split_data = text_splitter.create_documents([filing_data])

# Load in the documents you want to index

documents = split_data

</code>

<code># construct vector store and custom storage context pinecone_index = pc.Index("predibase-demo-hf") pincone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index) pinecone_storage_context = StorageContext.from_defaults(vector_store=pincone_vector_store) # Prompt the user to input the stock ticker they want to analyze ticker = input("What Ticker Would you Like to Analyze? ex. AAPL: ") print("-----") print("Getting Filing Data") # Retrieve the filing data for the specified ticker filing_data = get_filings(ticker) print("-----") print("Initializing Vector Database") # Initialize a text splitter to divide the filing data into chunks text_splitter = RecursiveCharacterTextSplitter( chunk_size = 1000, # Maximum size of each chunk chunk_overlap = 500, # Number of characters to overlap between chunks length_function = len, # Function to determine the length of the chunks is_separator_regex = False # Whether the separator is a regex pattern ) # Split the filing data into smaller, manageable chunks split_data = text_splitter.create_documents([filing_data]) # Load in the documents you want to index documents = split_data </code>

# Construct the vector store and a custom storage context on top of it.
# `pc` is defined outside this snippet — presumably a Pinecone client; confirm.
pinecone_index = pc.Index("predibase-demo-hf")
# NOTE: the name is spelled "pincone" (sic); it is used consistently on the
# next line, so the code works as written.
pincone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
pinecone_storage_context = StorageContext.from_defaults(vector_store=pincone_vector_store)


# Prompt the user to input the stock ticker they want to analyze
ticker = input("What Ticker Would you Like to Analyze? ex. AAPL: ")

print("-----")
print("Getting Filing Data")
# Retrieve the filing data for the specified ticker (a single combined string
# as returned by get_filings)
filing_data = get_filings(ticker)

print("-----")
print("Initializing Vector Database")
# Initialize a text splitter to divide the filing data into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,         # Maximum size of each chunk
    chunk_overlap = 500,       # Number of characters to overlap between chunks (half of chunk_size)
    length_function = len,     # Function to determine the length of the chunks
    is_separator_regex = False # Whether the separator is a regex pattern
)
# Split the filing data into smaller, manageable chunks; the whole filing text
# is passed as a one-element list.
split_data = text_splitter.create_documents([filing_data])


# Load in the documents you want to index
# NOTE(review): `documents` is whatever create_documents returned. If
# RecursiveCharacterTextSplitter comes from LangChain (presumably — confirm),
# these are LangChain Document objects, which would explain the reported
# "'Document' object has no attribute 'get_doc_id'" when they are handed to
# VectorStoreIndex.from_documents. Converting each item to the indexing
# library's own Document type before indexing is the likely fix — verify.
documents = split_data

<code>index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context)

</code>

<code>index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context) </code>

# Build the index from `documents`, using the custom storage context.
# NOTE(review): this is the call reported to fail with "AttributeError:
# 'Document' object has no attribute 'get_doc_id'". That suggests the items in
# `documents` are not the Document type from_documents expects — presumably
# they were produced by a different library's text splitter; confirm and
# convert them before indexing.
index = VectorStoreIndex.from_documents(documents, storage_context=pinecone_storage_context)

Thiết kế website giá rẻ

Danh mục

Error happening while creating the index (AttributeError: ‘Document’ object has no attribute ‘get_doc_id’)

Description

Colab Python Code

Error Image