Thiết kế website giá rẻ

Question

I am trying to run a similarity search to find the documents most similar to my query. However, the response does not include the document ID; it is a list of tuples in the following format:
(Document(page_content="", metadata={embedding=[], content=""}), )

I know each document has an ID because, when I added the document, the call returned the _id, which I stored in my PostgreSQL DB.

Ways I found online:

Include _id in the metadata, but the _id is generated only after I add the document, so I am not sure how that would work.
Include the PostgreSQL row ID in the metadata, but I only add the document to PostgreSQL after adding it to OpenSearch (I would prefer not to change the order because of the logic).

I need to get the _id of each similar document. Please let me know what else I can try.

This is my add function:

<code>def add_document(opensearch_client, index_name, embedding, content):

document_data = {

"embedding": embedding,

"content": content

}

response = opensearch_client.index(index=index_name, body=document_data)

logger.info("Document added to opensearch")

return response['_id']

</code>

<code>def add_document(opensearch_client, index_name, embedding, content): document_data = { "embedding": embedding, "content": content } response = opensearch_client.index(index=index_name, body=document_data) logger.info("Document added to opensearch") return response['_id'] </code>

def add_document(opensearch_client, index_name, embedding, content):
    """Index one embedding/content pair and return the ``_id`` from the response.

    Args:
        opensearch_client: Client object exposing ``index(index=..., body=...)``.
        index_name: Name of the index the document is written to.
        embedding: Value stored under the ``"embedding"`` key of the body.
        content: Value stored under the ``"content"`` key of the body.

    Returns:
        The ``"_id"`` entry of the response returned by the ``index`` call.
    """
    indexed = opensearch_client.index(
        index=index_name,
        body={"embedding": embedding, "content": content},
    )
    logger.info("Document added to opensearch")
    return indexed["_id"]

This is my search function:

<code>def search_vector_db(query, _is_aoss=False):

session = boto3.Session()

credentials = session.get_credentials()

aws_auth = AWS4Auth(credentials.access_key, credentials.secret_key, "ap-southeast-1", 'es', session_token=credentials.token)

opensearch_endpoint = get_opensearch_endpoint("vector-kb", "ap-southeast-1")

docsearch = OpenSearchVectorSearch(

index_name="vector-kb-index",

embedding_function=get_openai_embedding_client(),

opensearch_url=f"https://{opensearch_endpoint}",

http_auth=aws_auth,

timeout=30,

is_aoss=_is_aoss,

connection_class=RequestsHttpConnection,

use_ssl=True,

verify_certs=True,

)

docs = docsearch.similarity_search_with_score(

query,

search_type="script_scoring",

space_type="cosinesimil",

vector_field="embedding",

text_field="content",

score_threshold=1.5

)

contexts = []

for doc in docs:

logger.info(doc)

contexts.append(doc[0].page_content)

logger.info("Similar documents retrieved from Opensearch for context")

return contexts

</code>

<code>def search_vector_db(query, _is_aoss=False): session = boto3.Session() credentials = session.get_credentials() aws_auth = AWS4Auth(credentials.access_key, credentials.secret_key, "ap-southeast-1", 'es', session_token=credentials.token) opensearch_endpoint = get_opensearch_endpoint("vector-kb", "ap-southeast-1") docsearch = OpenSearchVectorSearch( index_name="vector-kb-index", embedding_function=get_openai_embedding_client(), opensearch_url=f"https://{opensearch_endpoint}", http_auth=aws_auth, timeout=30, is_aoss=_is_aoss, connection_class=RequestsHttpConnection, use_ssl=True, verify_certs=True, ) docs = docsearch.similarity_search_with_score( query, search_type="script_scoring", space_type="cosinesimil", vector_field="embedding", text_field="content", score_threshold=1.5 ) contexts = [] for doc in docs: logger.info(doc) contexts.append(doc[0].page_content) logger.info("Similar documents retrieved from Opensearch for context") return contexts </code>

def search_vector_db(query, _is_aoss=False):
    """Return the text of the indexed documents most similar to ``query``.

    Builds a SigV4-authenticated ``OpenSearchVectorSearch`` client for the
    ``vector-kb-index`` index and runs a script-scoring search with the
    ``cosinesimil`` space type over the ``embedding`` field.

    Args:
        query: Query passed to ``similarity_search_with_score``.
        _is_aoss: Set to ``True`` when the endpoint is an Amazon OpenSearch
            Serverless collection. It is forwarded as ``is_aoss`` and also
            selects the SigV4 service name used to sign requests.

    Returns:
        A list holding the ``page_content`` of every hit, in the order the
        search returned them.
    """
    region = "ap-southeast-1"
    # Serverless collections must be signed with the "aoss" service name,
    # while managed domains use "es". Always signing as "es" made requests
    # fail authentication whenever _is_aoss was True.
    service = "aoss" if _is_aoss else "es"

    credentials = boto3.Session().get_credentials()
    aws_auth = AWS4Auth(
        credentials.access_key,
        credentials.secret_key,
        region,
        service,
        session_token=credentials.token,
    )

    opensearch_endpoint = get_opensearch_endpoint("vector-kb", region)

    docsearch = OpenSearchVectorSearch(
        index_name="vector-kb-index",
        embedding_function=get_openai_embedding_client(),
        opensearch_url=f"https://{opensearch_endpoint}",
        http_auth=aws_auth,
        timeout=30,
        is_aoss=_is_aoss,
        connection_class=RequestsHttpConnection,
        use_ssl=True,
        verify_certs=True,
    )

    hits = docsearch.similarity_search_with_score(
        query,
        search_type="script_scoring",
        space_type="cosinesimil",
        vector_field="embedding",
        text_field="content",
        score_threshold=1.5,
    )

    contexts = []
    for hit in hits:
        # Each hit is indexed as a sequence whose first element is the document.
        logger.info(hit)
        contexts.append(hit[0].page_content)

    logger.info("Similar documents retrieved from Opensearch for context")
    return contexts

Thiết kế website giá rẻ

Danh mục

Retrieving ID from similarity_search_with_score OpenSearch Langchain