I have an OpenSearch schema in which one of the fields is called products. It contains the list of products that the document is relevant to.
I have a user query parser which identifies which products the user is talking about. I want to do semantic search or hybrid search on this schema, but with a filter on the products field.
user input products: ['prod-1', 'prod-2']
value of products in a document: ['prod-1', 'prod-3'] – This document must be matched because prod-1 is present.
Schema:
{
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 1,
"knn": true,
"knn.space_type": "cosinesimil"
}
},
"mappings": {
"properties": {
"page_content": {
"type": "text"
},
"products": {
"type": "keyword"
}
}}
}
Is this the right way to define the schema, given that products holds a list of values?
def format_query(text, query_vector, text_fields, vector_field, alpha, method, limit, filter_products=None):
    """Build the OpenSearch query clause for a semantic or hybrid search.

    Args:
        text: Query text matched against ``text_fields`` (hybrid only).
        query_vector: Query embedding compared with ``vector_field``.
        text_fields: Field names handed to ``multi_match`` (hybrid only).
        vector_field: Name of the vector field to search / score against.
        alpha: Weight of the cosine similarity in the hybrid score script.
        method: Either ``'semantic'`` or ``'hybrid'``.
        limit: Value of ``k`` in the k-NN clause (semantic only).
        filter_products: Optional product id, or iterable of product ids.
            A document is kept when its ``products`` field contains at
            least one of them. ``None`` or an empty value disables the
            filter.

    Returns:
        A dict holding the query clause.

    Raises:
        ValueError: If ``method`` is neither ``'semantic'`` nor ``'hybrid'``.
    """
    if method not in ('semantic', 'hybrid'):
        raise ValueError("Unsupported search method")

    # Normalise the product filter; a bare string is one product id, not a
    # sequence of characters.
    if not filter_products:
        products = []
    elif isinstance(filter_products, str):
        products = [filter_products]
    else:
        products = list(filter_products)

    # ``terms`` matches when the field holds ANY of the listed values.
    # Assumes ``products`` is indexed as ``keyword`` so ids match exactly.
    product_filter = {"terms": {"products": products}} if products else None

    if method == 'semantic':
        knn_body = {
            "vector": query_vector,
            "k": limit,
        }
        if product_filter is not None:
            # The filter goes inside the knn clause so that it is applied
            # while the k neighbours are collected. Wrapping knn in
            # bool/filter instead only filters the k hits that were already
            # selected, which yields nothing when none of them carries a
            # requested product.
            # NOTE(review): an in-clause filter requires a k-NN engine and
            # OpenSearch version that support it — confirm for the cluster.
            knn_body["filter"] = product_filter
        return {"knn": {vector_field: knn_body}}

    # Hybrid: multi_match selects the candidate documents; because
    # boost_mode is "replace", the final score comes from the script alone.
    scored_query = {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": text,
                    "fields": text_fields,
                }
            },
            "functions": [
                {
                    "script_score": {
                        "script": {
                            "source": f"cosineSimilarity(params.query_vector, doc['{vector_field}']) * params.alpha + (1.0 - params.alpha)",
                            "params": {"query_vector": query_vector, "alpha": alpha},
                        }
                    }
                }
            ],
            "boost_mode": "replace",
        }
    }

    if product_filter is None:
        return scored_query

    return {
        "bool": {
            "must": [scored_query],
            "filter": [product_filter],
        }
    }
This query returns nothing when filter_products is not empty.
I have tried various methods, validated the uploaded data and schema. It would be great if someone could please help me solve this.