Version
Opensearch 2.13
Description
I am exploring the possibility of generating facets from my index in Opensearch using bucket aggregations and the search API. I am trying to find how to generate bucket aggregation for a nested field that satisfies a specific condition.
My index contains a “properties” field mapping, which is nested since each document may contain one or more properties that need to be queried independently. There are other fields, like “type” or “createdBy”, which have been created as object mappings, since each document will have exactly one of each. The mapping is shown below.
"idx_container_view_list": {
"mappings": {
"properties": {
"ancestorPath": {
"type": "keyword"
},
"color": {
"type": "keyword"
},
"coverFile": {
"type": "keyword"
},
"createdAt": {
"type": "date"
},
"createdBy": {
"properties": {
"email": {
"type": "keyword"
},
"firstname": {
"type": "keyword"
},
"lastname": {
"type": "keyword"
},
"username": {
"type": "keyword"
},
"uuid": {
"type": "keyword"
}
}
},
"deletedAt": {
"type": "date"
},
"deletedBy": ...,
"label": {
"type": "keyword"
},
"parent": {
"type": "keyword"
},
"properties": {
"type": "nested",
"properties": {
"propertyName": {
"type": "keyword"
},
"propertyUuid": {
"type": "keyword"
},
"propertyValue": {
"type": "keyword"
},
"schemaPropertyUuid": {
"type": "keyword"
},
"uuid": {
"type": "keyword"
},
"valueAsLocation": {
"properties": {
"latitude": {
"type": "long"
},
"longitude": {
"type": "long"
},
"placeName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"valueAsNumber": {
"type": "float"
}
}
},
"publishedAt": {
"type": "date"
},
"publishedBy": ...
},
"shared": {
"type": "boolean"
},
"type": {
"properties": {
"name": {
"type": "keyword"
},
"slug": {
"type": "keyword"
},
"uuid": {
"type": "keyword"
}
}
},
"updatedAt": {
"type": "date"
},
"updatedBy": ...,
"uuid": {
"type": "keyword"
}
}
}
}
My aim is to create facets on the properties field, ideally buckets for a specific propertyUuid. Following this article, I have created a query to get a general aggregation for all properties (in this example, I get the counts of the top 5 values for each of the top 20 properties).
"size" : 0,
"aggs": {
"different_container_property_types": {
"nested": {
"path":"properties"
},
"aggs": {
"property_uuid_bucket": {
"terms": {
"field": "properties.propertyUuid",
"size": 20
},
"aggs": {
"property_value_bucket": {
"terms": {
"field": "properties.propertyValue",
"size": 5
}
}
}
}
}
}
}
It would be more helpful to get the bucket aggregations for a specific property uuid. I have tried to add a query term in the request (see following example), but the result is to filter the documents that contain the property uuid, then create buckets for all the properties that these documents have.
"size" : 0,
"query" : {
"nested": {
"path": "properties",
"query":{
"term": {
"properties.propertyUuid": "62329d5b-dbc9-4022-93e7-4690c9069a7e"
}
}
}
},
"aggs": {
"different_container_property_types": {
"nested": {
"path":"properties"
},
"aggs": {
"property_uuid_bucket": {
"terms": {
"field": "properties.propertyUuid",
"size": 20
},
"aggs": {
"property_value_bucket": {
"terms": {
"field": "properties.propertyValue",
"size": 5
}
}
}
}
}
}
}
Part of the response:
"took": 46,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 352,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"different_container_property_types": {
"doc_count": 7624,
"property_uuid_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 583,
"buckets": [
{
"key": "02d39964-e894-4d8e-8fdd-32bf2a9c6eab",
"doc_count": 353,
"property_value_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 84,
"buckets": [...]
}
},
{
"key": "03df8c5e-61c1-4485-8978-dd2bb96c6790",
...
},
{
"key": "62329d5b-dbc9-4022-93e7-4690c9069a7e",
"doc_count": 352,
"property_value_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 77,
"buckets": [
{
"key": "b0cf8292-6005-4095-946d-51fc908243b8",
"doc_count": 138
},
{
"key": "3f5230ab-8614-436b-abc3-d5cad7f4b5a7",
"doc_count": 75
},
{
"key": "25135d79-a4c1-44a9-beed-b926b94a40fb",
"doc_count": 34
},
{
"key": "ab2807d8-1946-40e4-a156-cf3195438697",
"doc_count": 31
},
{
"key": "d03982e8-872b-4f02-a6b8-fa461e16389e",
"doc_count": 24
}
]
}
},
...
I understand that this happens because the Opensearch creates hidden documents for each property; in fact, there are 352 documents containing this property, then these documents contain 7624 properties in total.
Is there a way to get the bucket aggregation only for the specific property uuid in the query, to avoid any unnecessary load in Opensearch?