I am currently working on a personal project to learn more about MongoDB. In this project I have a collection with a dynamic field called metadata, which can look like this:
metadata: {
"stringField": "string value",
"numberField": 10,
"dateField": "2024-01-01T12:00:00.000Z"
}
This metadata field can have any number of possible fields inside, so I created a facet search index for the metadata field and its subfields. I then created an aggregation pipeline that runs a search on that index and computes facets, and tested it against 100 million records. The performance was really bad, so I added a limit of 5000 records, and now I can perform the search and facets in about 3 seconds. It doesn't return all results, only the first 5k, but that is a start. I will post the aggregation pipeline below. Can you help me figure out whether I am doing something wrong, or whether it is possible to optimize this to run fast (around 1 second or less) for that number of records? I am new to all this.
[
  {
    "$search": {
      "compound": {
        "must": [
          {
            "text": {
              "path": { "wildcard": "*" },
              "query": "US"
            }
          }
        ]
      },
      "index": "facet-search"
    }
  },
  { "$limit": 5000 },
  {
    "$addFields": {
      "class_id": { "$toObjectId": "$classification" }
    }
  },
  {
    "$lookup": {
      "from": "Classifications",
      "localField": "class_id",
      "foreignField": "_id",
      "as": "classObj"
    }
  },
  {
    "$addFields": { "contentClassName": "$classObj.name" }
  },
  { "$unwind": "$contentClassName" },
  {
    "$project": { "_id": 0, "class_id": 0, "classObj": 0 }
  },
  {
    "$facet": {
      "docs": [],
      "DocumentTitle": [
        { "$group": { "_id": "$metadata.DocumentTitle", "count": { "$sum": 1 } } },
        { "$addFields": { "displayName": "Document Title" } },
        { "$set": { "value": "$_id", "_id": "$$REMOVE" } },
        { "$sort": { "count": -1 } },
        { "$limit": 10 }
      ],
      "Customer_Name": [
        { "$group": { "_id": "$metadata.Customer_Name", "count": { "$sum": 1 } } },
        { "$addFields": { "displayName": "Customer Name" } },
        { "$set": { "value": "$_id", "_id": "$$REMOVE" } },
        { "$sort": { "count": -1 } },
        { "$limit": 10 }
      ],
      "Branch_Number_String": [
        { "$group": { "_id": "$metadata.Branch_Number_String", "count": { "$sum": 1 } } },
        { "$addFields": { "displayName": "Branch Number" } },
        { "$set": { "value": "$_id", "_id": "$$REMOVE" } },
        { "$sort": { "count": -1 } },
        { "$limit": 10 }
      ],
      "contentClass": [
        { "$group": { "_id": "$contentClass", "count": { "$sum": 1 } } },
        { "$addFields": { "displayName": "Content Class" } },
        { "$set": { "value": "$_id", "_id": "$$REMOVE" } },
        { "$sort": { "count": -1 } },
        { "$limit": 10 }
      ]
    }
  }
]