Context
I have Rest API springboot application which retrieves fares from cosmos db using mongo APIs with passed sorting searching and filtering criteria.
Document schema stored in Cosmos DB
[ { "_id": {"$oid": "667aef3ddae0160cd28b9eae"}, "airlineCode": "xyz", "arrivalCountryCode": "SE", "arrivalStationCode": "GOT", "currencyCodeDestination": "SEK", "currencyCodeOrigin": "EUR", "departureCountryCode": "IT", "departureDate": {"$date": "2024-06-22T22:00:00.000Z"}, "departureStationCode": "CAG", "fareType": "BASIC", "month": 6, "originalPriceDestination": {"$numberDecimal": 3419.00}, "originalPriceOrigin": {"$numberDecimal": 289.99}, "passengerType": "ADULT", "priceDestination": {"$numberDecimal": 3419.00}, "priceOrigin": {"$numberDecimal": 289.99}, "primaryKey": "CAG:GOT:19897:xyz:BASIC", "promotionTypeDestination": "", "promotionTypeOrigin": "", "snapShotId": 1719332626405, "viaCountryCode": "DE", "viaStationCode": "DUS", "year": 2024 } ]
Cosmos DB configuration
- non partitioned collection(Fares)
- 10000 RU/S
- Server side retry enabled
- Spring boot application and cosmos db is in same region to avoid any network latency.
- Collection data size is only 0.7 GB or 700K records
- Indexed as below:
"indexes": [
{
"key": {
"keys": [
"_id"
]
},
"options": null
},
{
"key": {
"keys": [
"departureStationCode",
"arrivalStationCode",
"month",
"year"
]
},
"options": null
},
{
"key": {
"keys": [
"snapShotId",
"fareType"
]
},
"options": null
},
{
"key": {
"keys": [
"passengerType",
"fareType",
"airlineCode",
"departureDate"
]
},
"options": null
},
{
"key": {
"keys": [
"arrivalStationCode",
"departureDate"
]
},
"options": null
},
{
"key": {
"keys": [
"primaryKey"
]
},
"options": null
},
{
"key": {
"keys": [
"departureDate"
]
},
"options": null
},
{
"key": {
"keys": [
"departureStationCode"
]
},
"options": null
},
{
"key": {
"keys": [
"departureCountryCode"
]
},
"options": null
},
{
"key": {
"keys": [
"arrivalStationCode"
]
},
"options": null
},
{
"key": {
"keys": [
"arrivalCountryCode"
]
},
"options": null
},
{
"key": {
"keys": [
"passengerType"
]
},
"options": null
},
{
"key": {
"keys": [
"priceOrigin"
]
},
"options": null
},
{
"key": {
"keys": [
"priceDestination"
]
},
"options": null
},
{
"key": {
"keys": [
"airlineCode"
]
},
"options": null
},
{
"key": {
"keys": [
"snapShotId"
]
},
"options": null
},
{
"key": {
"keys": [
"fareType"
]
},
"options": null
},
{
"key": {
"keys": [
"month"
]
},
"options": null
},
{
"key": {
"keys": [
"year"
]
},
"options": null
}
],
Mongo Aggregation pipeline
[
{
"$match": {
"$and": [
{
"passengerType": "ADULT"
},
{
"fareType": "BASIC"
},
{
"airlineCode": "XYZ"
},
{
"departureDate": {
"$gte": {
"$date": "2024-06-23T22:00:00Z"
}
}
}
]
}
},
{
"$sort": {
"priceOrigin": 1
}
},
{
"$group": {
"_id": {
"arrivalStationCode": "$arrivalStationCode",
"departureDate": "$departureDate"
},
"priceOrigin": {
"$min": "$priceOrigin"
},
"matchingFare": {
"$first": "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": "$matchingFare"
}
},
{
"$sort": {
"priceOrigin": 1
}
},
{
"$sort": {
"priceDestination": 1
}
},
{
"$skip": 0
},
{
"$limit": 1000
}
]
Consider mongo aggregation pipeline can’t be changed as it’s already optimized. Also there is no limit for cost to get performance improments.
With above details in mind, I would like to ask my question why below mongo aggregation pipeline generated via mongo api is taking 1.5 minute to return result. I have indexed every possible field which is used for filtering and sorting but no gain in performance.
Just to experiment, I even tried partitioning (partition key: departureDate) collection and increased RUs till 100K but not getting any perfromance (However I think partitioning is not required as our data size is too less). I have also gone through MS guildelines for query tunning as mentioned in here https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/troubleshoot-query-performance. But could not find any improvement points. Is there any way we can get results in miliseconds/few seconds for this usecase? Or I should go for SQL DB ?
Query Performance Metrics
[
{
"ActivityId": "3453242d-4650-4ab0-9f82-57cecab608b1",
"command": "db.runCommand({explain: { 'aggregate' : 'fares', 'pipeline' : [{ '$match' : { '$and' : [{ 'passengerType' : 'ADULT' }, { 'fareType' : 'BASIC' }, { 'airlineCode' : 'XYZ' }, { 'departureDate' : { '$gte' : ISODate('2024-06-23T00:00:00Z') } }] } }, { '$sort' : { 'priceOrigin' : 1 } }, { '$group' : { '_id' : { 'arrivalStationCode' : '$arrivalStationCode', 'departureDate' : '$departureDate' }, 'priceOrigin' : { '$min' : '$priceOrigin' }, 'matchingFare' : { '$first' : '$$ROOT' } } }, { '$replaceRoot' : { 'newRoot' : '$matchingFare' } }, { '$sort' : { 'priceOrigin' : 1 } }, { '$sort' : { 'priceDestination' : 1 } }, { '$skip' : 0 }, { '$limit' : 1000 }], 'explain' : true, '$db' : 'mongo_db_fare_test', 'lsid' : { 'id' : CSUUID('4e1d2d83-edac-4204-9f8e-5500738a0033') } }})",
"continuation": {
"hasMore": false
},
"estimatedDelayFromRateLimitingInMilliseconds": 0,
"ok": 1,
"retriedDueToRateLimiting": false,
"stages": [
{
"stage": "$query",
"timeInclusiveMS": 25441.1806,
"timeExclusiveMS": 25441.1806,
"in": 312354,
"out": 312354,
"dependency": {
"getNextPageCount": 18,
"count": 18,
"time": 0,
"bytes": 153613555
},
"details": {
"database": "mongo_db_fare_test",
"collection": "fares",
"query": {
"$and": [
{
"passengerType": {
"$eq": "ADULT"
}
},
{
"fareType": {
"$eq": "BASIC"
}
},
{
"airlineCode": {
"$eq": "XYZ"
}
},
{
"departureDate": {
"$gte": {"$date": "2024-06-23T00:00:00.000Z"}
}
}
]
},
"indexUsage": {
"pathsIndexed": {
"individualIndexes": ["departureDate", "passengerType", "airlineCode", "fareType", "priceOrigin"],
"compoundIndexes": []
},
"pathsNotIndexed": {
"individualIndexes": [],
"compoundIndexes": []
}
},
"sort": {
"priceOrigin": 1
},
"shardInformation": [
{
"activityId": "e7e41836-8580-4ca5-80a7-9353e5105463",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 622.6982,
"preemptions": 0,
"outputDocumentCount": 17799,
"retrievedDocumentCount": 17799
},
{
"activityId": "3c957e8f-d023-4148-8f7c-c91cf132c23a",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 635.7353,
"preemptions": 0,
"outputDocumentCount": 18215,
"retrievedDocumentCount": 18215
},
{
"activityId": "329db46b-2542-4c25-bb45-1333acef9ef1",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 603.3912,
"preemptions": 0,
"outputDocumentCount": 18124,
"retrievedDocumentCount": 18124
},
{
"activityId": "db2393a4-d274-4730-9a19-9e6bd902275e",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 707.025,
"preemptions": 0,
"outputDocumentCount": 18530,
"retrievedDocumentCount": 18530
},
{
"activityId": "abc1d861-32f2-41a1-a9bd-f1350100812f",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 702.3894,
"preemptions": 0,
"outputDocumentCount": 18288,
"retrievedDocumentCount": 18288
},
{
"activityId": "b55e1e0e-90e2-4cce-9521-0d72656376e6",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 675.2357,
"preemptions": 0,
"outputDocumentCount": 18226,
"retrievedDocumentCount": 18226
},
{
"activityId": "fe08bbe8-60e1-453d-90d8-98e1cc239ac6",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 593.0335,
"preemptions": 0,
"outputDocumentCount": 18051,
"retrievedDocumentCount": 18051
},
{
"activityId": "05892075-20e5-4ae1-bad3-35abd0ffead1",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 629.8684,
"preemptions": 0,
"outputDocumentCount": 17413,
"retrievedDocumentCount": 17413
},
{
"activityId": "abd07d1b-ef3e-4096-8888-d3ec76fb6d5e",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 663.7408,
"preemptions": 0,
"outputDocumentCount": 17696,
"retrievedDocumentCount": 17696
},
{
"activityId": "f047b565-1e50-409a-8904-5680e3a3d18c",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 704.6657,
"preemptions": 0,
"outputDocumentCount": 18227,
"retrievedDocumentCount": 18227
},
{
"activityId": "c7121fa4-815b-4a65-9483-4588d0e6d715",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 620.6626,
"preemptions": 0,
"outputDocumentCount": 17679,
"retrievedDocumentCount": 17679
},
{
"activityId": "b5da6af2-5697-4d89-ac9a-7b2a2674527b",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 628.1327,
"preemptions": 0,
"outputDocumentCount": 17672,
"retrievedDocumentCount": 17672
},
{
"activityId": "b13e9c4b-b4fb-46aa-834f-40476a701949",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 711.4814,
"preemptions": 0,
"outputDocumentCount": 18100,
"retrievedDocumentCount": 18100
},
{
"activityId": "e9f2f8f2-bb2b-4686-946c-40401dd4bba6",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 634.6194,
"preemptions": 0,
"outputDocumentCount": 17879,
"retrievedDocumentCount": 17879
},
{
"activityId": "48c4a877-fb8b-4e77-9d82-d08c3ec2e563",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 634.1238,
"preemptions": 0,
"outputDocumentCount": 17442,
"retrievedDocumentCount": 17442
},
{
"activityId": "9e308371-2d74-4b4c-a825-6288aee3ff6d",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 633.7888,
"preemptions": 0,
"outputDocumentCount": 17517,
"retrievedDocumentCount": 17517
},
{
"activityId": "bdf134f2-300f-4dd5-91a4-22385c5bf277",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 599.1789,
"preemptions": 0,
"outputDocumentCount": 17485,
"retrievedDocumentCount": 17485
},
{
"activityId": "b2f02055-c2c7-4b27-ac4e-051551e1d760",
"shardKeyRangeId": "[,FF) move next",
"durationMS": 314.4678,
"preemptions": 0,
"outputDocumentCount": 8011,
"retrievedDocumentCount": 8011
}
],
"queryMetrics": {
"retrievedDocumentCount": 312354,
"retrievedDocumentSizeBytes": 287412018,
"outputDocumentCount": 312354,
"outputDocumentSizeBytes": 183099349,
"indexHitRatio": 1,
"totalQueryExecutionTimeMS": 10137.59,
"queryPreparationTimes": {
"queryCompilationTimeMS": 2.3,
"logicalPlanBuildTimeMS": 1.07,
"physicalPlanBuildTimeMS": 5.71,
"queryOptimizationTimeMS": 0.18
},
"indexLookupTimeMS": 58.9699,
"documentLoadTimeMS": 2257.85,
"vmExecutionTimeMS": 9641.2898,
"runtimeExecutionTimes": {
"queryEngineExecutionTimeMS": 1793.9499,
"systemFunctionExecutionTimeMS": 1079.7799,
"userDefinedFunctionExecutionTimeMS": 0
},
"documentWriteTimeMS": 5530.52
}
}
},
{
"stage": "$group",
"timeInclusiveMS": 29417.2565,
"timeExclusiveMS": 3976.0759,
"in": 312354,
"out": 19537,
"details": {
"$group": {
"_id": {
"arrivalStationCode": "$arrivalStationCode",
"departureDate": "$departureDate"
},
"priceOrigin": {
"$min": "$priceOrigin"
},
"matchingFare": {
"$first": "$$ROOT"
}
}
}
},
{
"stage": "$replaceRoot",
"timeInclusiveMS": 29450.3874,
"timeExclusiveMS": 33.1309,
"in": 19537,
"out": 19537,
"details": {
"newRoot": "$matchingFare"
}
},
{
"stage": "$sort",
"timeInclusiveMS": 29546.5522,
"timeExclusiveMS": 96.1648,
"in": 39074,
"out": 0,
"details": {
"sortKey": {
"priceOrigin": 1
}
}
},
{
"stage": "$sort",
"timeInclusiveMS": 29660.9687,
"timeExclusiveMS": 114.4165,
"in": 39074,
"out": 0,
"details": {
"sortKey": {
"priceDestination": 1
}
}
},
{
"stage": "$skip",
"timeInclusiveMS": 29660.9769,
"timeExclusiveMS": 0.0082,
"in": 0,
"out": 0,
"details": {
"skip": 0
}
},
{
"stage": "$limit",
"timeInclusiveMS": 29661.1085,
"timeExclusiveMS": 0.1316,
"in": 19537,
"out": 1000,
"details": {
"limit": 1000
}
}
],
"totalRequestCharge": 36331.035
}
]
I tried indexing and improving the RU/s following query tuning guidelines.
I am expecting people to find the problem in given query and suggest how can we improve said query.