I have an unpartitioned BigQuery table with some 425k rows scattered over 1-2 years.
I was about to partition this table by timestamp
while I’ve observed an interested phenomenon: the query already behaves as it would run on the partitioned table. E.g. the later is the timestamp, the less data and more quickly read the query.
SELECT meta FROM `project.dataset.table` where timestamp > '2024-07-09'
Bytes processed 3.25Mb
SELECT meta FROM `project.dataset.table` where timestamp > '2024-06-30'
Bytes processed 13Mb
SELECT meta FROM `project.dataset.table` where timestamp > '2024-06-15'
Bytes processed 26.81 MB
Why the database can skip the scanning the complete table? Is it reliable behavior? E.g. can I skip partitioning data on BQ at all still relying BQ can determine effectively needed output?
My table is very efficiently compressed:
Total logical bytes 2.06 GB
Total physical bytes 86.6 MB
Until know I new only clustering and partitioning (partition pruning) as the only methods to optimize performance / processing costs for BigQuery queries.
Table specification:
bq show --format=prettyjson project:dataset.table
{
"creationTime": "1660725581097",
"etag": "PNomRWba1MQFgVDSGcrgiQ==",
"id": "project:dataset.table",
"kind": "bigquery#table",
"lastModifiedTime": "1720490034640",
"location": "region",
"numActiveLogicalBytes": "2213392702",
"numActivePhysicalBytes": "90803671",
"numBytes": "2213392702",
"numCurrentPhysicalBytes": "90803671",
"numLongTermBytes": "0",
"numLongTermLogicalBytes": "0",
"numLongTermPhysicalBytes": "0",
"numRows": "425332",
"numTimeTravelPhysicalBytes": "0",
"numTotalLogicalBytes": "2213392702",
"numTotalPhysicalBytes": "90803671",
"schema": {
"fields": [
{
"description": "Feature extraction id",
"mode": "REQUIRED",
"name": "id",
"type": "STRING"
},
{
"description": "Entity id (hash system identity).",
"mode": "REQUIRED",
"name": "entity_id",
"type": "STRING"
},
{
"description": "Collector (check) run id.",
"mode": "REQUIRED",
"name": "collector_run_id",
"type": "STRING"
},
{
"description": "Timestamp of feature extraction run.",
"mode": "REQUIRED",
"name": "timestamp",
"type": "TIMESTAMP"
},
{
"description": "Feature extraction run metadata.",
"mode": "REQUIRED",
"name": "meta",
"type": "JSON"
},
{
"description": "Extracted features",
"mode": "REQUIRED",
"name": "features",
"type": "JSON"
}
]
},
"selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/project/datasets/dataset/tables/table",
"tableReference": {
"datasetId": "dataset",
"projectId": "project",
"tableId": "table"
},
"type": "TABLE"
}
Job details:
bq show -j --format=prettyjson project:region.bquxjob_562108fe_190966d854d
{
"configuration": {
"jobType": "QUERY",
"query": {
"destinationTable": {
"datasetId": "_c7a721d4b70d2392c6643caa52c5ecf0fa067025",
"projectId": "project",
"tableId": "anoncb8970105fa30adbf37868493309113c88adcb284ef581320a4553c0eb2d1a66"
},
"priority": "INTERACTIVE",
"query": "SELECT meta FROM `project.dataset.table` where timestamp > '2024-06-30'",
"useLegacySql": false,
"writeDisposition": "WRITE_TRUNCATE"
}
},
"etag": "g7fZ9ss84zKxUWA8PXpjZg==",
"id": "project:region.bquxjob_562108fe_190966d854d",
"jobCreationReason": {
"code": "REQUESTED"
},
"jobReference": {
"jobId": "bquxjob_562108fe_190966d854d",
"location": "region",
"projectId": "project"
},
"kind": "bigquery#job",
"principal_subject": "user:user1",
"selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/project/jobs/bquxjob_562108fe_190966d854d?location=region",
"statistics": {
"creationTime": "1720510678481",
"endTime": "1720510679673",
"finalExecutionDurationMs": "681",
"query": {
"billingTier": 1,
"cacheHit": false,
"estimatedBytesProcessed": "206178878",
"metadataCacheStatistics": {
"tableMetadataCacheUsage": [
{
"explanation": "Table must be partitioned and clustered to use CMETA.",
"tableReference": {
"datasetId": "dataset",
"projectId": "project",
"tableId": "table"
},
"unusedReason": "OTHER_REASON"
}
]
},
"performanceInsights": {
"avgPreviousExecutionMs": "1358"
},
"queryPlan": [
{
"completedParallelInputs": "127",
"computeMode": "BIGQUERY",
"computeMsAvg": "19",
"computeMsMax": "125",
"computeRatioAvg": 0.05,
"computeRatioMax": 0.32894736842105265,
"endMs": "1720510678747",
"id": "0",
"name": "S00: Input",
"parallelInputs": "127",
"readMsAvg": "4",
"readMsMax": "10",
"readRatioAvg": 0.010526315789473684,
"readRatioMax": 0.02631578947368421,
"recordsRead": "21038",
"recordsWritten": "21038",
"shuffleOutputBytes": "17148416",
"shuffleOutputBytesSpilled": "0",
"slotMs": "287",
"startMs": "1720510678666",
"status": "COMPLETE",
"steps": [
{
"kind": "READ",
"substeps": [
"$2:meta, $1:timestamp",
"FROM project.dataset.table",
"WHERE greater($1, 1719705600.000000000)"
]
},
{
"kind": "WRITE",
"substeps": [
"$2",
"TO __stage00_output"
]
}
],
"waitMsAvg": "36",
"waitMsMax": "67",
"waitRatioAvg": 0.09473684210526316,
"waitRatioMax": 0.1763157894736842,
"writeMsAvg": "2",
"writeMsMax": "5",
"writeRatioAvg": 0.005263157894736842,
"writeRatioMax": 0.013157894736842105
},
{
"completedParallelInputs": "1",
"computeMode": "BIGQUERY",
"computeMsAvg": "380",
"computeMsMax": "380",
"computeRatioAvg": 1,
"computeRatioMax": 1,
"endMs": "1720510679148",
"id": "1",
"inputStages": [
"0"
],
"name": "S01: Output",
"parallelInputs": "1",
"readMsAvg": "0",
"readMsMax": "0",
"readRatioAvg": 0,
"readRatioMax": 0,
"recordsRead": "21038",
"recordsWritten": "21038",
"shuffleOutputBytes": "9601578",
"shuffleOutputBytesSpilled": "0",
"slotMs": "526",
"startMs": "1720510678843",
"status": "COMPLETE",
"steps": [
{
"kind": "READ",
"substeps": [
"$2",
"FROM __stage00_output"
]
},
{
"kind": "WRITE",
"substeps": [
"$2",
"TO __stage01_output"
]
}
],
"waitMsAvg": "93",
"waitMsMax": "93",
"waitRatioAvg": 0.24473684210526317,
"waitRatioMax": 0.24473684210526317,
"writeMsAvg": "76",
"writeMsMax": "76",
"writeRatioAvg": 0.2,
"writeRatioMax": 0.2
}
],
"referencedTables": [
{
"datasetId": "dataset",
"projectId": "project",
"tableId": "table"
}
],
"statementType": "SELECT",
"timeline": [
{
"activeUnits": "0",
"completedUnits": "128",
"elapsedMs": "595",
"estimatedRunnableUnits": "0",
"pendingUnits": "0",
"totalSlotMs": "814"
},
{
"completedUnits": "128",
"elapsedMs": "657",
"estimatedRunnableUnits": "0",
"pendingUnits": "0",
"totalSlotMs": "814"
}
],
"totalBytesBilled": "13631488",
"totalBytesProcessed": "13004234",
"totalPartitionsProcessed": "0",
"totalSlotMs": "814",
"transferredBytes": "0"
},
"startTime": "1720510678570",
"totalBytesProcessed": "13004234",
"totalSlotMs": "814"
},
"status": {
"state": "DONE"
},
"user_email": "user1"
}