After upgrading to Loki 3.0 (with TSDB enabled), we are facing performance issues: queries are taking a long time to complete. Is there any advice or configuration change that could help improve query times?
Setup Details:
Loki Version: 3.1
Deployment Type: Loki Distributed
Ingesters: 7
Distributor: 7
Index gateway: 3
Querier: autoscaled
Frontend: autoscaled
Scheduler: 5
Chunk/results cache: 3 each
Please find the configuration below:
# Authentication is switched on for this deployment.
auth_enabled: true
# Chunk store caching: a memcached-backed chunk cache plus a separate
# memcached-backed write-dedupe cache.
chunk_store_config:
  cache_lookups_older_than: 2h
  chunk_cache_config:
    default_validity: 10m
    memcached:
      batch_size: 256
      # NOTE(review): entries expire after 5m, shorter than the 10m
      # default_validity above; confirm such a short lifetime is intended.
      expiration: 5m
      parallelism: 10
    memcached_client:
      addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-chunks.logging.svc.cluster.local
      circuit_breaker_consecutive_failures: 10
      circuit_breaker_interval: 180s
      circuit_breaker_timeout: 180s
      consistent_hash: true
      max_item_size: 4845728
      timeout: 1000ms
  write_dedupe_cache_config:
    memcached_client:
      addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-index-writes.logging.svc.cluster.local
      consistent_hash: true
# Settings shared across components; only the compactor endpoint is set here.
common:
  compactor_address: http://loki-distributed-compactor:3100
# Compactor: retention is enabled, with a 24h delete delay and 150 delete
# workers; delete requests are stored in the `aws` store.
compactor:
  delete_request_store: aws
  retention_delete_delay: 24h
  retention_delete_worker_count: 150
  retention_enabled: true
  working_directory: /var/loki/retention
# Distributor ring state is kept in the memberlist key-value store.
distributor:
  ring:
    kvstore:
      store: memberlist
# Query frontend: response compression, gRPC client limits, and the
# connection to the query scheduler.
frontend:
  compress_responses: true
  grpc_client_config:
    grpc_compression: snappy
    # NOTE(review): 16777216000 bytes is roughly 16.8 GB per message;
    # confirm limits this large are intended.
    max_recv_msg_size: 16777216000
    max_send_msg_size: 16777216000
  log_queries_longer_than: 5s
  querier_forget_delay: 10s
  scheduler_address: loki-distributed-query-scheduler:9095
  tail_proxy_url: http://loki-distributed-querier:3100
  scheduler_worker_concurrency: 20
# Querier worker: uses the same scheduler address and gRPC client limits
# as the `frontend` section.
frontend_worker:
  grpc_client_config:
    grpc_compression: snappy
    max_recv_msg_size: 16777216000
    max_send_msg_size: 16777216000
  scheduler_address: loki-distributed-query-scheduler:9095
# Index gateway runs in ring mode, with ring state in memberlist.
index_gateway:
  mode: ring
  ring:
    kvstore:
      store: memberlist
# Ingester: chunk sizing and flush cadence, ring membership, and the
# write-ahead log.
ingester:
  autoforget_unhealthy: true
  chunk_block_size: 262144
  chunk_encoding: snappy
  chunk_idle_period: 5m
  chunk_retain_period: 1m
  chunk_target_size: 3145728
  concurrent_flushes: 48
  flush_check_period: 15s
  lifecycler:
    ring:
      kvstore:
        store: memberlist
      # A replication factor of 1 means a single copy of each stream.
      replication_factor: 1
  max_chunk_age: 30m
  max_returned_stream_errors: 0
  wal:
    dir: /var/loki/wal
    enabled: true
    flush_on_shutdown: true
    replay_memory_ceiling: 4GB
# Client used to talk to ingesters: gRPC limits and connection-pool upkeep.
ingester_client:
  grpc_client_config:
    grpc_compression: snappy
    max_recv_msg_size: 16777216000
    max_send_msg_size: 16777216000
  pool_config:
    client_cleanup_period: 5s
    health_check_ingesters: true
# Limits for ingestion, querying, and retention.
limits_config:
  allow_structured_metadata: false
  cardinality_limit: 400000
  deletion_mode: filter-and-delete
  ingestion_burst_size_mb: 148
  ingestion_rate_mb: 128
  ingestion_rate_strategy: global
  max_cache_freshness_per_query: 10m
  max_chunks_per_query: 5000000
  max_entries_limit_per_query: 10000
  max_global_streams_per_user: 20000
  max_label_names_per_series: 30
  max_line_size: 0
  max_querier_bytes_read: 150GB
  max_query_parallelism: 256
  max_query_series: 50000
  max_stats_cache_freshness: 0
  max_streams_matchers_per_query: 10000
  max_streams_per_user: 0
  per_stream_rate_limit: 30MB
  per_stream_rate_limit_burst: 50MB
  reject_old_samples: false
  retention_period: 8760h
  # Queries are split into 15m sub-queries.
  # NOTE(review): together with the parallelism limits in this section,
  # this determines how many sub-queries a long range fans out into;
  # worth checking against querier capacity.
  split_queries_by_interval: 15m
  tsdb_max_bytes_per_shard: 1000MB
  tsdb_max_query_parallelism: 1000
  tsdb_precompute_chunks: true
  unordered_writes: true
# Memberlist gossip: the address used to join the cluster.
memberlist:
  join_members:
    - loki-distributed-memberlist
# Querier: engine look-back, per-querier concurrency, and how far back
# ingesters are consulted.
querier:
  engine:
    max_look_back_period: 30s
  max_concurrent: 10
  multi_tenant_queries_enabled: true
  query_ingesters_within: 1h
# Range-query handling: step alignment, sharding, and a memcached-backed
# results cache.
query_range:
  align_queries_with_step: true
  cache_results: true
  max_retries: 1
  parallelise_shardable_queries: true
  results_cache:
    cache:
      memcached:
        batch_size: 1024
        expiration: 10m
        parallelism: 100
      memcached_client:
        addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-frontend.logging.svc.cluster.local
        consistent_hash: true
        max_item_size: 4597152
        timeout: 500ms
        update_interval: 1m
# Query scheduler: queue sizing and querier forget delay.
query_scheduler:
  max_outstanding_requests_per_tenant: 32768
  max_queue_hierarchy_levels: 0
  # NOTE(review): 15s here, whereas `frontend.querier_forget_delay` is 10s;
  # confirm the difference is deliberate.
  querier_forget_delay: 15s
# Runtime overrides file and its 120s reload period.
runtime_config:
  file: /var/loki-distributed-runtime/runtime.yaml
  period: 120s
# Index/chunk schema periods, oldest first.
# NOTE(review): the v12 tsdb period starting 2024-07-14 is superseded by
# v13 one day later, and all three periods share the `loki_index_` prefix;
# confirm both are intended.
schema_config:
  configs:
    - from: "2020-09-07"
      index:
        period: 24h
        prefix: loki_index_
      object_store: aws
      schema: v11
      store: boltdb-shipper
    - from: "2024-07-14"
      index:
        period: 24h
        prefix: loki_index_
      object_store: s3
      schema: v12
      store: tsdb
    - from: "2024-07-15"
      index:
        period: 24h
        prefix: loki_index_
      object_store: s3
      schema: v13
      store: tsdb
# HTTP/gRPC server limits and timeouts.
server:
  grpc_server_max_concurrent_streams: 10000
  # NOTE(review): 41943040000 bytes is roughly 41.9 GB per message;
  # confirm limits this large are intended.
  grpc_server_max_recv_msg_size: 41943040000
  grpc_server_max_send_msg_size: 41943040000
  http_listen_port: 3100
  http_server_idle_timeout: 300s
  http_server_read_timeout: 300s
  http_server_write_timeout: 300s
# Storage backends: S3 object storage, the boltdb-shipper and tsdb-shipper
# index stores, and a memcached-backed index-query cache.
storage_config:
  aws:
    s3: s3://xxxxx
  boltdb_shipper:
    active_index_directory: /var/loki/index
    cache_location: /var/loki/cache
    cache_ttl: 24h
    index_gateway_client:
      # NOTE(review): short service name here, whereas tsdb_shipper below
      # uses the fully qualified name; confirm both resolve to the same
      # index gateway.
      server_address: dns:///loki-distributed-index-gateway:9095
  filesystem:
    directory: /var/loki/chunks
  index_cache_validity: 3m
  index_queries_cache_config:
    memcached:
      batch_size: 100
      expiration: 10m
      parallelism: 100
    memcached_client:
      addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-index-queries.logging.svc.cluster.local
      consistent_hash: true
      timeout: 1000ms
  tsdb_shipper:
    active_index_directory: /var/loki/tsdb-index
    cache_location: /var/loki/tsdb-cache
    index_gateway_client:
      server_address: dns:///loki-distributed-index-gateway.logging.svc.cluster.local:9095