Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* [ENHANCEMENT] Distributor: Added `cortex_distributor_received_histogram_buckets` metric to track number of buckets in received native histogram samples before validation, per user. #7569
* [ENHANCEMENT] Distributor: Add `WrappedHistogram` with configurable size limit (`-validation.max-native-histogram-size-bytes`) to cap native histogram protobuf size before unmarshalling. #7570
* [ENHANCEMENT] Ingester: Add lazy regex evaluation on head postings cache miss. Defers expensive regex matchers on high-cardinality labels to per-series filtering when a selective equality matcher already narrows the result set. Configured via `-blocks-storage.expanded_postings_cache.head.lazy-matcher-max-cardinality` (disabled by default). #7553
* [ENHANCEMENT] Query Frontend: Improve the slow query log with `component`, `source`, `user_agent`, `engine_type`, `block_store_type`, and query stats fields to aid slow query diagnosis. #7601
* [BUGFIX] Querier: Fix `queryWithRetry` and `labelsWithRetry` returning `(nil, nil)` on cancelled context by propagating `ctx.Err()`. #7370
* [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380
* [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389
Expand Down
44 changes: 42 additions & 2 deletions pkg/frontend/transport/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
queryString = f.parseRequestQueryString(r, buf)
}
if shouldReportSlowQuery {
f.reportSlowQuery(r, queryString, queryResponseTime)
f.reportSlowQuery(r, queryString, queryResponseTime, source, stats)
if f.cfg.QueryStatsEnabled {
f.getOrCreateSlowQueryMetric().WithLabelValues(source, userID).Inc()
}
Expand Down Expand Up @@ -423,18 +423,58 @@ func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values, sourc
}

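// reportSlowQuery logs a "slow query detected" entry at info level. The entry
// always carries the request method, host, path, source and time taken;
// request headers and query stats are added only when they are non-empty or
// greater than zero, followed by the formatted query string.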
func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration) {
func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration, source string, stats *querier_stats.QueryStats) {
logMessage := []any{
"msg", "slow query detected",
"method", r.Method,
"host", r.Host,
"path", r.URL.Path,
"source", source,
"time_taken", queryResponseTime.String(),
}

grafanaFields := formatGrafanaStatsFields(r)
if len(grafanaFields) > 0 {
logMessage = append(logMessage, grafanaFields...)
}

if userAgent := r.Header.Get("User-Agent"); len(userAgent) > 0 {
logMessage = append(logMessage, "user_agent", userAgent)
}
if engineType := r.Header.Get(engine.TypeHeader); len(engineType) > 0 {
logMessage = append(logMessage, "engine_type", engineType)
}
if blockStoreType := r.Header.Get(querier.BlockStoreTypeHeader); len(blockStoreType) > 0 {
logMessage = append(logMessage, "block_store_type", blockStoreType)
}
if wallTime := stats.LoadWallTime(); wallTime > 0 {
logMessage = append(logMessage, "query_wall_time_seconds", wallTime.Seconds())
}
if storageWallTime := stats.LoadQueryStorageWallTime(); storageWallTime > 0 {
logMessage = append(logMessage, "query_storage_wall_time_seconds", storageWallTime.Seconds())
}
if n := stats.LoadFetchedSeries(); n > 0 {
logMessage = append(logMessage, "fetched_series_count", n)
}
if n := stats.LoadFetchedChunks(); n > 0 {
logMessage = append(logMessage, "fetched_chunks_count", n)
}
if n := stats.LoadFetchedSamples(); n > 0 {
logMessage = append(logMessage, "fetched_samples_count", n)
}
if n := stats.LoadScannedSamples(); n > 0 {
logMessage = append(logMessage, "samples_scanned", n)
}
if n := stats.LoadFetchedChunkBytes(); n > 0 {
logMessage = append(logMessage, "fetched_chunks_bytes", n)
}
if n := stats.LoadFetchedDataBytes(); n > 0 {
logMessage = append(logMessage, "fetched_data_bytes", n)
}
if n := stats.LoadSplitQueries(); n > 0 {
logMessage = append(logMessage, "split_queries", n)
}

logMessage = append(logMessage, formatQueryString(queryString)...)

level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
Expand Down
Loading