From 2aebb39d51fc450204cdebd1f2c66d555bac4c89 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Fri, 15 May 2026 10:52:49 -0400 Subject: [PATCH 1/7] feat(_mapping): timestamp pushdown + column-hints fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two small additions to the ES-compat `_mapping(s)` endpoint that together let downstream callers (e.g. Trino's ES connector) skip the expensive `list_fields` scan in the common case. Today `GET /_elastic/{index}/_mapping(s)` calls `list_fields` over every published split. On indexes with hundreds of thousands of dynamic fields this can take several seconds and runs into `QW_FIELD_LIST_SIZE_LIMIT` (100k by default). This PR addresses both pieces with no proto change: 1. Timestamp pushdown New `?start_timestamp=…&end_timestamp=…` URL params on `_mapping(s)`, forwarded into `ListFieldsRequest` verbatim. The metastore prunes the candidate split set by time window before any leaf fan-out. Unit is epoch seconds, half-open interval — matching the existing `ListFieldsRequest` proto contract. 2. Column-hints fast path New `?fields=…` URL param (comma-separated names). When every requested name is a flat literal (no `*`, no `?`, no `.`) declared in the union of the indexes' `doc_mapping`, the handler builds the response straight from the declared mapping, filtered to those names. No `list_fields` call, no split I/O. Anything else (wildcards, dotted paths, names not in `doc_mapping`) falls through to the full-mapping path: `list_fields` over the splits in the time range, full unfiltered mapping returned — same shape as today, just with the timestamp-pushdown optimization applied. Notes: - Unknown query params are silently ignored (no `deny_unknown_fields`) to stay compatible with standard ES clients that pass `pretty`, `ignore_unavailable`, `allow_no_indices`, etc. - No proto change. Stays on existing `ListFieldsRequest`. - `IndexMappingQueryParams` parser and the new `ElasticsearchMappingsResponse::from_doc_mapping_filtered` are unit-tested in their respective modules. --- quickwit/Cargo.lock | 1 + quickwit/Cargo.toml | 1 + quickwit/quickwit-serve/Cargo.toml | 1 + .../src/elasticsearch_api/filter.rs | 5 +- .../model/index_mapping_query_params.rs | 102 +++++++++++++++++ .../src/elasticsearch_api/model/mappings.rs | 105 +++++++++++++++++- .../src/elasticsearch_api/model/mod.rs | 2 + .../src/elasticsearch_api/rest_handler.rs | 69 ++++++++++-- 8 files changed, 271 insertions(+), 15 deletions(-) create mode 100644 quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 330e6591856..713d9fc13e1 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9355,6 +9355,7 @@ dependencies = [ "serde", "serde_json", "serde_qs 0.15.0", + "serde_urlencoded", "serde_with", "tempfile", "thiserror 2.0.18", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 5d9e5ce401d..1534e1cbe96 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -248,6 +248,7 @@ serde = { version = "1.0.228", features = ["derive", "rc"] } serde_json = "1.0" serde_json_borrow = "0.9" serde_qs = { version = "0.15" } +serde_urlencoded = "0.7" serde_with = "3.20" serde_yaml = "0.9" serial_test = { version = "3.4", features = ["file_locks"] } diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index bd3ee2006a0..a7c3a528848 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -90,6 +90,7 @@ itertools = { workspace = true } mockall = { workspace = true } opentelemetry = { workspace = true } opentelemetry_sdk = { workspace = true } +serde_urlencoded = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs index 5f8ddd69f9f..79b990d7330 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs @@ -20,7 +20,7 @@ use warp::{Filter, Rejection}; use super::model::{ CatIndexQueryParams, DeleteQueryParams, FieldCapabilityQueryParams, FieldCapabilityRequestBody, - MultiSearchQueryParams, SearchQueryParamsCount, + IndexMappingQueryParams, MultiSearchQueryParams, SearchQueryParamsCount, }; use crate::Body; use crate::decompression::get_body_bytes; @@ -285,9 +285,10 @@ pub(crate) fn elastic_aliases_filter() -> impl Filter impl Filter + Clone { +-> impl Filter + Clone { warp::path!("_elastic" / String / "_mapping") .or(warp::path!("_elastic" / String / "_mappings")) .unify() .and(warp::get()) + .and(warp::query()) } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs new file mode 100644 index 00000000000..e19b6deebb0 --- /dev/null +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -0,0 +1,102 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::{Deserialize, Serialize}; + +/// Query parameters for `_mapping(s)`. Unknown params are silently ignored. +/// +/// Timestamps are epoch seconds, half-open `[start, end)` — forwarded into +/// `ListFieldsRequest` to prune splits. `fields` is a comma-separated hint; +/// see `parse_field_hints` in `rest_handler.rs` for the fast-path semantics. +#[serde_with::skip_serializing_none] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct IndexMappingQueryParams { + #[serde(default)] + pub start_timestamp: Option, + #[serde(default)] + pub end_timestamp: Option, + #[serde(default)] + pub fields: Option, +} + +#[cfg(test)] +mod tests { + use super::IndexMappingQueryParams; + + #[test] + fn empty_query_string_yields_none() { + let params: IndexMappingQueryParams = serde_urlencoded::from_str("").unwrap(); + assert!(params.start_timestamp.is_none()); + assert!(params.end_timestamp.is_none()); + assert!(params.fields.is_none()); + } + + #[test] + fn both_params_present_yield_some() { + let qs = "start_timestamp=1712160204&end_timestamp=1712764984"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.start_timestamp, Some(1712160204)); + assert_eq!(params.end_timestamp, Some(1712764984)); + assert!(params.fields.is_none()); + } + + #[test] + fn only_start_timestamp_present() { + let qs = "start_timestamp=1712160204"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.start_timestamp, Some(1712160204)); + assert!(params.end_timestamp.is_none()); + } + + #[test] + fn only_end_timestamp_present() { + let qs = "end_timestamp=1712764984"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert!(params.start_timestamp.is_none()); + assert_eq!(params.end_timestamp, Some(1712764984)); + } + + #[test] + fn unknown_field_is_ignored() { + let qs = "start_timestamp=1&pretty=true&ignore_unavailable=true"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.start_timestamp, Some(1)); + assert!(params.end_timestamp.is_none()); + assert!(params.fields.is_none()); + } + + #[test] + fn fields_param_present() { + let qs = "fields=host,message,status"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.fields.as_deref(), Some("host,message,status")); + } + + #[test] + fn fields_combined_with_timestamps() { + let qs = "start_timestamp=1&end_timestamp=2&fields=host"; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.start_timestamp, Some(1)); + assert_eq!(params.end_timestamp, Some(2)); + assert_eq!(params.fields.as_deref(), Some("host")); + } + + #[test] + fn empty_fields_value() { + // Empty string parses as Some(""); `parse_field_hints` treats it as absent. + let qs = "fields="; + let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + assert_eq!(params.fields.as_deref(), Some("")); + } +} diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs index ad09bf67243..3b79d304a17 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use quickwit_doc_mapper::{FieldMappingEntry, FieldMappingType}; use quickwit_metastore::IndexMetadata; @@ -66,11 +66,25 @@ impl ElasticsearchMappingsResponse { indexes_metadata: Vec, list_fields_response: Option<&ListFieldsResponse>, ) -> Self { + Self::from_doc_mapping_filtered(indexes_metadata, list_fields_response, &[]) + } + + /// Filters to `requested_fields` by exact top-level name. Empty slice = no + /// filter; matched names retain their full declared subtree. + pub fn from_doc_mapping_filtered( + indexes_metadata: Vec, + list_fields_response: Option<&ListFieldsResponse>, + requested_fields: &[String], + ) -> Self { + let filter: HashSet<&str> = requested_fields.iter().map(String::as_str).collect(); let indices = indexes_metadata .into_iter() .map(|index_metadata| { let field_mappings = &index_metadata.index_config.doc_mapping.field_mappings; let mut properties = build_properties(field_mappings); + if !filter.is_empty() { + properties.retain(|name, _| filter.contains(name.as_str())); + } if let Some(list_fields) = list_fields_response { merge_dynamic_fields(&mut properties, list_fields); } @@ -272,10 +286,10 @@ mod tests { assert_eq!(meta["properties"]["source"]["type"], "keyword"); } + use quickwit_proto::search::ListFieldsEntry; + #[test] fn test_merge_dynamic_fields_skips_existing_and_internal() { - use quickwit_proto::search::ListFieldsEntry; - let mut properties = HashMap::new(); properties.insert("title".to_string(), FieldMapping::Leaf { typ: "text" }); @@ -306,4 +320,89 @@ mod tests { assert!(properties.contains_key("dynamic_field")); assert!(!properties.contains_key("_timestamp")); } + + fn make_index_metadata(field_mappings: serde_json::Value) -> IndexMetadata { + let entries: Vec = serde_json::from_value(field_mappings).unwrap(); + let mut metadata = IndexMetadata::for_test("test", "ram:///indexes/test"); + metadata.index_config.doc_mapping.field_mappings = entries; + metadata + } + + fn properties_of(response: &ElasticsearchMappingsResponse) -> &HashMap { + &response.indices["test"].mappings.properties + } + + #[test] + fn from_doc_mapping_filtered_keeps_only_requested() { + let index_metadata = make_index_metadata(serde_json::json!([ + { "name": "host", "type": "text" }, + { "name": "message", "type": "text" }, + { "name": "status", "type": "i64" }, + { "name": "service", "type": "text" }, + { "name": "trace_id", "type": "text" }, + ])); + let requested = vec!["host".to_string(), "message".to_string()]; + let response = ElasticsearchMappingsResponse::from_doc_mapping_filtered( + vec![index_metadata], + None, + &requested, + ); + let props = properties_of(&response); + assert_eq!(props.len(), 2); + assert!(props.contains_key("host")); + assert!(props.contains_key("message")); + assert!(!props.contains_key("status")); + assert!(!props.contains_key("service")); + assert!(!props.contains_key("trace_id")); + } + + #[test] + fn from_doc_mapping_filtered_includes_object_subtree_for_top_level_match() { + let index_metadata = make_index_metadata(serde_json::json!([ + { + "name": "host", + "type": "object", + "field_mappings": [ + { "name": "region", "type": "text" }, + { "name": "name", "type": "text" } + ] + }, + { "name": "message", "type": "text" } + ])); + let requested = vec!["host".to_string()]; + let response = ElasticsearchMappingsResponse::from_doc_mapping_filtered( + vec![index_metadata], + None, + &requested, + ); + let serialized = serde_json::to_value(&response).unwrap(); + // The `host` object subtree is preserved verbatim — both nested fields stay. + let host_props = &serialized["test"]["mappings"]["properties"]["host"]["properties"]; + assert_eq!(host_props["region"]["type"], "keyword"); + assert_eq!(host_props["name"]["type"], "keyword"); + // `message` is filtered out. + assert!( + serialized["test"]["mappings"]["properties"] + .get("message") + .is_none() + ); + } + + #[test] + fn from_doc_mapping_filtered_empty_request_returns_all() { + let index_metadata = make_index_metadata(serde_json::json!([ + { "name": "host", "type": "text" }, + { "name": "message", "type": "text" } + ])); + let response_all = + ElasticsearchMappingsResponse::from_doc_mapping(vec![index_metadata.clone()], None); + let response_filtered = ElasticsearchMappingsResponse::from_doc_mapping_filtered( + vec![index_metadata], + None, + &[], + ); + let serialized_all = serde_json::to_value(&response_all).unwrap(); + let serialized_filtered = serde_json::to_value(&response_filtered).unwrap(); + assert_eq!(serialized_all, serialized_filtered); + } } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs index 4351a26b65b..f64af3e5413 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs @@ -17,6 +17,7 @@ mod bulk_query_params; mod cat_indices; mod error; mod field_capability; +mod index_mapping_query_params; mod mappings; mod multi_search; mod scroll; @@ -36,6 +37,7 @@ pub use field_capability::{ FieldCapabilityQueryParams, FieldCapabilityRequestBody, FieldCapabilityResponse, build_list_field_request_for_es_api, convert_to_es_field_capabilities_response, }; +pub use index_mapping_query_params::IndexMappingQueryParams; pub(crate) use mappings::ElasticsearchMappingsResponse; pub use multi_search::{ MultiSearchHeader, MultiSearchQueryParams, MultiSearchResponse, MultiSearchSingleResponse, diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 9f4a83f357f..547346718bd 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::str::from_utf8; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -58,9 +58,9 @@ use super::model::{ CatIndexQueryParams, DeleteQueryParams, ElasticsearchCatIndexResponse, ElasticsearchError, ElasticsearchResolveIndexEntryResponse, ElasticsearchResolveIndexResponse, ElasticsearchResponse, ElasticsearchStatsResponse, FieldCapabilityQueryParams, - FieldCapabilityRequestBody, FieldCapabilityResponse, MultiSearchHeader, MultiSearchQueryParams, - MultiSearchResponse, MultiSearchSingleResponse, ScrollQueryParams, SearchBody, - SearchQueryParams, SearchQueryParamsCount, StatsResponseEntry, + FieldCapabilityRequestBody, FieldCapabilityResponse, IndexMappingQueryParams, + MultiSearchHeader, MultiSearchQueryParams, MultiSearchResponse, MultiSearchSingleResponse, + ScrollQueryParams, SearchBody, SearchQueryParams, SearchQueryParamsCount, StatsResponseEntry, build_list_field_request_for_es_api, convert_to_es_field_capabilities_response, }; use super::{TrackTotalHits, make_elastic_api_response}; @@ -199,8 +199,37 @@ async fn get_index_metadata( Ok(index_metadata) } +/// Splits `fields=` on commas, trims, drops empties. `None` if nothing usable remains. +fn parse_field_hints(params: &IndexMappingQueryParams) -> Option> { + let raw = params.fields.as_deref()?; + let tokens: Vec = raw + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + if tokens.is_empty() { + None + } else { + Some(tokens) + } +} + +fn collect_declared_top_level_names(indexes_metadata: &[IndexMetadata]) -> HashSet { + let mut names = HashSet::new(); + for metadata in indexes_metadata { + for entry in &metadata.index_config.doc_mapping.field_mappings { + names.insert(entry.name.clone()); + } + } + names +} + +/// `_mapping(s)` handler. With `?fields=…` listing only flat declared fields, +/// serves the response from `doc_mapping` and skips `list_fields`. Anything +/// else falls through to a split fan-out filtered by `[start, end)`. pub(crate) async fn es_compat_index_mapping( index_id: String, + params: IndexMappingQueryParams, mut metastore: MetastoreServiceClient, search_service: Arc, ) -> Result { @@ -214,22 +243,42 @@ pub(crate) async fn es_compat_index_mapping( .iter() .map(|m| m.index_id().to_string()) .collect(); + + let requested_fields = parse_field_hints(¶ms); + + // Fast path: every requested name is a flat declared field — skip list_fields. + if let Some(requested_fields) = &requested_fields { + let declared_top: HashSet = collect_declared_top_level_names(&indexes_metadata); + let all_declared = requested_fields.iter().all(|requested| { + !requested.contains('*') + && !requested.contains('?') + && !requested.contains('.') + && declared_top.contains(requested.as_str()) + }); + if all_declared { + return Ok(ElasticsearchMappingsResponse::from_doc_mapping_filtered( + indexes_metadata, + None, + requested_fields, + )); + } + } + let list_fields_request = quickwit_proto::search::ListFieldsRequest { index_id_patterns, - field_patterns: Vec::new(), - start_timestamp: None, - end_timestamp: None, + field_patterns: requested_fields.unwrap_or_default(), + start_timestamp: params.start_timestamp, + end_timestamp: params.end_timestamp, query_ast: None, }; let list_fields_response = search_service .root_list_fields(list_fields_request) .await .ok(); - let response = ElasticsearchMappingsResponse::from_doc_mapping( + Ok(ElasticsearchMappingsResponse::from_doc_mapping( indexes_metadata, list_fields_response.as_ref(), - ); - Ok(response) + )) } /// GET or POST _elastic/_search From 2f719f672319d9a6c07e75eca01830e746689801 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Wed, 20 May 2026 10:07:23 -0400 Subject: [PATCH 2/7] address review feedback on _mapping fast path - rename `fields` query param to `field_patterns` to mirror `ListFieldsRequest.field_patterns` - switch tests to `serde_qs` (already a workspace dep, matches the bulk-query-params test style) - move the declared-field filter out of `mappings.rs` and into the rest_handler fast path: trim `field_mappings` in place before calling `from_doc_mapping`, dropping `from_doc_mapping_filtered` entirely (dynamic fields were already filtered at the leaves via `ListFieldsRequest.field_patterns`) - nits in `parse_field_patterns`: trim/filter before allocating the owned String per token - nits in `collect_declared_top_level_names`: functional flat_map style --- .../model/index_mapping_query_params.rs | 53 +++++---- .../src/elasticsearch_api/model/mappings.rs | 106 +----------------- .../src/elasticsearch_api/rest_handler.rs | 67 ++++++----- 3 files changed, 73 insertions(+), 153 deletions(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs index e19b6deebb0..05092dc233c 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -17,8 +17,9 @@ use serde::{Deserialize, Serialize}; /// Query parameters for `_mapping(s)`. Unknown params are silently ignored. /// /// Timestamps are epoch seconds, half-open `[start, end)` — forwarded into -/// `ListFieldsRequest` to prune splits. `fields` is a comma-separated hint; -/// see `parse_field_hints` in `rest_handler.rs` for the fast-path semantics. +/// `ListFieldsRequest` to prune splits. `field_patterns` is a comma-separated +/// hint mirroring `ListFieldsRequest.field_patterns`; see `parse_field_patterns` +/// in `rest_handler.rs` for the fast-path semantics. #[serde_with::skip_serializing_none] #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct IndexMappingQueryParams { @@ -27,7 +28,7 @@ pub struct IndexMappingQueryParams { #[serde(default)] pub end_timestamp: Option, #[serde(default)] - pub fields: Option, + pub field_patterns: Option, } #[cfg(test)] @@ -36,25 +37,25 @@ mod tests { #[test] fn empty_query_string_yields_none() { - let params: IndexMappingQueryParams = serde_urlencoded::from_str("").unwrap(); + let params: IndexMappingQueryParams = serde_qs::from_str("").unwrap(); assert!(params.start_timestamp.is_none()); assert!(params.end_timestamp.is_none()); - assert!(params.fields.is_none()); + assert!(params.field_patterns.is_none()); } #[test] fn both_params_present_yield_some() { let qs = "start_timestamp=1712160204&end_timestamp=1712764984"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert_eq!(params.start_timestamp, Some(1712160204)); assert_eq!(params.end_timestamp, Some(1712764984)); - assert!(params.fields.is_none()); + assert!(params.field_patterns.is_none()); } #[test] fn only_start_timestamp_present() { let qs = "start_timestamp=1712160204"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert_eq!(params.start_timestamp, Some(1712160204)); assert!(params.end_timestamp.is_none()); } @@ -62,7 +63,7 @@ mod tests { #[test] fn only_end_timestamp_present() { let qs = "end_timestamp=1712764984"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert!(params.start_timestamp.is_none()); assert_eq!(params.end_timestamp, Some(1712764984)); } @@ -70,33 +71,37 @@ mod tests { #[test] fn unknown_field_is_ignored() { let qs = "start_timestamp=1&pretty=true&ignore_unavailable=true"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert_eq!(params.start_timestamp, Some(1)); assert!(params.end_timestamp.is_none()); - assert!(params.fields.is_none()); + assert!(params.field_patterns.is_none()); } #[test] - fn fields_param_present() { - let qs = "fields=host,message,status"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); - assert_eq!(params.fields.as_deref(), Some("host,message,status")); + fn field_patterns_param_present() { + let qs = "field_patterns=host,message,status"; + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); + assert_eq!( + params.field_patterns.as_deref(), + Some("host,message,status") + ); } #[test] - fn fields_combined_with_timestamps() { - let qs = "start_timestamp=1&end_timestamp=2&fields=host"; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); + fn field_patterns_combined_with_timestamps() { + let qs = "start_timestamp=1&end_timestamp=2&field_patterns=host"; + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert_eq!(params.start_timestamp, Some(1)); assert_eq!(params.end_timestamp, Some(2)); - assert_eq!(params.fields.as_deref(), Some("host")); + assert_eq!(params.field_patterns.as_deref(), Some("host")); } #[test] - fn empty_fields_value() { - // Empty string parses as Some(""); `parse_field_hints` treats it as absent. - let qs = "fields="; - let params: IndexMappingQueryParams = serde_urlencoded::from_str(qs).unwrap(); - assert_eq!(params.fields.as_deref(), Some("")); + fn empty_field_patterns_value() { + // `serde_qs` collapses an empty value to `None` (unlike `serde_urlencoded`, + // which would yield `Some("")`). + let qs = "field_patterns="; + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); + assert!(params.field_patterns.is_none()); } } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs index 3b79d304a17..5b23c414084 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use quickwit_doc_mapper::{FieldMappingEntry, FieldMappingType}; use quickwit_metastore::IndexMetadata; @@ -62,29 +62,20 @@ enum FieldMapping { } impl ElasticsearchMappingsResponse { + /// Builds a response from declared doc-mapping field mappings, optionally + /// merged with dynamic fields discovered via `ListFields`. Callers that + /// need to restrict the declared subtree should filter + /// `index_config.doc_mapping.field_mappings` before invoking — see the + /// `field_patterns` fast path in `rest_handler.rs`. pub fn from_doc_mapping( indexes_metadata: Vec, list_fields_response: Option<&ListFieldsResponse>, ) -> Self { - Self::from_doc_mapping_filtered(indexes_metadata, list_fields_response, &[]) - } - - /// Filters to `requested_fields` by exact top-level name. Empty slice = no - /// filter; matched names retain their full declared subtree. - pub fn from_doc_mapping_filtered( - indexes_metadata: Vec, - list_fields_response: Option<&ListFieldsResponse>, - requested_fields: &[String], - ) -> Self { - let filter: HashSet<&str> = requested_fields.iter().map(String::as_str).collect(); let indices = indexes_metadata .into_iter() .map(|index_metadata| { let field_mappings = &index_metadata.index_config.doc_mapping.field_mappings; let mut properties = build_properties(field_mappings); - if !filter.is_empty() { - properties.retain(|name, _| filter.contains(name.as_str())); - } if let Some(list_fields) = list_fields_response { merge_dynamic_fields(&mut properties, list_fields); } @@ -320,89 +311,4 @@ mod tests { assert!(properties.contains_key("dynamic_field")); assert!(!properties.contains_key("_timestamp")); } - - fn make_index_metadata(field_mappings: serde_json::Value) -> IndexMetadata { - let entries: Vec = serde_json::from_value(field_mappings).unwrap(); - let mut metadata = IndexMetadata::for_test("test", "ram:///indexes/test"); - metadata.index_config.doc_mapping.field_mappings = entries; - metadata - } - - fn properties_of(response: &ElasticsearchMappingsResponse) -> &HashMap { - &response.indices["test"].mappings.properties - } - - #[test] - fn from_doc_mapping_filtered_keeps_only_requested() { - let index_metadata = make_index_metadata(serde_json::json!([ - { "name": "host", "type": "text" }, - { "name": "message", "type": "text" }, - { "name": "status", "type": "i64" }, - { "name": "service", "type": "text" }, - { "name": "trace_id", "type": "text" }, - ])); - let requested = vec!["host".to_string(), "message".to_string()]; - let response = ElasticsearchMappingsResponse::from_doc_mapping_filtered( - vec![index_metadata], - None, - &requested, - ); - let props = properties_of(&response); - assert_eq!(props.len(), 2); - assert!(props.contains_key("host")); - assert!(props.contains_key("message")); - assert!(!props.contains_key("status")); - assert!(!props.contains_key("service")); - assert!(!props.contains_key("trace_id")); - } - - #[test] - fn from_doc_mapping_filtered_includes_object_subtree_for_top_level_match() { - let index_metadata = make_index_metadata(serde_json::json!([ - { - "name": "host", - "type": "object", - "field_mappings": [ - { "name": "region", "type": "text" }, - { "name": "name", "type": "text" } - ] - }, - { "name": "message", "type": "text" } - ])); - let requested = vec!["host".to_string()]; - let response = ElasticsearchMappingsResponse::from_doc_mapping_filtered( - vec![index_metadata], - None, - &requested, - ); - let serialized = serde_json::to_value(&response).unwrap(); - // The `host` object subtree is preserved verbatim — both nested fields stay. - let host_props = &serialized["test"]["mappings"]["properties"]["host"]["properties"]; - assert_eq!(host_props["region"]["type"], "keyword"); - assert_eq!(host_props["name"]["type"], "keyword"); - // `message` is filtered out. - assert!( - serialized["test"]["mappings"]["properties"] - .get("message") - .is_none() - ); - } - - #[test] - fn from_doc_mapping_filtered_empty_request_returns_all() { - let index_metadata = make_index_metadata(serde_json::json!([ - { "name": "host", "type": "text" }, - { "name": "message", "type": "text" } - ])); - let response_all = - ElasticsearchMappingsResponse::from_doc_mapping(vec![index_metadata.clone()], None); - let response_filtered = ElasticsearchMappingsResponse::from_doc_mapping_filtered( - vec![index_metadata], - None, - &[], - ); - let serialized_all = serde_json::to_value(&response_all).unwrap(); - let serialized_filtered = serde_json::to_value(&response_filtered).unwrap(); - assert_eq!(serialized_all, serialized_filtered); - } } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 547346718bd..36fa80a440c 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -199,41 +199,41 @@ async fn get_index_metadata( Ok(index_metadata) } -/// Splits `fields=` on commas, trims, drops empties. `None` if nothing usable remains. -fn parse_field_hints(params: &IndexMappingQueryParams) -> Option> { - let raw = params.fields.as_deref()?; - let tokens: Vec = raw +/// Splits `field_patterns=` on commas, trims, drops empties. `None` if nothing +/// usable remains. +fn parse_field_patterns(params: &IndexMappingQueryParams) -> Option> { + let raw = params.field_patterns.as_deref()?; + let patterns: Vec = raw .split(',') - .map(|s| s.trim().to_string()) + .map(str::trim) .filter(|s| !s.is_empty()) + .map(str::to_string) .collect(); - if tokens.is_empty() { + if patterns.is_empty() { None } else { - Some(tokens) + Some(patterns) } } fn collect_declared_top_level_names(indexes_metadata: &[IndexMetadata]) -> HashSet { - let mut names = HashSet::new(); - for metadata in indexes_metadata { - for entry in &metadata.index_config.doc_mapping.field_mappings { - names.insert(entry.name.clone()); - } - } - names + indexes_metadata + .iter() + .flat_map(|m| &m.index_config.doc_mapping.field_mappings) + .map(|entry| entry.name.clone()) + .collect() } -/// `_mapping(s)` handler. With `?fields=…` listing only flat declared fields, -/// serves the response from `doc_mapping` and skips `list_fields`. Anything -/// else falls through to a split fan-out filtered by `[start, end)`. +/// `_mapping(s)` handler. With `?field_patterns=…` listing only flat declared +/// fields, serves the response from `doc_mapping` and skips `list_fields`. +/// Anything else falls through to a split fan-out filtered by `[start, end)`. pub(crate) async fn es_compat_index_mapping( index_id: String, params: IndexMappingQueryParams, mut metastore: MetastoreServiceClient, search_service: Arc, ) -> Result { - let indexes_metadata = if index_id.contains('*') || index_id.contains(',') { + let mut indexes_metadata = if index_id.contains('*') || index_id.contains(',') { let patterns: Vec = index_id.split(',').map(|s| s.trim().to_string()).collect(); resolve_index_patterns(&patterns, &mut metastore).await? } else { @@ -244,29 +244,38 @@ pub(crate) async fn es_compat_index_mapping( .map(|m| m.index_id().to_string()) .collect(); - let requested_fields = parse_field_hints(¶ms); + let field_patterns = parse_field_patterns(¶ms); - // Fast path: every requested name is a flat declared field — skip list_fields. - if let Some(requested_fields) = &requested_fields { + // Fast path: every requested name is a flat declared field — skip + // `list_fields` entirely and trim the declared subtree in-place so we don't + // pay for building / discarding properties downstream. + if let Some(field_patterns) = &field_patterns { let declared_top: HashSet = collect_declared_top_level_names(&indexes_metadata); - let all_declared = requested_fields.iter().all(|requested| { - !requested.contains('*') - && !requested.contains('?') - && !requested.contains('.') - && declared_top.contains(requested.as_str()) + let all_declared = field_patterns.iter().all(|pattern| { + !pattern.contains('*') + && !pattern.contains('?') + && !pattern.contains('.') + && declared_top.contains(pattern.as_str()) }); if all_declared { - return Ok(ElasticsearchMappingsResponse::from_doc_mapping_filtered( + let keep: HashSet<&str> = field_patterns.iter().map(String::as_str).collect(); + for metadata in &mut indexes_metadata { + metadata + .index_config + .doc_mapping + .field_mappings + .retain(|entry| keep.contains(entry.name.as_str())); + } + return Ok(ElasticsearchMappingsResponse::from_doc_mapping( indexes_metadata, None, - requested_fields, )); } } let list_fields_request = quickwit_proto::search::ListFieldsRequest { index_id_patterns, - field_patterns: requested_fields.unwrap_or_default(), + field_patterns: field_patterns.unwrap_or_default(), start_timestamp: params.start_timestamp, end_timestamp: params.end_timestamp, query_ast: None, From 2c2568e796ea6da8aabecb56048247d66e8d9c15 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Wed, 20 May 2026 10:39:13 -0400 Subject: [PATCH 3/7] address review followups: drop fast-path filter, remove unused dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - drop the fast-path declared-field `retain(...)` in rest_handler. `field_patterns` is now hint-only: it triggers the fast path (skip `list_fields`) when every pattern matches a flat declared field, and is pushed down to the leaves for dynamic-field filtering. Both fast and slow paths now return the full declared schema, matching slow- path semantics that existed before. - remove unused `serde_urlencoded` dev-dep from `quickwit-serve` and the workspace `Cargo.toml` (was already unused after switching tests to `serde_qs`). - `collect_declared_top_level_names`: switch back to a procedural form preallocated with `HashSet::with_capacity(sum)` to signal the upper bound — no filtering, no explosion. --- quickwit/Cargo.lock | 1 - quickwit/Cargo.toml | 1 - quickwit/quickwit-serve/Cargo.toml | 1 - .../model/index_mapping_query_params.rs | 8 ++--- .../src/elasticsearch_api/model/mappings.rs | 7 ++-- .../src/elasticsearch_api/rest_handler.rs | 36 +++++++++---------- 6 files changed, 25 insertions(+), 29 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 713d9fc13e1..330e6591856 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9355,7 +9355,6 @@ dependencies = [ "serde", "serde_json", "serde_qs 0.15.0", - "serde_urlencoded", "serde_with", "tempfile", "thiserror 2.0.18", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 1534e1cbe96..5d9e5ce401d 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -248,7 +248,6 @@ serde = { version = "1.0.228", features = ["derive", "rc"] } serde_json = "1.0" serde_json_borrow = "0.9" serde_qs = { version = "0.15" } -serde_urlencoded = "0.7" serde_with = "3.20" serde_yaml = "0.9" serial_test = { version = "3.4", features = ["file_locks"] } diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index a7c3a528848..bd3ee2006a0 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -90,7 +90,6 @@ itertools = { workspace = true } mockall = { workspace = true } opentelemetry = { workspace = true } opentelemetry_sdk = { workspace = true } -serde_urlencoded = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs index 05092dc233c..4e41b4ec957 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -18,8 +18,9 @@ use serde::{Deserialize, Serialize}; /// /// Timestamps are epoch seconds, half-open `[start, end)` — forwarded into /// `ListFieldsRequest` to prune splits. `field_patterns` is a comma-separated -/// hint mirroring `ListFieldsRequest.field_patterns`; see `parse_field_patterns` -/// in `rest_handler.rs` for the fast-path semantics. +/// hint mirroring `ListFieldsRequest.field_patterns`: it is pushed down to the +/// leaves for dynamic-field filtering and, when every pattern matches a flat +/// declared field, triggers a fast path that skips `list_fields` entirely. #[serde_with::skip_serializing_none] #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct IndexMappingQueryParams { @@ -98,8 +99,7 @@ mod tests { #[test] fn empty_field_patterns_value() { - // `serde_qs` collapses an empty value to `None` (unlike `serde_urlencoded`, - // which would yield `Some("")`). + // `serde_qs` collapses an empty value to `None`. let qs = "field_patterns="; let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert!(params.field_patterns.is_none()); diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs index 5b23c414084..e2ce229c459 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs @@ -63,10 +63,9 @@ enum FieldMapping { impl ElasticsearchMappingsResponse { /// Builds a response from declared doc-mapping field mappings, optionally - /// merged with dynamic fields discovered via `ListFields`. Callers that - /// need to restrict the declared subtree should filter - /// `index_config.doc_mapping.field_mappings` before invoking — see the - /// `field_patterns` fast path in `rest_handler.rs`. + /// merged with dynamic fields discovered via `ListFields`. Dynamic-field + /// filtering (when requested via `field_patterns`) is handled at the + /// leaves; this function does not filter on its own. pub fn from_doc_mapping( indexes_metadata: Vec, list_fields_response: Option<&ListFieldsResponse>, diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 36fa80a440c..8bac9d52b35 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -217,23 +217,31 @@ fn parse_field_patterns(params: &IndexMappingQueryParams) -> Option> } fn collect_declared_top_level_names(indexes_metadata: &[IndexMetadata]) -> HashSet { - indexes_metadata + let upper_bound: usize = indexes_metadata .iter() - .flat_map(|m| &m.index_config.doc_mapping.field_mappings) - .map(|entry| entry.name.clone()) - .collect() + .map(|m| m.index_config.doc_mapping.field_mappings.len()) + .sum(); + let mut names = HashSet::with_capacity(upper_bound); + for metadata in indexes_metadata { + for entry in &metadata.index_config.doc_mapping.field_mappings { + names.insert(entry.name.clone()); + } + } + names } -/// `_mapping(s)` handler. With `?field_patterns=…` listing only flat declared -/// fields, serves the response from `doc_mapping` and skips `list_fields`. -/// Anything else falls through to a split fan-out filtered by `[start, end)`. +/// `_mapping(s)` handler. `field_patterns` is a hint: when every requested +/// pattern matches a flat declared field, we skip `list_fields` and return the +/// declared schema directly. Anything else falls through to a split fan-out +/// filtered by `[start, end)`, with the same patterns pushed down to the +/// leaves for dynamic-field filtering. pub(crate) async fn es_compat_index_mapping( index_id: String, params: IndexMappingQueryParams, mut metastore: MetastoreServiceClient, search_service: Arc, ) -> Result { - let mut indexes_metadata = if index_id.contains('*') || index_id.contains(',') { + let indexes_metadata = if index_id.contains('*') || index_id.contains(',') { let patterns: Vec = index_id.split(',').map(|s| s.trim().to_string()).collect(); resolve_index_patterns(&patterns, &mut metastore).await? } else { @@ -247,8 +255,8 @@ pub(crate) async fn es_compat_index_mapping( let field_patterns = parse_field_patterns(¶ms); // Fast path: every requested name is a flat declared field — skip - // `list_fields` entirely and trim the declared subtree in-place so we don't - // pay for building / discarding properties downstream. + // `list_fields` entirely. The declared schema is returned as-is; dynamic + // discovery is not needed. if let Some(field_patterns) = &field_patterns { let declared_top: HashSet = collect_declared_top_level_names(&indexes_metadata); let all_declared = field_patterns.iter().all(|pattern| { @@ -258,14 +266,6 @@ pub(crate) async fn es_compat_index_mapping( && declared_top.contains(pattern.as_str()) }); if all_declared { - let keep: HashSet<&str> = field_patterns.iter().map(String::as_str).collect(); - for metadata in &mut indexes_metadata { - metadata - .index_config - .doc_mapping - .field_mappings - .retain(|entry| keep.contains(entry.name.as_str())); - } return Ok(ElasticsearchMappingsResponse::from_doc_mapping( indexes_metadata, None, From 7aaa932e696a2a806842655e8845bbeff9018341 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Thu, 21 May 2026 10:28:17 -0400 Subject: [PATCH 4/7] remove fast path from _mapping handler The fast path (skip list_fields when all field_patterns are declared flat fields) will be implemented in pomsky. This commit reduces the handler to always call root_list_fields, forwarding field_patterns, start_timestamp, and end_timestamp as pushdown hints. --- .../src/elasticsearch_api/rest_handler.rs | 43 ++----------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 8bac9d52b35..a2515545fc2 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::str::from_utf8; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -216,25 +216,9 @@ fn parse_field_patterns(params: &IndexMappingQueryParams) -> Option> } } -fn collect_declared_top_level_names(indexes_metadata: &[IndexMetadata]) -> HashSet { - let upper_bound: usize = indexes_metadata - .iter() - .map(|m| m.index_config.doc_mapping.field_mappings.len()) - .sum(); - let mut names = HashSet::with_capacity(upper_bound); - for metadata in indexes_metadata { - for entry in &metadata.index_config.doc_mapping.field_mappings { - names.insert(entry.name.clone()); - } - } - names -} - -/// `_mapping(s)` handler. `field_patterns` is a hint: when every requested -/// pattern matches a flat declared field, we skip `list_fields` and return the -/// declared schema directly. Anything else falls through to a split fan-out -/// filtered by `[start, end)`, with the same patterns pushed down to the -/// leaves for dynamic-field filtering. +/// `_mapping(s)` handler. Pushes `field_patterns`, `start_timestamp`, and +/// `end_timestamp` down to `root_list_fields` so splits can be pruned and +/// dynamic fields filtered at the leaves. pub(crate) async fn es_compat_index_mapping( index_id: String, params: IndexMappingQueryParams, @@ -254,25 +238,6 @@ pub(crate) async fn es_compat_index_mapping( let field_patterns = parse_field_patterns(¶ms); - // Fast path: every requested name is a flat declared field — skip - // `list_fields` entirely. The declared schema is returned as-is; dynamic - // discovery is not needed. - if let Some(field_patterns) = &field_patterns { - let declared_top: HashSet = collect_declared_top_level_names(&indexes_metadata); - let all_declared = field_patterns.iter().all(|pattern| { - !pattern.contains('*') - && !pattern.contains('?') - && !pattern.contains('.') - && declared_top.contains(pattern.as_str()) - }); - if all_declared { - return Ok(ElasticsearchMappingsResponse::from_doc_mapping( - indexes_metadata, - None, - )); - } - } - let list_fields_request = quickwit_proto::search::ListFieldsRequest { index_id_patterns, field_patterns: field_patterns.unwrap_or_default(), From b208d212bd3d9863f6713812a0e8c8f1199d4276 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Thu, 21 May 2026 10:39:02 -0400 Subject: [PATCH 5/7] update comments to remove stale fast-path references --- .../model/index_mapping_query_params.rs | 10 +++++----- .../src/elasticsearch_api/model/mappings.rs | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs index 4e41b4ec957..e929f9c54d2 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -16,11 +16,11 @@ use serde::{Deserialize, Serialize}; /// Query parameters for `_mapping(s)`. Unknown params are silently ignored. /// -/// Timestamps are epoch seconds, half-open `[start, end)` — forwarded into -/// `ListFieldsRequest` to prune splits. `field_patterns` is a comma-separated -/// hint mirroring `ListFieldsRequest.field_patterns`: it is pushed down to the -/// leaves for dynamic-field filtering and, when every pattern matches a flat -/// declared field, triggers a fast path that skips `list_fields` entirely. +/// Timestamps (`start_timestamp`, `end_timestamp`) are epoch seconds, +/// half-open `[start, end)`, forwarded to `ListFieldsRequest` to prune splits. +/// `field_patterns` is a comma-separated list mirroring +/// `ListFieldsRequest.field_patterns`, pushed down to the leaves for +/// dynamic-field filtering. #[serde_with::skip_serializing_none] #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct IndexMappingQueryParams { diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs index e2ce229c459..15bcc16f060 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs @@ -63,9 +63,7 @@ enum FieldMapping { impl ElasticsearchMappingsResponse { /// Builds a response from declared doc-mapping field mappings, optionally - /// merged with dynamic fields discovered via `ListFields`. Dynamic-field - /// filtering (when requested via `field_patterns`) is handled at the - /// leaves; this function does not filter on its own. + /// merged with dynamic fields from a `ListFields` response. pub fn from_doc_mapping( indexes_metadata: Vec, list_fields_response: Option<&ListFieldsResponse>, From 010d7f26f653f008ab01397645e7bef2ad3a5f2c Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Thu, 21 May 2026 11:12:08 -0400 Subject: [PATCH 6/7] address remaining review nits - move parse_field_patterns into IndexMappingQueryParams::field_patterns() - return Vec directly instead of Option>; caller no longer needs unwrap_or_default - use filter_map to trim and drop empties in one pass - split final Ok(...) into let response + Ok(response) --- .../model/index_mapping_query_params.rs | 20 ++++++++++++++ .../src/elasticsearch_api/rest_handler.rs | 26 +++---------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs index e929f9c54d2..daed6278abe 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -32,6 +32,26 @@ pub struct IndexMappingQueryParams { pub field_patterns: Option, } +impl IndexMappingQueryParams { + /// Splits `field_patterns` on commas, trims, and drops empties. + /// Returns an empty `Vec` when the parameter is absent or blank. + pub fn field_patterns(&self) -> Vec { + self.field_patterns + .as_deref() + .unwrap_or_default() + .split(',') + .filter_map(|pattern| { + let trimmed = pattern.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } + }) + .collect() + } +} + #[cfg(test)] mod tests { use super::IndexMappingQueryParams; diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index a2515545fc2..3ee58a80ecf 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -199,23 +199,6 @@ async fn get_index_metadata( Ok(index_metadata) } -/// Splits `field_patterns=` on commas, trims, drops empties. `None` if nothing -/// usable remains. -fn parse_field_patterns(params: &IndexMappingQueryParams) -> Option> { - let raw = params.field_patterns.as_deref()?; - let patterns: Vec = raw - .split(',') - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .collect(); - if patterns.is_empty() { - None - } else { - Some(patterns) - } -} - /// `_mapping(s)` handler. Pushes `field_patterns`, `start_timestamp`, and /// `end_timestamp` down to `root_list_fields` so splits can be pruned and /// dynamic fields filtered at the leaves. @@ -236,11 +219,9 @@ pub(crate) async fn es_compat_index_mapping( .map(|m| m.index_id().to_string()) .collect(); - let field_patterns = parse_field_patterns(¶ms); - let list_fields_request = quickwit_proto::search::ListFieldsRequest { index_id_patterns, - field_patterns: field_patterns.unwrap_or_default(), + field_patterns: params.field_patterns(), start_timestamp: params.start_timestamp, end_timestamp: params.end_timestamp, query_ast: None, @@ -249,10 +230,11 @@ pub(crate) async fn es_compat_index_mapping( .root_list_fields(list_fields_request) .await .ok(); - Ok(ElasticsearchMappingsResponse::from_doc_mapping( + let response = ElasticsearchMappingsResponse::from_doc_mapping( indexes_metadata, list_fields_response.as_ref(), - )) + ); + Ok(response) } /// GET or POST _elastic/_search From 4d9a120b4d4f941fb2e8695c08e5aeaff06c1e0a Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Thu, 21 May 2026 11:16:49 -0400 Subject: [PATCH 7/7] accept ES-compatible `fields` alias for field_patterns query param Callers using the documented `?fields=` parameter (e.g. the Trino ES connector) would get an empty ListFieldsRequest.field_patterns, causing leaves to enumerate all dynamic fields. Adding the serde alias makes both `?fields=` and `?field_patterns=` map to the same field. --- .../model/index_mapping_query_params.rs | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs index daed6278abe..aba3d369431 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/index_mapping_query_params.rs @@ -28,7 +28,8 @@ pub struct IndexMappingQueryParams { pub start_timestamp: Option, #[serde(default)] pub end_timestamp: Option, - #[serde(default)] + /// Accepts both `field_patterns` (Quickwit) and `fields` (ES-compatible). + #[serde(default, alias = "fields")] pub field_patterns: Option, } @@ -124,4 +125,21 @@ mod tests { let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); assert!(params.field_patterns.is_none()); } + + #[test] + fn es_compatible_fields_alias_accepted() { + // ES clients send `?fields=` — must map to the same field as `field_patterns`. + let qs = "fields=host,message"; + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); + assert_eq!(params.field_patterns.as_deref(), Some("host,message")); + } + + #[test] + fn fields_alias_combined_with_timestamps() { + let qs = "start_timestamp=1&end_timestamp=2&fields=host"; + let params: IndexMappingQueryParams = serde_qs::from_str(qs).unwrap(); + assert_eq!(params.start_timestamp, Some(1)); + assert_eq!(params.end_timestamp, Some(2)); + assert_eq!(params.field_patterns.as_deref(), Some("host")); + } }