From c74dc8eca1bc730bdd414c8498061fb125023491 Mon Sep 17 00:00:00 2001 From: QuakeWang <1677980708@qq.com> Date: Tue, 26 May 2026 13:54:38 +0800 Subject: [PATCH] Support canonical Lumina index type --- crates/paimon/src/lumina/mod.rs | 22 ++- .../paimon/src/table/vector_search_builder.rs | 157 +++++++++++++----- 2 files changed, 138 insertions(+), 41 deletions(-) diff --git a/crates/paimon/src/lumina/mod.rs b/crates/paimon/src/lumina/mod.rs index 04b4f87e..7986a537 100644 --- a/crates/paimon/src/lumina/mod.rs +++ b/crates/paimon/src/lumina/mod.rs @@ -20,7 +20,16 @@ pub mod reader; use std::collections::HashMap; -pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann"; +pub const LUMINA_IDENTIFIER: &str = "lumina"; +pub const LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann"; +pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER; + +pub fn is_lumina_index_type(index_type: &str) -> bool { + matches!( + index_type, + LUMINA_IDENTIFIER | LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER + ) +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum LuminaVectorMetric { @@ -507,6 +516,17 @@ mod tests { assert!(LuminaVectorMetric::from_string("hamming").is_err()); } + #[test] + fn test_lumina_index_type_identifier_helper() { + assert!(is_lumina_index_type(LUMINA_IDENTIFIER)); + assert!(is_lumina_index_type(LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER)); + assert!(is_lumina_index_type(LUMINA_VECTOR_ANN_IDENTIFIER)); + assert!(!is_lumina_index_type("")); + assert!(!is_lumina_index_type("btree")); + assert!(!is_lumina_index_type("lumina-vector")); + assert!(!is_lumina_index_type("LUMINA")); + } + #[test] fn test_index_meta_serialize_deserialize() { let mut options = HashMap::new(); diff --git a/crates/paimon/src/table/vector_search_builder.rs b/crates/paimon/src/table/vector_search_builder.rs index 1f79e540..887b2306 100644 --- a/crates/paimon/src/table/vector_search_builder.rs +++ b/crates/paimon/src/table/vector_search_builder.rs @@ -16,7 +16,7 @@ // under the License. use crate::lumina::reader::LuminaVectorGlobalIndexReader; -use crate::lumina::{GlobalIndexIOMeta, SearchResult, VectorSearch, LUMINA_VECTOR_ANN_IDENTIFIER}; +use crate::lumina::{is_lumina_index_type, GlobalIndexIOMeta, SearchResult, VectorSearch}; use crate::spec::{DataField, FileKind, IndexManifest}; use crate::table::snapshot_manager::SnapshotManager; use crate::table::{find_field_id_by_name, RowRange, Table}; @@ -130,7 +130,7 @@ async fn evaluate_vector_search( .iter() .filter(|e| { e.kind == FileKind::Add - && e.index_file.index_type == LUMINA_VECTOR_ANN_IDENTIFIER + && is_lumina_index_type(&e.index_file.index_type) && e.index_file .global_index_meta .as_ref() @@ -190,6 +190,7 @@ async fn evaluate_vector_search( #[cfg(test)] mod tests { use super::*; + use crate::lumina::{LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, LUMINA_IDENTIFIER}; use crate::spec::{DataType, GlobalIndexMeta, IndexFileMeta, IndexManifestEntry, IntType}; fn make_field(id: i32, name: &str) -> DataField { @@ -237,32 +238,39 @@ mod tests { assert!(result.is_empty()); } + #[tokio::test] + async fn test_evaluate_ignores_non_lumina_index_type() { + let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap(); + let fields = vec![make_field(2, "embedding")]; + let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap(); + + let entry = make_lumina_entry("test.idx", "btree", FileKind::Add, 2); + + let result = evaluate_vector_search( + &file_io, + "memory:///test_table", + &HashMap::new(), + &[entry], + &vs, + &fields, + ) + .await + .unwrap(); + assert!(result.is_empty()); + } + #[tokio::test] async fn test_evaluate_no_matching_field() { let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap(); let fields = vec![make_field(1, "id")]; let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap(); - let entry = IndexManifestEntry { - kind: FileKind::Add, - partition: vec![], - bucket: 0, - index_file: IndexFileMeta { - index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(), - file_name: "test.idx".to_string(), - file_size: 100, - row_count: 10, - deletion_vectors_ranges: None, - global_index_meta: Some(GlobalIndexMeta { - row_range_start: 0, - row_range_end: 9, - index_field_id: 99, - extra_field_ids: None, - index_meta: None, - }), - }, - version: 1, - }; + let entry = make_lumina_entry( + "test.idx", + LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, + FileKind::Add, + 99, + ); let result = evaluate_vector_search( &file_io, @@ -283,37 +291,106 @@ mod tests { let fields = vec![make_field(2, "embedding")]; let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap(); - let entry = IndexManifestEntry { - kind: FileKind::Delete, + let entry = make_lumina_entry( + "test.idx", + LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, + FileKind::Delete, + 2, + ); + + let result = evaluate_vector_search( + &file_io, + "memory:///test_table", + &HashMap::new(), + &[entry], + &vs, + &fields, + ) + .await + .unwrap(); + assert!(result.is_empty()); + } + + #[tokio::test] + async fn test_evaluate_accepts_canonical_lumina_index_type() { + let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap(); + let fields = vec![make_field(2, "embedding")]; + let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap(); + + let entry = make_lumina_entry("missing.idx", LUMINA_IDENTIFIER, FileKind::Add, 2); + + let err = evaluate_vector_search( + &file_io, + "memory:///test_table", + &HashMap::new(), + &[entry], + &vs, + &fields, + ) + .await + .unwrap_err(); + assert!( + err.to_string() + .contains("Failed to read Lumina index file 'missing.idx'"), + "unexpected error: {err}" + ); + } + + #[tokio::test] + async fn test_evaluate_accepts_legacy_lumina_index_type() { + let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap(); + let fields = vec![make_field(2, "embedding")]; + let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap(); + + let entry = make_lumina_entry( + "missing.idx", + LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, + FileKind::Add, + 2, + ); + + let err = evaluate_vector_search( + &file_io, + "memory:///test_table", + &HashMap::new(), + &[entry], + &vs, + &fields, + ) + .await + .unwrap_err(); + assert!( + err.to_string() + .contains("Failed to read Lumina index file 'missing.idx'"), + "unexpected error: {err}" + ); + } + + fn make_lumina_entry( + file_name: &str, + index_type: &str, + kind: FileKind, + index_field_id: i32, + ) -> IndexManifestEntry { + IndexManifestEntry { + kind, partition: vec![], bucket: 0, index_file: IndexFileMeta { - index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(), - file_name: "test.idx".to_string(), + index_type: index_type.to_string(), + file_name: file_name.to_string(), file_size: 100, row_count: 10, deletion_vectors_ranges: None, global_index_meta: Some(GlobalIndexMeta { row_range_start: 0, row_range_end: 9, - index_field_id: 2, + index_field_id, extra_field_ids: None, index_meta: None, }), }, version: 1, - }; - - let result = evaluate_vector_search( - &file_io, - "memory:///test_table", - &HashMap::new(), - &[entry], - &vs, - &fields, - ) - .await - .unwrap(); - assert!(result.is_empty()); + } } }