From dc95111a8be3f122b50f0f78f590e34de5d1b909 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 5 Dec 2025 10:02:15 +0800 Subject: [PATCH 1/3] update tantivy --- quickwit/Cargo.lock | 20 ++++++------- quickwit/Cargo.toml | 2 +- .../quickwit-doc-mapper/src/query_builder.rs | 30 +++++++++++++------ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index f789470339a..20b6f2a70b8 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -5688,7 +5688,7 @@ checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "ownedbytes" version = "0.9.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "stable_deref_trait", ] @@ -9587,8 +9587,8 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tantivy" -version = "0.25.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +version = "0.26.0" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "aho-corasick", "arc-swap", @@ -9644,7 +9644,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.9.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "bitpacking", ] @@ -9652,7 +9652,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = 
"git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "downcast-rs", "fastdivide", @@ -9667,7 +9667,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.10.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "async-trait", "byteorder", @@ -9690,7 +9690,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.25.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "fnv", "nom 7.1.3", @@ -9702,7 +9702,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "futures-util", "itertools 0.14.0", @@ -9715,7 +9715,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "murmurhash32", "rand_distr", @@ -9725,7 +9725,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=25d44fcec8#25d44fcec811da8d2d47877975eda31e58b77639" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=618e3bd#618e3bd11b87e99686bd3ff62c6e2dd75046f78c" dependencies = [ "serde", ] diff --git a/quickwit/Cargo.toml 
b/quickwit/Cargo.toml index 9c57442c447..810f1d1f7f2 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -346,7 +346,7 @@ quickwit-serve = { path = "quickwit-serve" } quickwit-storage = { path = "quickwit-storage" } quickwit-telemetry = { path = "quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "25d44fcec8", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "618e3bd", default-features = false, features = [ "lz4-compression", "mmap", "quickwit", diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index d1a24f4c853..10c0a8a8397 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -263,20 +263,32 @@ fn extract_term_set_query_fields( Ok(visitor.term_dict_fields_to_warm_up) } +/// Converts a `prefix` term into the equivalent term range. +/// +/// The resulting range is `[prefix, next_prefix)`, that is: +/// - start bound: `Included(prefix)` +/// - end bound: `Excluded(next lexicographic term after the prefix)` +/// +/// "abc" -> start: "abc", end: "abd" (excluded) +/// "ab\xFF" -> start: "ab\xFF", end: "ac" (excluded) +/// "\xFF\xFF" -> start: "\xFF\xFF", end: Unbounded fn prefix_term_to_range(prefix: Term) -> (Bound<Term>, Bound<Term>) { - let mut end_bound = prefix.serialized_term().to_vec(); - while !end_bound.is_empty() { - let last_byte = end_bound.last_mut().unwrap(); + // Start from the given prefix and try to find the successor + let mut end_bound = prefix.clone(); + let mut end_bound_value_bytes = prefix.serialized_value_bytes().to_vec(); + while !end_bound_value_bytes.is_empty() { + let last_byte = end_bound_value_bytes.last_mut().unwrap(); if *last_byte != u8::MAX { *last_byte += 1; - return ( - Bound::Included(prefix), - Bound::Excluded(Term::wrap(end_bound)), - ); + // The last non-`u8::MAX` byte incremented + // gives us the 
exclusive upper bound. + end_bound.set_bytes(&end_bound_value_bytes); + return (Bound::Included(prefix), Bound::Excluded(end_bound)); } - end_bound.pop(); + // pop u8::MAX byte and try next + end_bound_value_bytes.pop(); } - // prefix is something like [255, 255, ..] + // All bytes were `u8::MAX`: there is no successor, so the upper bound is unbounded. (Bound::Included(prefix), Bound::Unbounded) } From 8f7628637203197e4102df6310f4afc2185d8d31 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 5 Dec 2025 11:02:03 +0800 Subject: [PATCH 2/3] allow deprecated --- quickwit/quickwit-jaeger/src/lib.rs | 1 + quickwit/quickwit-search/src/lib.rs | 19 +++++++++++-------- quickwit/quickwit-search/src/list_terms.rs | 2 ++ quickwit/quickwit-search/src/tests.rs | 1 + 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 5749842655a..340ce7eb582 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -562,6 +562,7 @@ impl SpanReaderPlugin for JaegerService { } } +#[allow(deprecated)] fn extract_term(term_bytes: &[u8]) -> String { tantivy::Term::wrap(term_bytes) .value() diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index 0706cd7f81c..008556d595f 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -306,14 +306,17 @@ pub async fn single_node_search( #[cfg(any(test, feature = "testsuite"))] #[macro_export] macro_rules! 
encode_term_for_test { - ($field:expr, $value:expr) => { - ::tantivy::schema::Term::from_field_text( - ::tantivy::schema::Field::from_field_id($field), - $value, - ) - .serialized_term() - .to_vec() - }; + ($field:expr, $value:expr) => {{ + #[allow(deprecated)] + { + ::tantivy::schema::Term::from_field_text( + ::tantivy::schema::Field::from_field_id($field), + $value, + ) + .serialized_term() + .to_vec() + } + }}; ($value:expr) => { encode_term_for_test!(0, $value) }; diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index 45dd1256f36..3a72f26d595 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -206,6 +206,7 @@ pub fn jobs_to_leaf_requests( /// Apply a leaf list terms on a single split. #[instrument(skip_all, fields(split_id = split.split_id))] +#[allow(deprecated)] async fn leaf_list_terms_single_split( searcher_context: &SearcherContext, search_request: &ListTermsRequest, @@ -308,6 +309,7 @@ fn term_from_data(field: Field, field_type: &FieldType, data: &[u8]) -> Term { term } +#[allow(deprecated)] fn term_to_data(field: Field, field_type: &FieldType, field_value: &[u8]) -> Vec<u8> { let mut term = Term::from_field_bool(field, false); term.clear_with_type(field_type.value_type()); diff --git a/quickwit/quickwit-search/src/tests.rs b/quickwit/quickwit-search/src/tests.rs index 83d5abad8a1..95c856cf6e3 100644 --- a/quickwit/quickwit-search/src/tests.rs +++ b/quickwit/quickwit-search/src/tests.rs @@ -1646,6 +1646,7 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { Ok(()) } +#[allow(deprecated)] fn collect_str_terms(response: LeafListTermsResponse) -> Vec<String> { response .terms From eb847a012f07b9541a5bf91a3a24890bd4b45670 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 5 Dec 2025 11:21:14 +0800 Subject: [PATCH 3/3] new TopDocs api --- quickwit/quickwit-indexing/src/actors/merge_executor.rs | 3 ++- quickwit/quickwit-search/src/tests.rs | 2 +- 
2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index 3ca35a06c5b..4c1a5215752 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -839,7 +839,8 @@ mod tests { let documents_left = searcher .search( &tantivy::query::AllQuery, - &tantivy::collector::TopDocs::with_limit(result_docs.len() + 1), + &tantivy::collector::TopDocs::with_limit(result_docs.len() + 1) + .order_by_score(), )? .into_iter() .map(|(_, doc_address)| { diff --git a/quickwit/quickwit-search/src/tests.rs b/quickwit/quickwit-search/src/tests.rs index 95c856cf6e3..e986352c5b2 100644 --- a/quickwit/quickwit-search/src/tests.rs +++ b/quickwit/quickwit-search/src/tests.rs @@ -1651,7 +1651,7 @@ fn collect_str_terms(response: LeafListTermsResponse) -> Vec<String> { response .terms .into_iter() - .map(|term| Term::wrap(term).value().as_str().unwrap().to_string()) + .map(|term| Term::wrap(&term).value().as_str().unwrap().to_string()) .collect() }