From 1accd3408c824a955680940636aed62b08cbdcae Mon Sep 17 00:00:00 2001 From: punAhuja Date: Fri, 26 Sep 2025 14:14:34 +0530 Subject: [PATCH 01/15] Added a configurable ef-search parameter --- .../solr/handler/component/QueryComponent.java | 15 +++++++++++++++ .../apache/solr/schema/DenseVectorField.java | 17 +++++++++++++++++ .../apache/solr/search/neural/KnnQParser.java | 5 ++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 610c057a942..5a3fb3b4174 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -218,6 +218,21 @@ public void prepare(ResponseBuilder rb) throws IOException { rb.setSortSpec(parser.getSortSpec(true)); rb.setQparser(parser); + // If this is a KNN request, cap effective rows to topK + Object tk = req.getContext().get("knn.topK"); + if (tk instanceof Integer) { + int topK = (Integer) tk; + + // Adjust the SortSpec count + SortSpec ss = rb.getSortSpec(); + int rows = ss.getCount(); + int start = ss.getOffset(); + if (rows > topK) { + rb.setSortSpec(new SortSpec(ss.getSort(), ss.getSchemaFields(), topK, start)); + rb.shards_rows = topK; + } + } + String[] fqs = req.getParams().getParams(CommonParams.FQ); if (fqs != null && fqs.length != 0) { List filters = rb.getFilters(); diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 22d0add817c..7772361dd5d 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -416,6 +416,23 @@ public Query getKnnVectorQuery( } } + public Query getKnnVectorQuery( + String fieldName, String vectorToSearch, int topK, int efSearch, Query filterQuery) { + final int k = efSearch; + DenseVectorParser vectorBuilder = + getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); + switch (vectorEncoding) { + case FLOAT32: + return new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), k, filterQuery); + case BYTE: + return new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), k, filterQuery); + default: + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Unexpected state. Vector Encoding: " + vectorEncoding); + } + } + /** * Not Supported. Please use the {!knn} query parser to run K nearest neighbors search queries. */ diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 189069805cd..67719410068 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -94,8 +94,11 @@ public Query parse() throws SyntaxError { final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); final String vectorToSearch = getVectorToSearch(); final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); + final int efSearch = localParams.getInt("ef-search", topK * 2); + + req.getContext().put("knn.topK", topK); return denseVectorType.getKnnVectorQuery( - schemaField.getName(), vectorToSearch, topK, getFilterQuery(), getEarlyTerminationParams()); + schemaField.getName(), vectorToSearch, topK, efSearch, getFilterQuery(), getEarlyTerminationParams()); } } From d5b7b3c31ed97f15f231fc8e1e5067166ad72317 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Fri, 26 Sep 2025 17:00:02 +0530 Subject: [PATCH 02/15] Removed changes in QueryComponent and handling efSearch in extended Query classes --- .../handler/component/QueryComponent.java | 15 -------- .../apache/solr/schema/DenseVectorField.java | 9 +++-- .../apache/solr/search/neural/KnnQParser.java | 2 -- .../search/neural/SolrKnnByteVectorQuery.java | 35 ++++++++++++++++++ .../neural/SolrKnnFloatVectorQuery.java | 36 +++++++++++++++++++ 5 files changed, 77 insertions(+), 20 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java create mode 100644 solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 5a3fb3b4174..610c057a942 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -218,21 +218,6 @@ public void prepare(ResponseBuilder rb) throws IOException { rb.setSortSpec(parser.getSortSpec(true)); rb.setQparser(parser); - // If this is a KNN request, cap effective rows to topK - Object tk = req.getContext().get("knn.topK"); - if (tk instanceof Integer) { - int topK = (Integer) tk; - - // Adjust the SortSpec count - SortSpec ss = rb.getSortSpec(); - int rows = ss.getCount(); - int start = ss.getOffset(); - if (rows > topK) { - rb.setSortSpec(new SortSpec(ss.getSort(), ss.getSchemaFields(), topK, start)); - rb.shards_rows = topK; - } - } - String[] fqs = req.getParams().getParams(CommonParams.FQ); if (fqs != null && fqs.length != 0) { List filters = rb.getFilters(); diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 7772361dd5d..7f7a64f931e 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -47,6 +47,8 @@ import org.apache.solr.common.SolrException; import org.apache.solr.search.QParser; import org.apache.solr.search.neural.KnnQParser.EarlyTerminationParams; +import org.apache.solr.search.neural.SolrKnnByteVectorQuery; +import org.apache.solr.search.neural.SolrKnnFloatVectorQuery; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.vector.ByteDenseVectorParser; import org.apache.solr.util.vector.DenseVectorParser; @@ -418,14 +420,15 @@ public Query getKnnVectorQuery( public Query getKnnVectorQuery( String fieldName, String vectorToSearch, int topK, int efSearch, Query filterQuery) { - final int k = efSearch; DenseVectorParser vectorBuilder = getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); switch (vectorEncoding) { case FLOAT32: - return new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), k, filterQuery); + return new SolrKnnFloatVectorQuery( + fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); case BYTE: - return new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), k, filterQuery); + return new SolrKnnByteVectorQuery( + fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 67719410068..d6fde69da82 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -96,8 +96,6 @@ public Query parse() throws SyntaxError { final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); final int efSearch = localParams.getInt("ef-search", topK * 2); - req.getContext().put("knn.topK", topK); - return denseVectorType.getKnnVectorQuery( schemaField.getName(), vectorToSearch, topK, efSearch, getFilterQuery(), getEarlyTerminationParams()); } diff --git a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java new file mode 100644 index 00000000000..e16866bacd5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.lucene.search.KnnByteVectorQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; + +public class SolrKnnByteVectorQuery extends KnnByteVectorQuery { + private final int topK; + + public SolrKnnByteVectorQuery(String field, byte[] target, int topK, int efSearch, Query filter) { + super(field, target, efSearch, filter); + this.topK = topK; + } + + @Override + protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) { + return TopDocs.merge(topK, perLeafResults); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java new file mode 100644 index 00000000000..60eb4f19a57 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.lucene.search.KnnFloatVectorQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; + +public class SolrKnnFloatVectorQuery extends KnnFloatVectorQuery { + private final int topK; + + public SolrKnnFloatVectorQuery( + String field, float[] target, int topK, int efSearch, Query filter) { + super(field, target, efSearch, filter); + this.topK = topK; + } + + @Override + protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) { + return TopDocs.merge(topK, perLeafResults); + } +} From 49440aec8f39d9be93cceedf84f2b59f6d634169 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Fri, 26 Sep 2025 22:00:44 +0530 Subject: [PATCH 03/15] Removed function which is not being called --- .../apache/solr/schema/DenseVectorField.java | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 7f7a64f931e..ff38f02521b 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -37,8 +37,6 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.ByteKnnVectorFieldSource; import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource; -import org.apache.lucene.search.KnnByteVectorQuery; -import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.PatienceKnnVectorQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; @@ -378,6 +376,7 @@ public Query getKnnVectorQuery( String fieldName, String vectorToSearch, int topK, + int efSearch, Query filterQuery, EarlyTerminationParams earlyTermination) { @@ -386,8 +385,8 @@ public Query getKnnVectorQuery( switch (vectorEncoding) { case FLOAT32: - KnnFloatVectorQuery knnFloatVectorQuery = - new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); + SolrKnnFloatVectorQuery knnFloatVectorQuery = + new SolrKnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); if (earlyTermination.isEnabled()) { return (earlyTermination.getSaturationThreshold() != null && earlyTermination.getPatience() != null) @@ -399,8 +398,8 @@ public Query getKnnVectorQuery( } return knnFloatVectorQuery; case BYTE: - KnnByteVectorQuery knnByteVectorQuery = - new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, filterQuery); + SolrKnnByteVectorQuery knnByteVectorQuery = + new SolrKnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); if (earlyTermination.isEnabled()) { return (earlyTermination.getSaturationThreshold() != null && earlyTermination.getPatience() != null) @@ -418,24 +417,6 @@ public Query getKnnVectorQuery( } } - public Query getKnnVectorQuery( - String fieldName, String vectorToSearch, int topK, int efSearch, Query filterQuery) { - DenseVectorParser vectorBuilder = - getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY); - switch (vectorEncoding) { - case FLOAT32: - return new SolrKnnFloatVectorQuery( - fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); - case BYTE: - return new SolrKnnByteVectorQuery( - fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); - default: - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "Unexpected state. Vector Encoding: " + vectorEncoding); - } - } - /** * Not Supported. Please use the {!knn} query parser to run K nearest neighbors search queries. */ From 2c0015de211dc5e4bed7e17878e9bf126c880362 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Mon, 29 Sep 2025 10:08:34 +0530 Subject: [PATCH 04/15] Modified tests to expect new behaviour of added default efSearch parameter --- .../org/apache/solr/schema/DenseVectorField.java | 6 ++++-- .../apache/solr/search/neural/KnnQParser.java | 7 ++++++- .../solr/search/neural/KnnQParserTest.java | 16 ++++++++-------- .../search/TextToVectorQParserTest.java | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index ff38f02521b..5d48dd63030 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -386,7 +386,8 @@ public Query getKnnVectorQuery( switch (vectorEncoding) { case FLOAT32: SolrKnnFloatVectorQuery knnFloatVectorQuery = - new SolrKnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); + new SolrKnnFloatVectorQuery( + fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); if (earlyTermination.isEnabled()) { return (earlyTermination.getSaturationThreshold() != null && earlyTermination.getPatience() != null) @@ -399,7 +400,8 @@ public Query getKnnVectorQuery( return knnFloatVectorQuery; case BYTE: SolrKnnByteVectorQuery knnByteVectorQuery = - new SolrKnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); + new SolrKnnByteVectorQuery( + fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); if (earlyTermination.isEnabled()) { return (earlyTermination.getSaturationThreshold() != null && earlyTermination.getPatience() != null) diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index d6fde69da82..945c5cbbab6 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -97,6 +97,11 @@ public Query parse() throws SyntaxError { final int efSearch = localParams.getInt("ef-search", topK * 2); return denseVectorType.getKnnVectorQuery( - schemaField.getName(), vectorToSearch, topK, efSearch, getFilterQuery(), getEarlyTerminationParams()); + schemaField.getName(), + vectorToSearch, + topK, + efSearch, + getFilterQuery(), + getEarlyTerminationParams()); } } diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index fe417165197..b13e25ece0f 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -970,7 +970,7 @@ public void knnQueryAsRerank_shouldAddSimilarityFunctionScore() { } @Test - public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsKnnFloatVectorQuery() { + public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsSolrKnnFloatVectorQuery() { // It verifies that when no early termination parameters are provided, // the default behavior is applied (early termination is disabled), and no special logic is // triggered. @@ -985,11 +985,11 @@ public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsKnnFloatVectorQ "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); } @Test - public void testKnnFloatWithoutEarlyTermination_returnsKnnFloatVectorQuery() { + public void testKnnFloatWithoutEarlyTermination_returnsSolrKnnFloatVectorQuery() { // It verifies that when early termination is explicitly set to false, no special logic is // triggered. String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -1003,7 +1003,7 @@ public void testKnnFloatWithoutEarlyTermination_returnsKnnFloatVectorQuery() { "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); } @Test @@ -1024,7 +1024,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", defaultSaturationThreshold, defaultPatience); @@ -1063,7 +1063,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", explicitSaturationThreshold, explicitPatience); @@ -1096,7 +1096,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][5]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1128,7 +1128,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", explicitSaturationThreshold, explicitPatience); diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java index 6c23ae21d1c..f2e1938c2e6 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java @@ -406,7 +406,7 @@ public void earlyTerminationEnabled_returnsPatienceKnnVectorQuery() throws Excep String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][5]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", defaultSaturationThreshold, defaultPatience); From 7b7f1296b3f97519b2c7bab86410492746a712e3 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Mon, 29 Sep 2025 10:16:19 +0530 Subject: [PATCH 05/15] Added change to CHANGES.txt --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 89c9ff2a3dc..1a8c831c049 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -27,6 +27,8 @@ New Features * SOLR-17814: Add support for PatienceKnnVectorQuery. (Ilaria Petreti via Alessandro Benedetti) +* SOLR-17928: Add efSearch parameter to KNN query. (Puneet Ahuja) + Improvements --------------------- From 3026ddaaee0f45bf879024ac4e4b1e2f70c6c2a1 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Mon, 29 Sep 2025 14:43:16 +0530 Subject: [PATCH 06/15] Added ef-search to ref-guide --- .../query-guide/pages/dense-vector-search.adoc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index c52fa7ffb08..a0e00450d41 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -452,10 +452,24 @@ Our recommendation is to rely on the default value and change this parameter onl + This parameter must be used together with `saturationThreshold`; either specify both to customize the behavior, or omit both to rely on the default values. +`ef-search`:: ++ +[%autowidth,frame=none] +|=== +|Optional | Default: `topK * 2` +|=== ++ +(advanced) Controls how many candidates the HNSW algorithm examines during search. ++ +The algorithm fetches more results than the requested `topK` and then selects the best ones. Higher values fetch more candidates, improving recall but slowing down the search. Lower values fetch fewer candidates for faster performance but may miss some good matches. ++ +Accepted values: +Any positive integer. + Here's an example of a `knn` search using the early termination with input parameters: [source,text] -?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10}[1.0, 2.0, 3.0, 4.0] +?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 ef-search=30}[1.0, 2.0, 3.0, 4.0] === knn_text_to_vector Query Parser From 1be0fecc832d434418f32ed5d720af537ec2bafd Mon Sep 17 00:00:00 2001 From: punAhuja Date: Thu, 30 Oct 2025 16:54:22 +0530 Subject: [PATCH 07/15] Added some comments and validation for efSearch >= topK --- .../apache/solr/search/neural/KnnQParser.java | 5 +++++ .../search/neural/SolrKnnByteVectorQuery.java | 16 ++++++++++++++++ .../search/neural/SolrKnnFloatVectorQuery.java | 16 ++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index dd0e76bb266..453c0be9943 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -109,6 +109,11 @@ public Query parse() throws SyntaxError { final String vectorToSearch = getVectorToSearch(); final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); final int efSearch = localParams.getInt("efSearch", topK * 2); + if (efSearch < topK) { + throw new IllegalArgumentException( + "efSearch (" + efSearch + ") must be >= topK (" + topK + ")"); + } + final Integer filteredSearchThreshold = localParams.getInt(FILTERED_SEARCH_THRESHOLD); return denseVectorType.getKnnVectorQuery( diff --git a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java index 874e7b21600..91275d23267 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java +++ b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnByteVectorQuery.java @@ -25,6 +25,8 @@ public class SolrKnnByteVectorQuery extends KnnByteVectorQuery { private final int topK; public SolrKnnByteVectorQuery(String field, byte[] target, int topK, int efSearch, Query filter) { + // efSearch is used as 'k' to explore this many vectors in HNSW, then topK results are returned + // to the user super(field, target, efSearch, filter); this.topK = topK; } @@ -44,4 +46,18 @@ public SolrKnnByteVectorQuery( protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) { return TopDocs.merge(topK, perLeafResults); } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + SolrKnnByteVectorQuery other = (SolrKnnByteVectorQuery) obj; + return this.topK == other.topK; + } + + @Override + public int hashCode() { + return 31 * super.hashCode() + Integer.hashCode(topK); + } } diff --git a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java index 8f5ecc6f4d5..bc96e5463bd 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java +++ b/solr/core/src/java/org/apache/solr/search/neural/SolrKnnFloatVectorQuery.java @@ -26,6 +26,8 @@ public class SolrKnnFloatVectorQuery extends KnnFloatVectorQuery { public SolrKnnFloatVectorQuery( String field, float[] target, int topK, int efSearch, Query filter) { + // efSearch is used as 'k' to explore this many vectors in HNSW then topK results are returned + // to the user super(field, target, efSearch, filter); this.topK = topK; } @@ -45,4 +47,18 @@ public SolrKnnFloatVectorQuery( protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) { return TopDocs.merge(topK, perLeafResults); } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + SolrKnnFloatVectorQuery other = (SolrKnnFloatVectorQuery) obj; + return this.topK == other.topK; + } + + @Override + public int hashCode() { + return 31 * super.hashCode() + Integer.hashCode(topK); + } } From ae95c298fe6b7504386dae730093fdd49f02b590 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Thu, 30 Oct 2025 19:46:50 +0530 Subject: [PATCH 08/15] Removed old change in CHANGES.txt --- solr/CHANGES.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8dd1277cd0c..7b0393fd50e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -87,8 +87,6 @@ New Features * SOLR-17815: Add parameter to regulate for ACORN-based filtering in vector search. (Anna Ruggero, Alessandro Benedetti) -* SOLR-17928: Add efSearch parameter to KNN query. (Puneet Ahuja) - Improvements --------------------- From a73f4a2db3e9343455c1ab85dbd7a0ef11e04948 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Thu, 6 Nov 2025 14:47:54 +0530 Subject: [PATCH 09/15] Exposing efSearchScaleFactor instead of efSearch, and using it to calculate efSearch internally -Set default efSearchScaleFactor as 1.0, which means default efSearch = topK --- .../apache/solr/search/neural/KnnQParser.java | 8 +++++--- .../solr/schema/DenseVectorFieldTest.java | 20 +++++++++---------- .../solr/search/neural/KnnQParserTest.java | 20 +++++++++---------- solr/licenses/cuvs-lucene-25.10.0.jar.sha1 | 2 +- .../search/TextToVectorQParserTest.java | 2 +- .../pages/dense-vector-search.adoc | 12 +++++------ .../pages/major-changes-in-solr-10.adoc | 2 +- 7 files changed, 34 insertions(+), 32 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java index 453c0be9943..951b2f2f6f8 100644 --- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java @@ -108,11 +108,13 @@ public Query parse() throws SyntaxError { final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); final String vectorToSearch = getVectorToSearch(); final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); - final int efSearch = localParams.getInt("efSearch", topK * 2); - if (efSearch < topK) { + + final double efSearchScaleFactor = localParams.getDouble("efSearchScaleFactor", 1.0); + if (efSearchScaleFactor < 1.0) { throw new IllegalArgumentException( - "efSearch (" + efSearch + ") must be >= topK (" + topK + ")"); + "efSearchScaleFactor (" + efSearchScaleFactor + ") must be >= 1.0"); } + final int efSearch = (int) Math.round(efSearchScaleFactor * topK); final Integer filteredSearchThreshold = localParams.getInt(FILTERED_SEARCH_THRESHOLD); diff --git a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index e0b508a1afc..37f49dbabeb 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -860,7 +860,7 @@ public void testFilteredSearchThreshold_floatNoThresholdInInput_shouldSetDefault DenseVectorField type = (DenseVectorField) vectorField.getType(); KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) - type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, 6, null, null, null, null); + type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -884,7 +884,7 @@ public void testFilteredSearchThreshold_floatThresholdInInput_shouldSetCustomThr KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, 6, null, null, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -909,7 +909,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus SeededKnnVectorQuery vectorQuery = (SeededKnnVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, 6, null, seedQuery, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, seedQuery, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -936,7 +936,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus PatienceKnnVectorQuery vectorQuery = (PatienceKnnVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, 6, null, null, earlyTermination, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, earlyTermination, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -967,7 +967,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus "vector", "[2, 1, 3, 4]", 3, - 6, + 3, null, seedQuery, earlyTermination, @@ -995,7 +995,7 @@ public void testFilteredSearchThreshold_byteNoThresholdInInput_shouldSetDefaultT KnnByteVectorQuery vectorQuery = (KnnByteVectorQuery) type.getKnnVectorQuery( - "vector_byte_encoding", "[2, 1, 3, 4]", 3, 6, null, null, null, null); + "vector_byte_encoding", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -1022,7 +1022,7 @@ public void testFilteredSearchThreshold_byteThresholdInInput_shouldSetCustomThre "vector_byte_encoding", "[2, 1, 3, 4]", 3, - 6, + 3, null, null, null, @@ -1054,7 +1054,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, - 6, + 3, null, seedQuery, null, @@ -1088,7 +1088,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, - 6, + 3, null, null, earlyTermination, @@ -1123,7 +1123,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, - 6, + 3, null, seedQuery, earlyTermination, diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java index a97e4618f87..124c96bb196 100644 --- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java @@ -990,7 +990,7 @@ public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsSolrKnnFloatVec "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test @@ -1008,7 +1008,7 @@ public void testKnnFloatWithoutEarlyTermination_returnsSolrKnnFloatVectorQuery() "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][10])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test @@ -1029,7 +1029,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", defaultSaturationThreshold, defaultPatience); @@ -1068,7 +1068,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1101,7 +1101,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][5]})", explicitSaturationThreshold, explicitPatience); @@ -1133,7 +1133,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][20]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1219,7 +1219,7 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][8]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1239,7 +1239,7 @@ public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][8]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][4]})']"); } @Test @@ -1286,7 +1286,7 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=SolrKnnFloatVectorQuery:vector[0.1,...][8], seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][8]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=SolrKnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1318,7 +1318,7 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { // Verify that the final delegate is a SolrKnnFloatVectorQuery with the expected vector and // topK // value - "//str[@name='parsedquery'][contains(.,'delegate=SolrKnnFloatVectorQuery:vector[1.0,...][8]')]"); + "//str[@name='parsedquery'][contains(.,'delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]')]"); } @Test diff --git a/solr/licenses/cuvs-lucene-25.10.0.jar.sha1 b/solr/licenses/cuvs-lucene-25.10.0.jar.sha1 index 60bd5f8e97d..7401290cfe1 100644 --- a/solr/licenses/cuvs-lucene-25.10.0.jar.sha1 +++ b/solr/licenses/cuvs-lucene-25.10.0.jar.sha1 @@ -1 +1 @@ -735cac75bcf3b55763941cfd1e473dfcf47fbf97 +4be6b8c869886861f391522f8075ea121d38c722 diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java index f2e1938c2e6..752c547fe0d 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/search/TextToVectorQParserTest.java @@ -406,7 +406,7 @@ public void earlyTerminationEnabled_returnsPatienceKnnVectorQuery() throws Excep String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][5]})", defaultSaturationThreshold, defaultPatience); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index f58a7a2aeae..0d6572b713a 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -452,24 +452,24 @@ Our recommendation is to rely on the default value and change this parameter onl + This parameter must be used together with `saturationThreshold`; either specify both to customize the behavior, or omit both to rely on the default values. -`efSearch`:: +`efSearchScaleFactor`:: + [%autowidth,frame=none] |=== -|Optional | Default: `topK * 2` +|Optional | Default: `1.0` |=== + -(advanced) Controls how many candidates the HNSW algorithm examines during search. +(advanced) Multiplier factor for calculating how many candidates the HNSW algorithm examines during search. + -The algorithm fetches more results than the requested `topK` and then selects the best ones. Higher values fetch more candidates, improving recall but slowing down the search. Lower values fetch fewer candidates for faster performance but may miss some good matches. +The effective `efSearch` value is calculated internally as `efSearchScaleFactor * topK`. Lower values fetch fewer candidates for faster performance but may miss some good matches. Higher values fetch more candidates, improving recall but slowing down the search. + Accepted values: -Any positive integer. +Any float >= 1.0. Here's an example of a `knn` search using the early termination with input parameters: [source,text] -?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 efSearch=30}[1.0, 2.0, 3.0, 4.0] +?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 efSearchScaleFactor=3.0}[1.0, 2.0, 3.0, 4.0] `seedQuery`:: + diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc index 7df7cf84d4e..d4fb0fe1a99 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc @@ -128,7 +128,7 @@ It is available as part of the `analysis-extras` module. === Vector Search Enhancements -* The `efSearch` parameter is now available for the KNN query parser (SOLR-17928). This parameter controls how many candidate vectors are explored during HNSW graph traversal, allowing users to independently tune search accuracy versus the number of results returned. Previously, improving accuracy required increasing `topK` (which returns more results), but `efSearch` enables exploring more candidates while still receiving exactly `topK` results. Default value is `topK * 2`. +* The `efSearchScaleFactor` parameter is now available for the KNN query parser (SOLR-17928). This parameter controls how many candidate vectors are explored during HNSW graph traversal, allowing users to independently tune search accuracy versus the number of results returned. Previously, improving accuracy required increasing `topK` (which returns more results), but `efSearchScaleFactor` enables exploring more candidates while still receiving exactly `topK` results. The `efSearch` value is calculated internally as `efSearchScaleFactor * topK`. Default value is `1.0`, which means `efSearch` defaults to `topK`. === Deprecation removals From 8adedb1fe933a2a7e13cb13973db2b7685707edb Mon Sep 17 00:00:00 2001 From: punAhuja Date: Thu, 6 Nov 2025 14:52:03 +0530 Subject: [PATCH 10/15] Updated changelog --- changelog/unreleased/puneet/SOLR-17928-ef-search.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog/unreleased/puneet/SOLR-17928-ef-search.yml b/changelog/unreleased/puneet/SOLR-17928-ef-search.yml index 5566b6670f5..583a6834c9c 100644 --- a/changelog/unreleased/puneet/SOLR-17928-ef-search.yml +++ b/changelog/unreleased/puneet/SOLR-17928-ef-search.yml @@ -1,8 +1,8 @@ # See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc -title: Added efSearch parameter to knn query +title: Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other authors: - - name: punAhuja + - name: Puneet Ahuja links: - name: SOLR-17928 url: https://issues.apache.org/jira/browse/SOLR-17928 From f0fab8d656b0b2583052ae64ea4855a7e7a98632 Mon Sep 17 00:00:00 2001 From: Elia Date: Wed, 10 Dec 2025 17:33:26 +0100 Subject: [PATCH 11/15] Fix Exception handling Handled NaN value Added tests --- .../apache/solr/search/vector/KnnQParser.java | 5 +- .../solr/search/vector/KnnQParserTest.java | 96 +++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java index 892583b6dae..1a5beb83880 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java +++ b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java @@ -110,8 +110,9 @@ public Query parse() throws SyntaxError { final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K); final double efSearchScaleFactor = localParams.getDouble("efSearchScaleFactor", 1.0); - if (efSearchScaleFactor < 1.0) { - throw new IllegalArgumentException( + if (Double.isNaN(efSearchScaleFactor) || efSearchScaleFactor < 1.0) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "efSearchScaleFactor (" + efSearchScaleFactor + ") must be >= 1.0"); } final int efSearch = (int) Math.round(efSearchScaleFactor * topK); diff --git a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java index d3ff83a528b..1e568590dc7 100644 --- a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java @@ -138,6 +138,49 @@ public void incorrectTopK_shouldThrowException() { SolrException.ErrorCode.BAD_REQUEST); } + @Test + public void efSearchScaleFactorLessThanOne_shouldThrowException() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "efSearchScaleFactor < 1.0 should throw Exception", + "efSearchScaleFactor (0.5) must be >= 1.0", + req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=0.5}" + vectorToSearch, "fl", "id"), + SolrException.ErrorCode.BAD_REQUEST); + + assertQEx( + "efSearchScaleFactor = 0.0 should throw Exception", + "efSearchScaleFactor (0.0) must be >= 1.0", + req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=0.0}" + vectorToSearch, "fl", "id"), + SolrException.ErrorCode.BAD_REQUEST); + } + + @Test + public void efSearchScaleFactorNaN_shouldThrowException() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "efSearchScaleFactor = NaN should throw Exception", + "efSearchScaleFactor (NaN) must be >= 1.0", + req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=NaN}" + vectorToSearch, "fl", "id"), + SolrException.ErrorCode.BAD_REQUEST); + } + + @Test + public void efSearchScaleFactorSet_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test functional behavior with efSearchScaleFactor = 2.0 + assertQ( + req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=2.0}" + vectorToSearch, "fl", "id"), + "//result[@numFound='5']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='4']", + "//result/doc[3]/str[@name='id'][.='2']", + "//result/doc[4]/str[@name='id'][.='10']", + "//result/doc[5]/str[@name='id'][.='3']"); + } + @Test public void topKMissing_shouldReturnDefaultTopK() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -442,6 +485,59 @@ public void correctQuery_shouldRankBySimilarityFunction() { "//result/doc[10]/str[@name='id'][.='8']"); } + + @Test + public void efSearchScaleFactorWithEarlyTermination_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test efSearchScaleFactor with early termination enabled - should return results + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=2.0 earlyTermination=true saturationThreshold=0.989 patience=10}" + + vectorToSearch, + "fl", + "id"), + "//result[@numFound='5']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='4']"); + } + + @Test + public void efSearchScaleFactorWithSeedQuery_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test efSearchScaleFactor with seed query - should return results + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 efSearchScaleFactor=1.5 seedQuery='id:(1 4 7 8 9)'}" + vectorToSearch, + "fl", + "id"), + "//result[@numFound='4']"); + } + + @Test + public void efSearchScaleFactorWithByteVectors_shouldWorkCorrectly() { + String vectorToSearch = "[2, 2, 1, 3]"; + + // Test functional behavior with byte vectors and efSearchScaleFactor + assertQ( + req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=3 efSearchScaleFactor=1.5}" + vectorToSearch, "fl", "id"), + "//result[@numFound='3']", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='1']"); + + // Also test with default efSearchScaleFactor + assertQ( + req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=3}" + vectorToSearch, "fl", "id"), + "//result[@numFound='3']", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='1']"); + } + @Test public void knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; From 6383802f2c58d6bafc45b84527dc837f122ba76d Mon Sep 17 00:00:00 2001 From: Elia Date: Wed, 10 Dec 2025 18:05:58 +0100 Subject: [PATCH 12/15] Minor fixes --- .../apache/solr/schema/DenseVectorField.java | 2 +- .../search/vector/SolrKnnByteVectorQuery.java | 2 +- .../vector/SolrKnnFloatVectorQuery.java | 2 +- .../solr/search/vector/KnnQParserTest.java | 34 +++++++++++++++---- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index afba4e379b1..c8d2822b876 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -46,9 +46,9 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.solr.common.SolrException; import org.apache.solr.search.QParser; -import org.apache.solr.search.neural.KnnQParser.EarlyTerminationParams; import org.apache.solr.search.neural.SolrKnnByteVectorQuery; import org.apache.solr.search.neural.SolrKnnFloatVectorQuery; +import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.vector.ByteDenseVectorParser; import org.apache.solr.util.vector.DenseVectorParser; diff --git a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java index 91275d23267..3876380580f 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java +++ b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.search.neural; +package org.apache.solr.search.vector; import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.Query; diff --git a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java index bc96e5463bd..f465c471007 100644 --- a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java +++ b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.search.neural; +package org.apache.solr.search.vector; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.Query; diff --git a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java index 1e568590dc7..a3e556450f8 100644 --- a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java @@ -145,13 +145,21 @@ public void efSearchScaleFactorLessThanOne_shouldThrowException() { assertQEx( "efSearchScaleFactor < 1.0 should throw Exception", "efSearchScaleFactor (0.5) must be >= 1.0", - req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=0.5}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=0.5}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); assertQEx( "efSearchScaleFactor = 0.0 should throw Exception", "efSearchScaleFactor (0.0) must be >= 1.0", - req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=0.0}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=0.0}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -162,7 +170,11 @@ public void efSearchScaleFactorNaN_shouldThrowException() { assertQEx( "efSearchScaleFactor = NaN should throw Exception", "efSearchScaleFactor (NaN) must be >= 1.0", - req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=NaN}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=NaN}" + vectorToSearch, + "fl", + "id"), SolrException.ErrorCode.BAD_REQUEST); } @@ -172,7 +184,11 @@ public void efSearchScaleFactorSet_shouldWorkCorrectly() { // Test functional behavior with efSearchScaleFactor = 2.0 assertQ( - req(CommonParams.Q, "{!knn f=vector topK=5 efSearchScaleFactor=2.0}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=2.0}" + vectorToSearch, + "fl", + "id"), "//result[@numFound='5']", "//result/doc[1]/str[@name='id'][.='1']", "//result/doc[2]/str[@name='id'][.='4']", @@ -485,7 +501,6 @@ public void correctQuery_shouldRankBySimilarityFunction() { "//result/doc[10]/str[@name='id'][.='8']"); } - @Test public void efSearchScaleFactorWithEarlyTermination_shouldWorkCorrectly() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -511,7 +526,8 @@ public void efSearchScaleFactorWithSeedQuery_shouldWorkCorrectly() { assertQ( req( CommonParams.Q, - "{!knn f=vector topK=4 efSearchScaleFactor=1.5 seedQuery='id:(1 4 7 8 9)'}" + vectorToSearch, + "{!knn f=vector topK=4 efSearchScaleFactor=1.5 seedQuery='id:(1 4 7 8 9)'}" + + vectorToSearch, "fl", "id"), "//result[@numFound='4']"); @@ -523,7 +539,11 @@ public void efSearchScaleFactorWithByteVectors_shouldWorkCorrectly() { // Test functional behavior with byte vectors and efSearchScaleFactor assertQ( - req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=3 efSearchScaleFactor=1.5}" + vectorToSearch, "fl", "id"), + req( + CommonParams.Q, + "{!knn f=vector_byte_encoding topK=3 efSearchScaleFactor=1.5}" + vectorToSearch, + "fl", + "id"), "//result[@numFound='3']", "//result/doc[1]/str[@name='id'][.='2']", "//result/doc[2]/str[@name='id'][.='3']", From 848e5af92b6ee465084ce0443fadfa5aa82a3649 Mon Sep 17 00:00:00 2001 From: Elia Date: Wed, 10 Dec 2025 18:08:22 +0100 Subject: [PATCH 13/15] Fix package name --- .../src/java/org/apache/solr/schema/DenseVectorField.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index c8d2822b876..961907f3da7 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -46,8 +46,8 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.solr.common.SolrException; import org.apache.solr.search.QParser; -import org.apache.solr.search.neural.SolrKnnByteVectorQuery; -import org.apache.solr.search.neural.SolrKnnFloatVectorQuery; +import org.apache.solr.search.vector.SolrKnnByteVectorQuery; +import org.apache.solr.search.vector.SolrKnnFloatVectorQuery; import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.vector.ByteDenseVectorParser; From 85af30d023596dfe04a0f6e70c5c93af7311438a Mon Sep 17 00:00:00 2001 From: Elia Date: Wed, 10 Dec 2025 18:13:05 +0100 Subject: [PATCH 14/15] gradlew tidy --- solr/core/src/java/org/apache/solr/schema/DenseVectorField.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 961907f3da7..4acdd1757b9 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -46,9 +46,9 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.solr.common.SolrException; import org.apache.solr.search.QParser; +import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; import org.apache.solr.search.vector.SolrKnnByteVectorQuery; import org.apache.solr.search.vector.SolrKnnFloatVectorQuery; -import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.vector.ByteDenseVectorParser; import org.apache.solr.util.vector.DenseVectorParser; From 11174437a634dbd24b805ec393728f2aa297243f Mon Sep 17 00:00:00 2001 From: Elia Date: Wed, 10 Dec 2025 18:14:07 +0100 Subject: [PATCH 15/15] Added changelog --- ...8-ef-search.yml => SOLR-17928_added_efSearch_parameter.yml} | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename changelog/unreleased/{puneet/SOLR-17928-ef-search.yml => SOLR-17928_added_efSearch_parameter.yml} (77%) diff --git a/changelog/unreleased/puneet/SOLR-17928-ef-search.yml b/changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml similarity index 77% rename from changelog/unreleased/puneet/SOLR-17928-ef-search.yml rename to changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml index 583a6834c9c..01db5422157 100644 --- a/changelog/unreleased/puneet/SOLR-17928-ef-search.yml +++ b/changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml @@ -1,8 +1,9 @@ # See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc title: Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally -type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +type: added authors: - name: Puneet Ahuja + - name: Elia Porciani links: - name: SOLR-17928 url: https://issues.apache.org/jira/browse/SOLR-17928