diff --git a/changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml b/changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml new file mode 100644 index 00000000000..01db5422157 --- /dev/null +++ b/changelog/unreleased/SOLR-17928_added_efSearch_parameter.yml @@ -0,0 +1,11 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally +type: added +authors: + - name: Puneet Ahuja + - name: Elia Porciani +links: + - name: SOLR-17928 + url: https://issues.apache.org/jira/browse/SOLR-17928 +issues: + - 17928 diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java index 21c8d0d789c..4acdd1757b9 100644 --- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java +++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java @@ -37,8 +37,6 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.ByteKnnVectorFieldSource; import org.apache.lucene.queries.function.valuesource.FloatKnnVectorFieldSource; -import org.apache.lucene.search.KnnByteVectorQuery; -import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.PatienceKnnVectorQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SeededKnnVectorQuery; @@ -49,6 +47,8 @@ import org.apache.solr.common.SolrException; import org.apache.solr.search.QParser; import org.apache.solr.search.vector.KnnQParser.EarlyTerminationParams; +import org.apache.solr.search.vector.SolrKnnByteVectorQuery; +import org.apache.solr.search.vector.SolrKnnFloatVectorQuery; import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.vector.ByteDenseVectorParser; import org.apache.solr.util.vector.DenseVectorParser; @@ -502,6 +502,7 @@ public Query getKnnVectorQuery( String fieldName, 
String vectorToSearch, int topK, + int efSearch, Query filterQuery, Query seedQuery, EarlyTerminationParams earlyTermination, @@ -516,22 +517,32 @@ public Query getKnnVectorQuery( if (filteredSearchThreshold != null) { KnnSearchStrategy knnSearchStrategy = new KnnSearchStrategy.Hnsw(filteredSearchThreshold); - yield new KnnFloatVectorQuery( - fieldName, vectorBuilder.getFloatVector(), topK, filterQuery, knnSearchStrategy); + yield new SolrKnnFloatVectorQuery( + fieldName, + vectorBuilder.getFloatVector(), + topK, + efSearch, + filterQuery, + knnSearchStrategy); } else { - yield new KnnFloatVectorQuery( - fieldName, vectorBuilder.getFloatVector(), topK, filterQuery); + yield new SolrKnnFloatVectorQuery( + fieldName, vectorBuilder.getFloatVector(), topK, efSearch, filterQuery); } } case BYTE -> { if (filteredSearchThreshold != null) { KnnSearchStrategy knnSearchStrategy = new KnnSearchStrategy.Hnsw(filteredSearchThreshold); - yield new KnnByteVectorQuery( - fieldName, vectorBuilder.getByteVector(), topK, filterQuery, knnSearchStrategy); + yield new SolrKnnByteVectorQuery( + fieldName, + vectorBuilder.getByteVector(), + topK, + efSearch, + filterQuery, + knnSearchStrategy); } else { - yield new KnnByteVectorQuery( - fieldName, vectorBuilder.getByteVector(), topK, filterQuery); + yield new SolrKnnByteVectorQuery( + fieldName, vectorBuilder.getByteVector(), topK, efSearch, filterQuery); } } }; @@ -586,9 +597,9 @@ public SortField getSortField(SchemaField field, boolean top) { private Query getSeededQuery(Query knnQuery, Query seed) { return switch (knnQuery) { - case KnnFloatVectorQuery knnFloatQuery -> SeededKnnVectorQuery.fromFloatQuery( + case SolrKnnFloatVectorQuery knnFloatQuery -> SeededKnnVectorQuery.fromFloatQuery( knnFloatQuery, seed); - case KnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( + case SolrKnnByteVectorQuery knnByteQuery -> SeededKnnVectorQuery.fromByteQuery( knnByteQuery, seed); default -> throw new SolrException( 
SolrException.ErrorCode.SERVER_ERROR, "Invalid type of knn query");
@@ -600,13 +611,13 @@ private Query getEarlyTerminationQuery(Query knnQuery, EarlyTerminationParams ea
         (earlyTermination.getSaturationThreshold() != null
             && earlyTermination.getPatience() != null);
     return switch (knnQuery) {
-      case KnnFloatVectorQuery knnFloatQuery -> useExplicitParams
+      case SolrKnnFloatVectorQuery knnFloatQuery -> useExplicitParams
           ? PatienceKnnVectorQuery.fromFloatQuery(
               knnFloatQuery,
               earlyTermination.getSaturationThreshold(),
               earlyTermination.getPatience())
           : PatienceKnnVectorQuery.fromFloatQuery(knnFloatQuery);
-      case KnnByteVectorQuery knnByteQuery -> useExplicitParams
+      case SolrKnnByteVectorQuery knnByteQuery -> useExplicitParams
           ? PatienceKnnVectorQuery.fromByteQuery(
               knnByteQuery,
               earlyTermination.getSaturationThreshold(),
diff --git a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java
index 08fa6a5fc9a..1a5beb83880 100644
--- a/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/vector/KnnQParser.java
@@ -108,12 +108,34 @@ public Query parse() throws SyntaxError {
     final DenseVectorField denseVectorType = getCheckedFieldType(schemaField);
     final String vectorToSearch = getVectorToSearch();
     final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
+
+    final double efSearchScaleFactor = localParams.getDouble("efSearchScaleFactor", 1.0);
+    if (Double.isNaN(efSearchScaleFactor) || efSearchScaleFactor < 1.0) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "efSearchScaleFactor (" + efSearchScaleFactor + ") must be >= 1.0");
+    }
+    // Math.round(double) returns a long and saturates at Long.MAX_VALUE, so narrowing an oversized
+    // or infinite product straight to int would wrap (Infinity becomes -1); reject it instead.
+    final long scaledEfSearch = Math.round(efSearchScaleFactor * topK);
+    if (scaledEfSearch != (int) scaledEfSearch) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "efSearchScaleFactor ("
+              + efSearchScaleFactor
+              + ") * topK ("
+              + topK
+              + ") is out of the supported integer range");
+    }
+    final int efSearch = (int) scaledEfSearch;
+
     final Integer filteredSearchThreshold = localParams.getInt(FILTERED_SEARCH_THRESHOLD);

     return denseVectorType.getKnnVectorQuery(
         schemaField.getName(),
         vectorToSearch,
         topK,
+        efSearch,
         getFilterQuery(),
         getSeedQuery(),
getEarlyTerminationParams(), diff --git a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java new file mode 100644 index 00000000000..3876380580f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnByteVectorQuery.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.search.vector;
+
+import org.apache.lucene.search.KnnByteVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.knn.KnnSearchStrategy;
+
+/** Byte-vector kNN query that searches with {@code efSearch} as k and merges with topK. */
+public class SolrKnnByteVectorQuery extends KnnByteVectorQuery {
+  private final int topK;
+
+  /** Hands {@code efSearch} to the parent as its {@code k} and remembers {@code topK}. */
+  public SolrKnnByteVectorQuery(String field, byte[] target, int topK, int efSearch, Query filter) {
+    super(field, target, efSearch, filter);
+    this.topK = topK;
+  }
+
+  public SolrKnnByteVectorQuery(
+      String field,
+      byte[] target,
+      int topK,
+      int efSearch,
+      Query filter,
+      KnnSearchStrategy searchStrategy) {
+    super(field, target, efSearch, filter, searchStrategy);
+    this.topK = topK;
+  }
+
+  /** Merges the per-leaf results through {@code TopDocs.merge} with {@code topK} as the limit. */
+  @Override
+  protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) {
+    return TopDocs.merge(topK, perLeafResults);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    return this == obj
+        || (super.equals(obj)
+            && getClass() == obj.getClass()
+            && topK == ((SolrKnnByteVectorQuery) obj).topK);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * super.hashCode() + topK;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java
new file mode 100644
index 00000000000..f465c471007
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/vector/SolrKnnFloatVectorQuery.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.vector;
+
+import org.apache.lucene.search.KnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.knn.KnnSearchStrategy;
+
+/** Float-vector kNN query that searches with {@code efSearch} as k and merges with topK. */
+public class SolrKnnFloatVectorQuery extends KnnFloatVectorQuery {
+  private final int topK;
+
+  /** Hands {@code efSearch} to the parent as its {@code k} and remembers {@code topK}. */
+  public SolrKnnFloatVectorQuery(
+      String field, float[] target, int topK, int efSearch, Query filter) {
+    super(field, target, efSearch, filter);
+    this.topK = topK;
+  }
+
+  public SolrKnnFloatVectorQuery(
+      String field,
+      float[] target,
+      int topK,
+      int efSearch,
+      Query filter,
+      KnnSearchStrategy searchStrategy) {
+    super(field, target, efSearch, filter, searchStrategy);
+    this.topK = topK;
+  }
+
+  /** Merges the per-leaf results through {@code TopDocs.merge} with {@code topK} as the limit. */
+  @Override
+  protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) {
+    return TopDocs.merge(topK, perLeafResults);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    return this == obj
+        || (super.equals(obj)
+            && getClass() == obj.getClass()
+            && topK == ((SolrKnnFloatVectorQuery) obj).topK);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * super.hashCode() + topK;
+  }
+}
diff --git
a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java index b8e426470f1..18794907df2 100644 --- a/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java @@ -868,7 +868,7 @@ public void testFilteredSearchThreshold_floatNoThresholdInInput_shouldSetDefault DenseVectorField type = (DenseVectorField) vectorField.getType(); KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) - type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, null, null, null, null); + type.getKnnVectorQuery("vector", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -892,7 +892,7 @@ public void testFilteredSearchThreshold_floatThresholdInInput_shouldSetCustomThr KnnFloatVectorQuery vectorQuery = (KnnFloatVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, null, null, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -917,7 +917,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus SeededKnnVectorQuery vectorQuery = (SeededKnnVectorQuery) type.getKnnVectorQuery( - "vector", "[2, 1, 3, 4]", 3, null, seedQuery, null, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, seedQuery, null, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -944,7 +944,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus PatienceKnnVectorQuery vectorQuery = (PatienceKnnVectorQuery) type.getKnnVectorQuery( - 
"vector", "[2, 1, 3, 4]", 3, null, null, earlyTermination, expectedThreshold); + "vector", "[2, 1, 3, 4]", 3, 3, null, null, earlyTermination, expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -975,6 +975,7 @@ public void testFilteredSearchThreshold_seededFloatThresholdInInput_shouldSetCus "vector", "[2, 1, 3, 4]", 3, + 3, null, seedQuery, earlyTermination, @@ -1002,7 +1003,7 @@ public void testFilteredSearchThreshold_byteNoThresholdInInput_shouldSetDefaultT KnnByteVectorQuery vectorQuery = (KnnByteVectorQuery) type.getKnnVectorQuery( - "vector_byte_encoding", "[2, 1, 3, 4]", 3, null, null, null, null); + "vector_byte_encoding", "[2, 1, 3, 4]", 3, 3, null, null, null, null); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -1026,7 +1027,14 @@ public void testFilteredSearchThreshold_byteThresholdInInput_shouldSetCustomThre KnnByteVectorQuery vectorQuery = (KnnByteVectorQuery) type.getKnnVectorQuery( - "vector_byte_encoding", "[2, 1, 3, 4]", 3, null, null, null, expectedThreshold); + "vector_byte_encoding", + "[2, 1, 3, 4]", + 3, + 3, + null, + null, + null, + expectedThreshold); KnnSearchStrategy.Hnsw strategy = (KnnSearchStrategy.Hnsw) vectorQuery.getSearchStrategy(); Integer threshold = strategy.filteredSearchThreshold(); @@ -1054,6 +1062,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, + 3, null, seedQuery, null, @@ -1087,6 +1096,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, + 3, null, null, earlyTermination, @@ -1121,6 +1131,7 @@ public void testFilteredSearchThreshold_seededByteThresholdInInput_shouldSetCust "vector_byte_encoding", "[2, 1, 3, 4]", 3, + 3, null, seedQuery, 
earlyTermination, diff --git a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java index 35920c882c2..a3e556450f8 100644 --- a/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/vector/KnnQParserTest.java @@ -138,6 +138,65 @@ public void incorrectTopK_shouldThrowException() { SolrException.ErrorCode.BAD_REQUEST); } + @Test + public void efSearchScaleFactorLessThanOne_shouldThrowException() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "efSearchScaleFactor < 1.0 should throw Exception", + "efSearchScaleFactor (0.5) must be >= 1.0", + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=0.5}" + vectorToSearch, + "fl", + "id"), + SolrException.ErrorCode.BAD_REQUEST); + + assertQEx( + "efSearchScaleFactor = 0.0 should throw Exception", + "efSearchScaleFactor (0.0) must be >= 1.0", + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=0.0}" + vectorToSearch, + "fl", + "id"), + SolrException.ErrorCode.BAD_REQUEST); + } + + @Test + public void efSearchScaleFactorNaN_shouldThrowException() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + assertQEx( + "efSearchScaleFactor = NaN should throw Exception", + "efSearchScaleFactor (NaN) must be >= 1.0", + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=NaN}" + vectorToSearch, + "fl", + "id"), + SolrException.ErrorCode.BAD_REQUEST); + } + + @Test + public void efSearchScaleFactorSet_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test functional behavior with efSearchScaleFactor = 2.0 + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=2.0}" + vectorToSearch, + "fl", + "id"), + "//result[@numFound='5']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='4']", + "//result/doc[3]/str[@name='id'][.='2']", + 
"//result/doc[4]/str[@name='id'][.='10']", + "//result/doc[5]/str[@name='id'][.='3']"); + } + @Test public void topKMissing_shouldReturnDefaultTopK() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -442,6 +501,63 @@ public void correctQuery_shouldRankBySimilarityFunction() { "//result/doc[10]/str[@name='id'][.='8']"); } + @Test + public void efSearchScaleFactorWithEarlyTermination_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test efSearchScaleFactor with early termination enabled - should return results + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=5 efSearchScaleFactor=2.0 earlyTermination=true saturationThreshold=0.989 patience=10}" + + vectorToSearch, + "fl", + "id"), + "//result[@numFound='5']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='4']"); + } + + @Test + public void efSearchScaleFactorWithSeedQuery_shouldWorkCorrectly() { + String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; + + // Test efSearchScaleFactor with seed query - should return results + assertQ( + req( + CommonParams.Q, + "{!knn f=vector topK=4 efSearchScaleFactor=1.5 seedQuery='id:(1 4 7 8 9)'}" + + vectorToSearch, + "fl", + "id"), + "//result[@numFound='4']"); + } + + @Test + public void efSearchScaleFactorWithByteVectors_shouldWorkCorrectly() { + String vectorToSearch = "[2, 2, 1, 3]"; + + // Test functional behavior with byte vectors and efSearchScaleFactor + assertQ( + req( + CommonParams.Q, + "{!knn f=vector_byte_encoding topK=3 efSearchScaleFactor=1.5}" + vectorToSearch, + "fl", + "id"), + "//result[@numFound='3']", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='1']"); + + // Also test with default efSearchScaleFactor + assertQ( + req(CommonParams.Q, "{!knn f=vector_byte_encoding topK=3}" + vectorToSearch, "fl", "id"), + "//result[@numFound='3']", + "//result/doc[1]/str[@name='id'][.='2']", + 
"//result/doc[2]/str[@name='id'][.='3']", + "//result/doc[3]/str[@name='id'][.='1']"); + } + @Test public void knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() { String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -975,7 +1091,7 @@ public void knnQueryAsRerank_shouldAddSimilarityFunctionScore() { } @Test - public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsKnnFloatVectorQuery() { + public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsSolrKnnFloatVectorQuery() { // It verifies that when no early termination parameters are provided, // the default behavior is applied (early termination is disabled), and no special logic is // triggered. @@ -990,11 +1106,11 @@ public void testKnnFloatWithoutExplicitlyEarlyTermination_returnsKnnFloatVectorQ "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test - public void testKnnFloatWithoutEarlyTermination_returnsKnnFloatVectorQuery() { + public void testKnnFloatWithoutEarlyTermination_returnsSolrKnnFloatVectorQuery() { // It verifies that when early termination is explicitly set to false, no special logic is // triggered. 
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]"; @@ -1008,7 +1124,7 @@ public void testKnnFloatWithoutEarlyTermination_returnsKnnFloatVectorQuery() { "debugQuery", "true"), "//result[@numFound='5']", - "//str[@name='parsedquery'][.='KnnFloatVectorQuery(KnnFloatVectorQuery:vector[1.0,...][5])']"); + "//str[@name='parsedquery'][.='SolrKnnFloatVectorQuery(SolrKnnFloatVectorQuery:vector[1.0,...][5])']"); } @Test @@ -1029,7 +1145,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", defaultSaturationThreshold, defaultPatience); @@ -1068,7 +1184,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1101,7 +1217,7 @@ public void testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][5]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][5]})", explicitSaturationThreshold, explicitPatience); @@ -1133,7 +1249,7 @@ public void 
testKnnFloatWithEarlyTerminationDefaultParams_returnsPatienceKnnVect String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][10]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][10]})", explicitSaturationThreshold, explicitPatience); @@ -1219,7 +1335,7 @@ public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1239,7 +1355,7 @@ public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][4]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1 id:4 id:7 id:8 id:9, seedWeight=null, delegate=SolrKnnByteVectorQuery:vector_byte_encoding[2,...][4]})']"); } @Test @@ -1286,7 +1402,7 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { "debugQuery", "true"), "//result[@numFound='4']", - "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=KnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']"); + "//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=SolrKnnFloatVectorQuery:vector[0.1,...][4], seedWeight=null, 
delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]})']"); } @Test @@ -1315,9 +1431,10 @@ public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() { // Verify that a seedWeight field is present — its value (BooleanWeight@) includes a // hash code that changes on each run, so it cannot be asserted explicitly "//str[@name='parsedquery'][contains(.,'seedWeight=')]", - // Verify that the final delegate is a KnnFloatVectorQuery with the expected vector and topK + // Verify that the final delegate is a SolrKnnFloatVectorQuery with the expected vector and + // topK // value - "//str[@name='parsedquery'][contains(.,'delegate=KnnFloatVectorQuery:vector[1.0,...][4]')]"); + "//str[@name='parsedquery'][contains(.,'delegate=SolrKnnFloatVectorQuery:vector[1.0,...][4]')]"); } @Test diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java index bfe42e30842..be47a160f64 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/search/TextToVectorQParserTest.java @@ -406,7 +406,7 @@ public void earlyTerminationEnabled_returnsPatienceKnnVectorQuery() throws Excep String expectedParsedQuery = String.format( Locale.US, - "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=KnnFloatVectorQuery:vector[1.0,...][5]})", + "PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=%.3f, patience=%d, delegate=SolrKnnFloatVectorQuery:vector[1.0,...][5]})", defaultSaturationThreshold, defaultPatience); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 
87eb6941ba9..db5d3dd5639 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -455,10 +455,24 @@ Our recommendation is to rely on the default value and change this parameter onl + This parameter must be used together with `saturationThreshold`; either specify both to customize the behavior, or omit both to rely on the default values. +`efSearchScaleFactor`:: ++ +[%autowidth,frame=none] +|=== +|Optional | Default: `1.0` +|=== ++ +(advanced) Multiplier factor for calculating how many candidates the HNSW algorithm examines during search. ++ +The effective `efSearch` value is calculated internally as `efSearchScaleFactor * topK`. Lower values fetch fewer candidates for faster performance but may miss some good matches. Higher values fetch more candidates, improving recall but slowing down the search. ++ +Accepted values: +Any float >= 1.0. + Here's an example of a `knn` search using the early termination with input parameters: [source,text] -?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10}[1.0, 2.0, 3.0, 4.0] +?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 patience=10 efSearchScaleFactor=3.0}[1.0, 2.0, 3.0, 4.0] `seedQuery`:: + diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc index c6e61e70969..4a53180323e 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc @@ -137,6 +137,10 @@ Solr now lets you access models encoded in ONNX format, commonly sourced from Hu The DocumentCategorizerUpdateProcessorFactorythat lets you perform sentiment and other classification tasks on fields. It is available as part of the `analysis-extras` module. 
+=== Vector Search Enhancements + +* The `efSearchScaleFactor` parameter is now available for the KNN query parser (SOLR-17928). This parameter controls how many candidate vectors are explored during HNSW graph traversal, allowing users to independently tune search accuracy versus the number of results returned. Previously, improving accuracy required increasing `topK` (which returns more results), but `efSearchScaleFactor` enables exploring more candidates while still receiving exactly `topK` results. The `efSearch` value is calculated internally as `efSearchScaleFactor * topK`. Default value is `1.0`, which means `efSearch` defaults to `topK`. + === Deprecation removals * The `jaegertracer-configurator` module, which was deprecated in 9.2, is removed. Users should migrate to the `opentelemetry` module.