From f528fdc8c8afd96bb7830931a4dc5057e5d34fa4 Mon Sep 17 00:00:00 2001 From: Akhil-Pathivada Date: Wed, 24 Dec 2025 16:41:19 +0530 Subject: [PATCH] feat(oss-opensearch): Add Scalar Quantization support --- .../backend/clients/oss_opensearch/cli.py | 28 +++++++- .../backend/clients/oss_opensearch/config.py | 51 +++++++++++--- .../frontend/config/dbCaseConfigs.py | 68 +++++++++++++++++-- vectordb_bench/models.py | 2 + 4 files changed, 132 insertions(+), 17 deletions(-) diff --git a/vectordb_bench/backend/clients/oss_opensearch/cli.py b/vectordb_bench/backend/clients/oss_opensearch/cli.py index 804a4bc82..0c4b694ea 100644 --- a/vectordb_bench/backend/clients/oss_opensearch/cli.py +++ b/vectordb_bench/backend/clients/oss_opensearch/cli.py @@ -100,9 +100,31 @@ class OSSOpenSearchTypedDict(TypedDict): str | None, click.option( "--quantization-type", - type=click.Choice(["fp32", "fp16"]), + type=click.Choice(["None", "LuceneSQ", "FaissSQfp16"]), help="quantization type for vectors (in index)", - default="fp32", + default="None", + required=False, + ), + ] + + confidence_interval: Annotated[ + float | None, + click.option( + "--confidence-interval", + type=float, + help="Confidence interval for Lucene SQ (0.0-1.0, optional)", + default=None, + required=False, + ), + ] + + clip: Annotated[ + bool, + click.option( + "--clip", + type=bool, + help="Clip vectors to [-65504, 65504] for FAISS FP16", + default=False, required=False, ), ] @@ -150,6 +172,8 @@ def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]): M=parameters["m"], engine=OSSOS_Engine(parameters["engine"]), quantization_type=OSSOpenSearchQuantization(parameters["quantization_type"]), + confidence_interval=parameters["confidence_interval"], + clip=parameters["clip"], ), **parameters, ) diff --git a/vectordb_bench/backend/clients/oss_opensearch/config.py b/vectordb_bench/backend/clients/oss_opensearch/config.py index 04dc69f49..83fed3d58 100644 --- a/vectordb_bench/backend/clients/oss_opensearch/config.py +++ b/vectordb_bench/backend/clients/oss_opensearch/config.py @@ -51,8 +51,11 @@ class OSSOS_Engine(Enum): class OSSOpenSearchQuantization(Enum): - fp32 = "fp32" - fp16 = "fp16" + """In-memory scalar quantization types""" + + NONE = "None" + LUCENE_SQ = "LuceneSQ" + FAISS_SQFP16 = "FaissSQfp16" # Compression level constants for disk-based mode @@ -98,7 +101,9 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig): cb_threshold: str | None = "50%" number_of_indexing_clients: int | None = 1 use_routing: bool = False # for label-filter cases - quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32 + quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.NONE + confidence_interval: float | None = None + clip: bool = False replication_type: str | None = "DOCUMENT" knn_derived_source_enabled: bool = False memory_optimized_search: bool = False @@ -106,6 +111,23 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig): compression_level: str = CompressionLevel.LEVEL_32X oversample_factor: float = 1.0 + @validator("quantization_type", pre=True, always=True) + def validate_quantization_type(cls, value: any): + """Convert string values to enum""" + if not value: + return OSSOpenSearchQuantization.NONE + + if isinstance(value, OSSOpenSearchQuantization): + return value + + mapping = { + "None": OSSOpenSearchQuantization.NONE, + "LuceneSQ": OSSOpenSearchQuantization.LUCENE_SQ, + "FaissSQfp16": OSSOpenSearchQuantization.FAISS_SQFP16, + } + + return mapping.get(value, OSSOpenSearchQuantization.NONE) + @root_validator def validate_engine_name(cls, values: dict): """Map engine_name string from UI to engine enum""" @@ -130,6 +152,8 @@ def __eq__(self, obj: any): and self.number_of_segments == obj.number_of_segments and self.use_routing == obj.use_routing and self.quantization_type == obj.quantization_type + and self.confidence_interval == obj.confidence_interval + and self.clip == obj.clip and self.replication_type == obj.replication_type and self.knn_derived_source_enabled == obj.knn_derived_source_enabled and self.memory_optimized_search == obj.memory_optimized_search @@ -149,6 +173,8 @@ def __hash__(self) -> int: self.number_of_segments, self.use_routing, self.quantization_type, + self.confidence_interval, + self.clip, self.replication_type, self.knn_derived_source_enabled, self.memory_optimized_search, @@ -173,7 +199,7 @@ def parse_metric(self) -> str: @property def use_quant(self) -> bool: """Only use in-memory quantization when NOT in disk mode""" - return not self.on_disk and self.quantization_type is not OSSOpenSearchQuantization.fp32 + return not self.on_disk and self.quantization_type != OSSOpenSearchQuantization.NONE @property def resolved_engine(self) -> OSSOS_Engine: @@ -207,11 +233,20 @@ def index_param(self) -> dict: }, } + # Add encoder for in-memory quantization if self.use_quant: - method_config["parameters"]["encoder"] = { - "name": "sq", - "parameters": {"type": self.quantization_type.value}, - } + encoder_config = {"name": "sq"} + + if self.quantization_type == OSSOpenSearchQuantization.LUCENE_SQ: + # Lucene SQ: optional confidence_interval + if self.confidence_interval is not None: + encoder_config["parameters"] = {"confidence_interval": self.confidence_interval} + + elif self.quantization_type == OSSOpenSearchQuantization.FAISS_SQFP16 and self.clip: + # FAISS SQfp16: optional clip parameter + encoder_config["parameters"] = {"type": "fp16", "clip": True} + + method_config["parameters"]["encoder"] = encoder_config return method_config diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py index 83a5bb0e0..d67d2939d 100644 --- a/vectordb_bench/frontend/config/dbCaseConfigs.py +++ b/vectordb_bench/frontend/config/dbCaseConfigs.py @@ -1973,17 +1973,65 @@ class CaseConfigInput(BaseModel): isDisplayed=lambda config: config.get(CaseConfigParamType.on_disk, False) == False, ) -CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch = CaseConfigInput( +CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch = CaseConfigInput( label=CaseConfigParamType.quantizationType, displayLabel="Quantization Type", - inputHelp="Scalar quantization type for in-memory vectors", + inputHelp="Scalar quantization for Lucene engine", inputType=InputType.Option, inputConfig={ - "options": ["fp32", "fp16"], - "default": "fp32", + "options": ["None", "LuceneSQ"], + "default": "None", }, - isDisplayed=lambda config: config.get(CaseConfigParamType.on_disk, False) == False, + isDisplayed=lambda config: ( + not config.get(CaseConfigParamType.on_disk, False) and config.get(CaseConfigParamType.engine_name) == "lucene" + ), +) + +CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch = CaseConfigInput( + label=CaseConfigParamType.quantizationType, + displayLabel="Quantization Type", + inputHelp="Scalar quantization for FAISS engine", + inputType=InputType.Option, + inputConfig={ + "options": ["None", "FaissSQfp16"], + "default": "None", + }, + isDisplayed=lambda config: ( + not config.get(CaseConfigParamType.on_disk, False) and config.get(CaseConfigParamType.engine_name) == "faiss" + ), +) + +CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch = CaseConfigInput( + label=CaseConfigParamType.confidence_interval, + displayLabel="Confidence Interval", + inputHelp="Quantile range for Lucene SQ (0.9-1.0, 0 for dynamic, or empty for auto)", + inputType=InputType.Float, + inputConfig={ + "min": 0.0, + "max": 1.0, + "value": None, + "step": 0.1, + }, + isDisplayed=lambda config: ( + not config.get(CaseConfigParamType.on_disk, False) + and config.get(CaseConfigParamType.quantizationType) == "LuceneSQ" + ), +) + +CaseConfigParamInput_CLIP_OSSOpensearch = CaseConfigInput( + label=CaseConfigParamType.clip, + displayLabel="Clip Vectors", + inputHelp="Clip out-of-range values to [-65504, 65504] for FP16", + inputType=InputType.Bool, + inputConfig={ + "value": False, + }, + isDisplayed=lambda config: ( + not config.get(CaseConfigParamType.on_disk, False) + and config.get(CaseConfigParamType.quantizationType) == "FaissSQfp16" + ), ) + MilvusLoadConfig = [ CaseConfigParamInput_IndexType, CaseConfigParamInput_M, @@ -2448,7 +2496,10 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch, CaseConfigParamInput_EFConstruction_AWSOpensearch, - CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch, + CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch, + CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch, + CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch, + CaseConfigParamInput_CLIP_OSSOpensearch, CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch, @@ -2468,7 +2519,10 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch, CaseConfigParamInput_EFConstruction_AWSOpensearch, - CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch, + CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch, + CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch, + CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch, + CaseConfigParamInput_CLIP_OSSOpensearch, CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch, diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py index 0faf0d1ab..d9ba91e54 100644 --- a/vectordb_bench/models.py +++ b/vectordb_bench/models.py @@ -134,6 +134,8 @@ class CaseConfigParamType(Enum): on_disk = "on_disk" compression_level = "compression_level" oversample_factor = "oversample_factor" + confidence_interval = "confidence_interval" + clip = "clip" # CockroachDB parameters min_partition_size = "min_partition_size"