Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions vectordb_bench/backend/clients/oss_opensearch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,31 @@ class OSSOpenSearchTypedDict(TypedDict):
str | None,
click.option(
"--quantization-type",
type=click.Choice(["fp32", "fp16"]),
type=click.Choice(["None", "LuceneSQ", "FaissSQfp16"]),
help="quantization type for vectors (in index)",
default="fp32",
default="None",
required=False,
),
]

# Optional quantile range forwarded to the Lucene SQ encoder; None leaves the
# choice to OpenSearch. Only meaningful when --quantization-type=LuceneSQ.
confidence_interval: Annotated[
float | None,
click.option(
"--confidence-interval",
type=float,
help="Confidence interval for Lucene SQ (0.0-1.0, optional)",
default=None,
required=False,
),
]

# Whether FAISS FP16 quantization should clip out-of-range components instead
# of rejecting them. Only meaningful when --quantization-type=FaissSQfp16.
# NOTE(review): type=bool makes this take an explicit value (e.g. --clip true)
# rather than acting as a flag; confirm is_flag=True was not intended.
clip: Annotated[
bool,
click.option(
"--clip",
type=bool,
help="Clip vectors to [-65504, 65504] for FAISS FP16",
default=False,
required=False,
),
]
Expand Down Expand Up @@ -150,6 +172,8 @@ def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]):
M=parameters["m"],
engine=OSSOS_Engine(parameters["engine"]),
quantization_type=OSSOpenSearchQuantization(parameters["quantization_type"]),
confidence_interval=parameters["confidence_interval"],
clip=parameters["clip"],
),
**parameters,
)
51 changes: 43 additions & 8 deletions vectordb_bench/backend/clients/oss_opensearch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,11 @@ class OSSOS_Engine(Enum):


class OSSOpenSearchQuantization(Enum):
    """In-memory scalar quantization types for the k-NN index.

    The enum values are exactly the strings accepted by the CLI/UI:

    * ``NONE``         -- full precision, no quantization
    * ``LUCENE_SQ``    -- Lucene engine scalar quantization
    * ``FAISS_SQFP16`` -- FAISS fp16 scalar quantization
    """

    NONE = "None"
    LUCENE_SQ = "LuceneSQ"
    FAISS_SQFP16 = "FaissSQfp16"


# Compression level constants for disk-based mode
Expand Down Expand Up @@ -98,14 +101,33 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
cb_threshold: str | None = "50%"
number_of_indexing_clients: int | None = 1
use_routing: bool = False  # for label-filter cases
# In-memory scalar quantization; NONE means full precision.
quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.NONE
# Lucene SQ only: optional quantile range; None lets the engine choose.
confidence_interval: float | None = None
# FAISS SQfp16 only: clip out-of-range components instead of rejecting them.
clip: bool = False
replication_type: str | None = "DOCUMENT"
knn_derived_source_enabled: bool = False
memory_optimized_search: bool = False
on_disk: bool = False
compression_level: str = CompressionLevel.LEVEL_32X
oversample_factor: float = 1.0

@validator("quantization_type", pre=True, always=True)
def validate_quantization_type(cls, value: object) -> "OSSOpenSearchQuantization":
    """Coerce CLI/UI input into an OSSOpenSearchQuantization member.

    Accepts an existing enum member, one of the known value strings
    ("None", "LuceneSQ", "FaissSQfp16"), or a falsy value. Unknown or
    falsy input falls back to NONE rather than raising, preserving the
    permissive behavior of the original string mapping.
    """
    # Falsy (None, "", 0) means "no quantization requested".
    if not value:
        return OSSOpenSearchQuantization.NONE

    if isinstance(value, OSSOpenSearchQuantization):
        return value

    # Enum values are exactly the CLI strings, so look up by value instead
    # of maintaining a duplicate string->member mapping.
    try:
        return OSSOpenSearchQuantization(value)
    except ValueError:
        return OSSOpenSearchQuantization.NONE

@root_validator
def validate_engine_name(cls, values: dict):
"""Map engine_name string from UI to engine enum"""
Expand All @@ -130,6 +152,8 @@ def __eq__(self, obj: any):
and self.number_of_segments == obj.number_of_segments
and self.use_routing == obj.use_routing
and self.quantization_type == obj.quantization_type
and self.confidence_interval == obj.confidence_interval
and self.clip == obj.clip
and self.replication_type == obj.replication_type
and self.knn_derived_source_enabled == obj.knn_derived_source_enabled
and self.memory_optimized_search == obj.memory_optimized_search
Expand All @@ -149,6 +173,8 @@ def __hash__(self) -> int:
self.number_of_segments,
self.use_routing,
self.quantization_type,
self.confidence_interval,
self.clip,
self.replication_type,
self.knn_derived_source_enabled,
self.memory_optimized_search,
Expand All @@ -173,7 +199,7 @@ def parse_metric(self) -> str:
@property
def use_quant(self) -> bool:
    """Only use in-memory quantization when NOT in disk mode.

    Disk-based mode carries its own compression settings, so the in-memory
    scalar-quantization encoder is applied only when on_disk is False and a
    quantization type other than NONE was selected.
    """
    return not self.on_disk and self.quantization_type != OSSOpenSearchQuantization.NONE

@property
def resolved_engine(self) -> OSSOS_Engine:
Expand Down Expand Up @@ -207,11 +233,20 @@ def index_param(self) -> dict:
},
}

# Add encoder for in-memory quantization
if self.use_quant:
method_config["parameters"]["encoder"] = {
"name": "sq",
"parameters": {"type": self.quantization_type.value},
}
encoder_config = {"name": "sq"}

if self.quantization_type == OSSOpenSearchQuantization.LUCENE_SQ:
# Lucene SQ: optional confidence_interval
if self.confidence_interval is not None:
encoder_config["parameters"] = {"confidence_interval": self.confidence_interval}

elif self.quantization_type == OSSOpenSearchQuantization.FAISS_SQFP16 and self.clip:
# FAISS SQfp16: optional clip parameter
encoder_config["parameters"] = {"type": "fp16", "clip": True}

method_config["parameters"]["encoder"] = encoder_config

return method_config

Expand Down
68 changes: 61 additions & 7 deletions vectordb_bench/frontend/config/dbCaseConfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1973,17 +1973,65 @@ class CaseConfigInput(BaseModel):
isDisplayed=lambda config: config.get(CaseConfigParamType.on_disk, False) == False,
)

# Lucene-engine quantization selector (UI). Shown only when the lucene engine
# is selected and disk-based mode is off.
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch = CaseConfigInput(
    label=CaseConfigParamType.quantizationType,
    displayLabel="Quantization Type",
    inputHelp="Scalar quantization for Lucene engine",
    inputType=InputType.Option,
    inputConfig={
        "options": ["None", "LuceneSQ"],
        "default": "None",
    },
    isDisplayed=lambda config: (
        not config.get(CaseConfigParamType.on_disk, False) and config.get(CaseConfigParamType.engine_name) == "lucene"
    ),
)

# FAISS-engine quantization selector (UI): offered only while the FAISS engine
# is active and disk-based mode is off.
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch = CaseConfigInput(
    label=CaseConfigParamType.quantizationType,
    displayLabel="Quantization Type",
    inputHelp="Scalar quantization for FAISS engine",
    inputType=InputType.Option,
    inputConfig={"options": ["None", "FaissSQfp16"], "default": "None"},
    isDisplayed=lambda config: (
        config.get(CaseConfigParamType.engine_name) == "faiss"
        and not config.get(CaseConfigParamType.on_disk, False)
    ),
)

# Confidence-interval input for Lucene SQ: visible only when LuceneSQ is the
# chosen quantization type and the index is not disk-based.
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch = CaseConfigInput(
    label=CaseConfigParamType.confidence_interval,
    displayLabel="Confidence Interval",
    inputHelp="Quantile range for Lucene SQ (0.9-1.0, 0 for dynamic, or empty for auto)",
    inputType=InputType.Float,
    inputConfig={"min": 0.0, "max": 1.0, "value": None, "step": 0.1},
    isDisplayed=lambda config: (
        config.get(CaseConfigParamType.quantizationType) == "LuceneSQ"
        and not config.get(CaseConfigParamType.on_disk, False)
    ),
)

# Clip toggle for FAISS fp16 quantization: visible only when FaissSQfp16 is
# the chosen quantization type and the index is not disk-based.
CaseConfigParamInput_CLIP_OSSOpensearch = CaseConfigInput(
    label=CaseConfigParamType.clip,
    displayLabel="Clip Vectors",
    inputHelp="Clip out-of-range values to [-65504, 65504] for FP16",
    inputType=InputType.Bool,
    inputConfig={"value": False},
    isDisplayed=lambda config: (
        config.get(CaseConfigParamType.quantizationType) == "FaissSQfp16"
        and not config.get(CaseConfigParamType.on_disk, False)
    ),
)

MilvusLoadConfig = [
CaseConfigParamInput_IndexType,
CaseConfigParamInput_M,
Expand Down Expand Up @@ -2448,7 +2496,10 @@ class CaseConfigInput(BaseModel):
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
CaseConfigParamInput_M_AWSOpensearch,
CaseConfigParamInput_EFConstruction_AWSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch,
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch,
CaseConfigParamInput_CLIP_OSSOpensearch,
CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
Expand All @@ -2468,7 +2519,10 @@ class CaseConfigInput(BaseModel):
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
CaseConfigParamInput_M_AWSOpensearch,
CaseConfigParamInput_EFConstruction_AWSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_OSSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_LUCENE_OSSOpensearch,
CaseConfigParamInput_QUANTIZATION_TYPE_FAISS_OSSOpensearch,
CaseConfigParamInput_CONFIDENCE_INTERVAL_OSSOpensearch,
CaseConfigParamInput_CLIP_OSSOpensearch,
CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
Expand Down
2 changes: 2 additions & 0 deletions vectordb_bench/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ class CaseConfigParamType(Enum):
on_disk = "on_disk"
compression_level = "compression_level"
oversample_factor = "oversample_factor"
confidence_interval = "confidence_interval"
clip = "clip"

# CockroachDB parameters
min_partition_size = "min_partition_size"
Expand Down