From d72e1d7d86dc1ce3c1914484f2c903a629cfa1cd Mon Sep 17 00:00:00 2001 From: "min.tian" Date: Sun, 4 Jan 2026 02:37:29 +0000 Subject: [PATCH] feat: add OceanBase UI config settings Signed-off-by: min.tian --- .../backend/clients/oceanbase/config.py | 12 +- .../components/run_test/generateTasks.py | 7 +- .../frontend/config/dbCaseConfigs.py | 103 ++++++++++++++++++ vectordb_bench/models.py | 4 + 4 files changed, 115 insertions(+), 11 deletions(-) diff --git a/vectordb_bench/backend/clients/oceanbase/config.py b/vectordb_bench/backend/clients/oceanbase/config.py index e074f2268..1f37cfc75 100644 --- a/vectordb_bench/backend/clients/oceanbase/config.py +++ b/vectordb_bench/backend/clients/oceanbase/config.py @@ -1,6 +1,6 @@ from typing import TypedDict -from pydantic import BaseModel, SecretStr, validator +from pydantic import BaseModel, SecretStr from ..api import DBCaseConfig, DBConfig, IndexType, MetricType @@ -31,14 +31,6 @@ def to_dict(self) -> OceanBaseConfigDict: "database": self.database, } - @validator("*") - def not_empty_field(cls, v: any, field: any): - if field.name in ["password", "host", "db_label"]: - return v - if isinstance(v, str | SecretStr) and len(v) == 0: - raise ValueError("Empty string!") - return v - class OceanBaseIndexConfig(BaseModel): index: IndexType @@ -83,7 +75,7 @@ def search_param(self) -> dict: class OceanBaseIVFConfig(OceanBaseIndexConfig, DBCaseConfig): - m: int + m: int | None = None sample_per_nlist: int nbits: int | None = None nlist: int diff --git a/vectordb_bench/frontend/components/run_test/generateTasks.py b/vectordb_bench/frontend/components/run_test/generateTasks.py index d8a678ffc..725dea769 100644 --- a/vectordb_bench/frontend/components/run_test/generateTasks.py +++ b/vectordb_bench/frontend/components/run_test/generateTasks.py @@ -6,12 +6,17 @@ def generate_tasks(activedDbList: list[DB], dbConfigs, activedCaseList: list[Cas tasks = [] for db in activedDbList: for case in activedCaseList: + cfg = {key.value: value for key, value in allCaseConfigs[db][case].items()} + # Many DBCaseConfig models require an `index` field, while the UI stores the selection under `IndexType`. + # Passing both keeps backwards-compatibility (extra fields are ignored) and enables strict models (e.g. OceanBase). + if CaseConfigParamType.IndexType in allCaseConfigs[db][case] and "index" not in cfg: + cfg["index"] = allCaseConfigs[db][case][CaseConfigParamType.IndexType] task = TaskConfig( db=db.value, db_config=dbConfigs[db], case_config=case, db_case_config=db.case_config_cls(allCaseConfigs[db][case].get(CaseConfigParamType.IndexType, None))( - **{key.value: value for key, value in allCaseConfigs[db][case].items()} + **cfg ), ) tasks.append(task) diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py index dd7b5dd90..6a32e5ff1 100644 --- a/vectordb_bench/frontend/config/dbCaseConfigs.py +++ b/vectordb_bench/frontend/config/dbCaseConfigs.py @@ -423,6 +423,22 @@ class CaseConfigInput(BaseModel): }, ) +CaseConfigParamInput_IndexType_OceanBase = CaseConfigInput( + label=CaseConfigParamType.IndexType, + inputType=InputType.Option, + inputHelp="Select OceanBase index type", + inputConfig={ + "options": [ + IndexType.HNSW.value, + IndexType.HNSW_SQ.value, + IndexType.HNSW_BQ.value, + IndexType.IVFFlat.value, + IndexType.IVFSQ8.value, + IndexType.IVFPQ.value, + ], + }, +) + CaseConfigParamInput_IndexType_PgDiskANN = CaseConfigInput( label=CaseConfigParamType.IndexType, inputHelp="Select Index Type", @@ -685,6 +701,20 @@ class CaseConfigInput(BaseModel): isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value, ) +CaseConfigParamInput_m_OceanBase = CaseConfigInput( + label=CaseConfigParamType.m, + displayLabel="m", + inputHelp="HNSW graph degree (m) for OceanBase HNSW/HNSW_SQ/HNSW_BQ", + inputType=InputType.Number, + inputConfig={ + "min": 4, + "max": 128, + "value": 16, + }, + isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) + in [IndexType.HNSW.value, IndexType.HNSW_SQ.value, IndexType.HNSW_BQ.value], +) + CaseConfigParamInput_EFConstruction_Milvus = CaseConfigInput( label=CaseConfigParamType.EFConstruction, @@ -703,6 +733,20 @@ class CaseConfigInput(BaseModel): ], ) +CaseConfigParamInput_EFConstruction_OceanBase = CaseConfigInput( + label=CaseConfigParamType.EFConstruction, + displayLabel="efConstruction", + inputHelp="HNSW efConstruction for OceanBase HNSW/HNSW_SQ/HNSW_BQ", + inputType=InputType.Number, + inputConfig={ + "min": 8, + "max": 65535, + "value": 256, + }, + isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) + in [IndexType.HNSW.value, IndexType.HNSW_SQ.value, IndexType.HNSW_BQ.value], +) + CaseConfigParamInput_SQType = CaseConfigInput( label=CaseConfigParamType.sq_type, inputType=InputType.Option, @@ -862,6 +906,48 @@ class CaseConfigInput(BaseModel): isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value, ) +CaseConfigParamInput_ef_search_OceanBase = CaseConfigInput( + label=CaseConfigParamType.ef_search, + displayLabel="ef_search", + inputHelp="HNSW ef_search (session var ob_hnsw_ef_search) for OceanBase", + inputType=InputType.Number, + inputConfig={ + "min": 1, + "max": 65535, + "value": 100, + }, + isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) + in [IndexType.HNSW.value, IndexType.HNSW_SQ.value, IndexType.HNSW_BQ.value], +) + +CaseConfigParamInput_sample_per_nlist_OceanBase = CaseConfigInput( + label=CaseConfigParamType.sample_per_nlist, + displayLabel="sample_per_nlist", + inputHelp="OceanBase IVF training sample multiplier (total samples = sample_per_nlist * nlist)", + inputType=InputType.Number, + inputConfig={ + "min": 1, + "max": 1000000, + "value": 256, + }, + isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) + in [IndexType.IVFFlat.value, IndexType.IVFSQ8.value, IndexType.IVFPQ.value], +) + +CaseConfigParamInput_ivf_nprobes_OceanBase = CaseConfigInput( + label=CaseConfigParamType.ivf_nprobes, + displayLabel="ivf_nprobes", + inputHelp="OceanBase IVF search probes (session var ob_ivf_nprobes)", + inputType=InputType.Number, + inputConfig={ + "min": 1, + "max": 65535, + "value": 10, + }, + isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) + in [IndexType.IVFFlat.value, IndexType.IVFSQ8.value, IndexType.IVFPQ.value], +) + CaseConfigParamInput_EFConstruction_PgVector = CaseConfigInput( label=CaseConfigParamType.ef_construction, inputType=InputType.Number, @@ -2302,6 +2388,19 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_VectorSearchBeamSize_CockroachDB, ] +OceanBaseLoadConfig = [ + CaseConfigParamInput_IndexType_OceanBase, + CaseConfigParamInput_m_OceanBase, + CaseConfigParamInput_EFConstruction_OceanBase, + CaseConfigParamInput_ef_search_OceanBase, + CaseConfigParamInput_Nlist, + CaseConfigParamInput_sample_per_nlist_OceanBase, + CaseConfigParamInput_Nbits_PQ, + CaseConfigParamInput_M_PQ, + CaseConfigParamInput_ivf_nprobes_OceanBase, +] +OceanBasePerformanceConfig = OceanBaseLoadConfig + MariaDBLoadingConfig = [ CaseConfigParamInput_IndexType_MariaDB, CaseConfigParamInput_StorageEngine_MariaDB, @@ -2616,6 +2715,10 @@ class CaseConfigInput(BaseModel): CaseLabel.Load: CockroachDBLoadingConfig, CaseLabel.Performance: CockroachDBPerformanceConfig, }, + DB.OceanBase: { + CaseLabel.Load: OceanBaseLoadConfig, + CaseLabel.Performance: OceanBasePerformanceConfig, + }, } diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py index d9ba91e54..44aff6a79 100644 --- a/vectordb_bench/models.py +++ b/vectordb_bench/models.py @@ -137,6 +137,10 @@ class CaseConfigParamType(Enum): confidence_interval = "confidence_interval" clip = "clip" + # OceanBase IVF parameters + sample_per_nlist = "sample_per_nlist" + ivf_nprobes = "ivf_nprobes" + # CockroachDB parameters min_partition_size = "min_partition_size" max_partition_size = "max_partition_size"