From 339754a4b8aa6b65cfcb563e11a80026fc9a15f4 Mon Sep 17 00:00:00 2001
From: sumanth-fw <gutti.sumanth@freshworks.com>
Date: Mon, 22 Dec 2025 17:31:12 +0530
Subject: [PATCH 1/3] feat(oss-opensearch): Introduce on-disk vector storage
 configuration.

---
 vectordb_bench/backend/clients/oss_opensearch/cli.py   | 10 ++++++++++
 .../backend/clients/oss_opensearch/config.py           |  3 +++
 .../backend/clients/oss_opensearch/oss_opensearch.py   |  3 +++
 vectordb_bench/frontend/config/dbCaseConfigs.py        | 10 ++++++++++
 vectordb_bench/models.py                               |  1 +
 5 files changed, 27 insertions(+)

diff --git a/vectordb_bench/backend/clients/oss_opensearch/cli.py b/vectordb_bench/backend/clients/oss_opensearch/cli.py
index 804a4bc82..51828bbe9 100644
--- a/vectordb_bench/backend/clients/oss_opensearch/cli.py
+++ b/vectordb_bench/backend/clients/oss_opensearch/cli.py
@@ -118,6 +118,16 @@ class OSSOpenSearchTypedDict(TypedDict):
         ),
     ]
 
+    on_disk: Annotated[
+        bool,
+        click.option(
+            "--on-disk",
+            is_flag=True,
+            help="Enable on-disk vector storage mode",
+            default=False,
+        ),
+    ]
+
 
 class OSSOpenSearchHNSWTypedDict(CommonTypedDict, OSSOpenSearchTypedDict, HNSWFlavor1): ...
 
diff --git a/vectordb_bench/backend/clients/oss_opensearch/config.py b/vectordb_bench/backend/clients/oss_opensearch/config.py
index 3f961bf09..343c29380 100644
--- a/vectordb_bench/backend/clients/oss_opensearch/config.py
+++ b/vectordb_bench/backend/clients/oss_opensearch/config.py
@@ -71,6 +71,7 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     force_merge_enabled: bool | None = True
     flush_threshold_size: str | None = "5120mb"
     index_thread_qty_during_force_merge: int = 8
+    on_disk: bool = False
     cb_threshold: str | None = "50%"
     number_of_indexing_clients: int | None = 1
     use_routing: bool = False  # for label-filter cases
@@ -107,6 +108,7 @@ def __eq__(self, obj: any):
             and self.replication_type == obj.replication_type
             and self.knn_derived_source_enabled == obj.knn_derived_source_enabled
             and self.memory_optimized_search == obj.memory_optimized_search
+            and self.on_disk == obj.on_disk
         )
 
     def __hash__(self) -> int:
@@ -123,6 +125,7 @@ def __hash__(self) -> int:
                 self.replication_type,
                 self.knn_derived_source_enabled,
                 self.memory_optimized_search,
+                self.on_disk,
             )
         )
 
diff --git a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
index a790834de..3ea680339 100644
--- a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
+++ b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
@@ -330,6 +330,9 @@ def _create_index(self, client: OpenSearch) -> None:
             "method": self.case_config.index_param(),
         }
 
+        if self.case_config.on_disk:
+            properties[self.vector_col_name]["mode"] = "on_disk"
+
         mappings = {
             "properties": properties,
         }
diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py
index 9348c243e..15188dddd 100644
--- a/vectordb_bench/frontend/config/dbCaseConfigs.py
+++ b/vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -1793,6 +1793,14 @@ class CaseConfigInput(BaseModel):
     },
 )
 
+CaseConfigParamInput_ON_DISK_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.on_disk,
+    displayLabel="On Disk",
+    inputHelp="Enable on-disk vector storage mode (The on_disk mode only works with the float data type.)",
+    inputType=InputType.Bool,
+    inputConfig={"value": False},
+)
+
 CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch = CaseConfigInput(
     label=CaseConfigParamType.number_of_indexing_clients,
     displayLabel="Number of Indexing Clients",
@@ -2337,6 +2345,7 @@ class CaseConfigInput(BaseModel):
     CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,
     CaseConfigParamInput_MEMORY_OPTIMIZED_SEARCH_AWSOpensearch,
     CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
+    CaseConfigParamInput_ON_DISK_AWSOpensearch,
 ]
 
 AWSOpenSearchPerformanceConfig = [
@@ -2354,6 +2363,7 @@ class CaseConfigInput(BaseModel):
     CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,
     CaseConfigParamInput_MEMORY_OPTIMIZED_SEARCH_AWSOpensearch,
     CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
+    CaseConfigParamInput_ON_DISK_AWSOpensearch,
 ]
 
 # Map DB to config
diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py
index cce0fa116..d67754187 100644
--- a/vectordb_bench/models.py
+++ b/vectordb_bench/models.py
@@ -113,6 +113,7 @@ class CaseConfigParamType(Enum):
     num_sub_vectors = "num_sub_vectors"
     sample_rate = "sample_rate"
     index_thread_qty_during_force_merge = "index_thread_qty_during_force_merge"
+    on_disk = "on_disk"
     number_of_indexing_clients = "number_of_indexing_clients"
     number_of_shards = "number_of_shards"
     number_of_replicas = "number_of_replicas"

From 972d8d64f53a2aa686265712cb4b48ea224afea9 Mon Sep 17 00:00:00 2001
From: sumanth-fw <gutti.sumanth@freshworks.com>
Date: Mon, 22 Dec 2025 17:52:44 +0530
Subject: [PATCH 2/3] feat(oss-opensearch): Enhance on-disk vector storage
 option with engine-specific display logic and improved help text.

---
 vectordb_bench/backend/clients/oss_opensearch/cli.py | 4 +++-
 vectordb_bench/frontend/config/dbCaseConfigs.py      | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/vectordb_bench/backend/clients/oss_opensearch/cli.py b/vectordb_bench/backend/clients/oss_opensearch/cli.py
index 51828bbe9..ee21a62fb 100644
--- a/vectordb_bench/backend/clients/oss_opensearch/cli.py
+++ b/vectordb_bench/backend/clients/oss_opensearch/cli.py
@@ -123,8 +123,9 @@ class OSSOpenSearchTypedDict(TypedDict):
         click.option(
             "--on-disk",
             is_flag=True,
-            help="Enable on-disk vector storage mode",
+            help="Enable on-disk vector storage mode only for faiss engine (The on_disk mode only works with the float data type.)",
             default=False,
+            required=False,
         ),
     ]
 
@@ -160,6 +161,7 @@ def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]):
             M=parameters["m"],
             engine=OSSOS_Engine(parameters["engine"]),
             quantization_type=OSSOpenSearchQuantization(parameters["quantization_type"]),
+            on_disk=parameters["on_disk"],
         ),
         **parameters,
     )
diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py
index 15188dddd..3dd0a2e9a 100644
--- a/vectordb_bench/frontend/config/dbCaseConfigs.py
+++ b/vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -1799,6 +1799,7 @@ class CaseConfigInput(BaseModel):
     inputHelp="Enable on-disk vector storage mode (The on_disk mode only works with the float data type.)",
     inputType=InputType.Bool,
     inputConfig={"value": False},
+    isDisplayed=lambda config: (config.get(CaseConfigParamType.engine_name, "").lower() == "faiss"),
 )
 
 CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch = CaseConfigInput(

From cd3079c7067e6c6d53e25beb5d0099a51bb94592 Mon Sep 17 00:00:00 2001
From: sumanth-fw <gutti.sumanth@freshworks.com>
Date: Mon, 22 Dec 2025 23:02:56 +0530
Subject: [PATCH 3/3] feat(oss-opensearch): Implement version-specific
 properties handling for on-disk storage mode support in OpenSearch.

---
 .../clients/oss_opensearch/oss_opensearch.py  | 33 +++++++++++++++++--
 .../frontend/config/dbCaseConfigs.py          |  2 +-
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
index 3ea680339..8949b8f98 100644
--- a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
+++ b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py
@@ -43,6 +43,17 @@
     },
 ]
 
+VERSION_SPECIFIC_PROPERTIES_RULES = [
+    {
+        # "mode" needs OpenSearch >= 2.17 and faiss; emitted only for on_disk so other indexes keep the default.
+        "name": "mode",
+        "applies": lambda version, case_config: (
+            case_config.on_disk and version >= Version("2.17") and case_config.engine == OSSOS_Engine.faiss
+        ),
+        "value": lambda case_config: "on_disk",
+    },
+]
+
 
 class OpenSearchError(Exception):
     """Custom exception for OpenSearch operations."""
@@ -274,6 +285,18 @@ def _get_version_specific_settings(self, cluster_version: Version) -> dict:
                 value = setting["value"](self.case_config)
                 version_specific_settings[name] = value
         return version_specific_settings
+    
+    def _get_version_specific_properties(self, cluster_version: Version) -> dict:
+        """Return the vector-field mapping properties that apply to this cluster.
+
+        Each rule in VERSION_SPECIFIC_PROPERTIES_RULES is kept only when its
+        "applies" predicate accepts ``cluster_version`` and the case config.
+        """
+        return {
+            rule["name"]: rule["value"](self.case_config)
+            for rule in VERSION_SPECIFIC_PROPERTIES_RULES
+            if rule["applies"](cluster_version, self.case_config)
+        }
 
     def _get_bulk_manager(self, client: OpenSearch) -> BulkInsertManager:
         """Get bulk insert manager for the given client."""
@@ -291,6 +314,8 @@ def _create_index(self, client: OpenSearch) -> None:
         log.info(f"All case_config parameters: {self.case_config.__dict__}")
 
         settings_manager = self._get_settings_manager(client)
+        cluster_version = self._get_cluster_version(client)
+
         cluster_settings = {
             "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
             "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
@@ -311,13 +336,14 @@ def _create_index(self, client: OpenSearch) -> None:
         }
         settings["index"]["knn.algo_param.ef_search"] = ef_search_value
 
-        version_specific_settings = self._get_version_specific_settings(self._get_cluster_version(client))
+        version_specific_settings = self._get_version_specific_settings(cluster_version)
         if version_specific_settings:
             log.info(f"Applying version-dependent settings: {version_specific_settings}")
             settings["index"].update(version_specific_settings)
 
         # Build properties mapping, excluding _id which is automatically handled by OpenSearch
         properties = {}
+        version_specific_properties = self._get_version_specific_properties(cluster_version)
 
         # Only add id field to properties if it's not the special _id field
         if self.id_col_name != "_id":
@@ -330,8 +356,9 @@ def _create_index(self, client: OpenSearch) -> None:
             "method": self.case_config.index_param(),
         }
 
-        if self.case_config.on_disk:
-            properties[self.vector_col_name]["mode"] = "on_disk"
+        # mode if supported by the version else ignore
+        if("mode" in version_specific_properties):
+            properties[self.vector_col_name]["mode"] = version_specific_properties["mode"]
 
         mappings = {
             "properties": properties,
diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py
index 3dd0a2e9a..4b2a17618 100644
--- a/vectordb_bench/frontend/config/dbCaseConfigs.py
+++ b/vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -1796,7 +1796,7 @@ class CaseConfigInput(BaseModel):
 CaseConfigParamInput_ON_DISK_AWSOpensearch = CaseConfigInput(
     label=CaseConfigParamType.on_disk,
     displayLabel="On Disk",
-    inputHelp="Enable on-disk vector storage mode (The on_disk mode only works with the float data type.)",
+    inputHelp="Enable on-disk vector storage mode (The on_disk mode only works with the float data type.) Supported by OpenSearch >=2.17",
     inputType=InputType.Bool,
     inputConfig={"value": False},
     isDisplayed=lambda config: (config.get(CaseConfigParamType.engine_name, "").lower() == "faiss"),