From 084fa57a62e533132df0d41fa1ff8dd7f23811d1 Mon Sep 17 00:00:00 2001
From: zhou-haitao <1300182097@qq.com>
Date: Wed, 3 Dec 2025 17:34:25 +0800
Subject: [PATCH 1/4] fix docs

---
 .../getting-started/installation_npu.md    |  6 +--
 docs/source/getting-started/quick_start.md | 46 ++++++++++++++----
 .../user-guide/prefix-cache/nfs_store.md   | 48 +++++++++++++++----
 3 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/docs/source/getting-started/installation_npu.md b/docs/source/getting-started/installation_npu.md
index 571e96e15..f59109895 100644
--- a/docs/source/getting-started/installation_npu.md
+++ b/docs/source/getting-started/installation_npu.md
@@ -6,7 +6,7 @@ This document describes how to install unified-cache-management when using Ascen
 - Python: >= 3.9, < 3.12
 - A hardware with Ascend NPU. It’s usually the Atlas 800 A2 series.
 
-The current version of unified-cache-management based on vLLM-Ascend v0.9.2rc1, refer to [vLLM-Ascend Installation Requirements](https://vllm-ascend.readthedocs.io/en/latest/installation.html#requirements) to meet the requirements.
+The current version of unified-cache-management is based on vLLM-Ascend v0.11.0rc1 and v0.9.1; refer to [vLLM-Ascend Installation Requirements](https://vllm-ascend.readthedocs.io/en/latest/installation.html#requirements) to meet the requirements.
 
 You have 2 ways to install for now:
 - Setup from code: First, prepare vLLM-Ascend environment, then install unified-cache-management from source code.
@@ -17,14 +17,14 @@ You have 2 ways to install for now:
 ### Prepare vLLM-Ascend Environment
 For the sake of environment isolation and simplicity, we recommend preparing the vLLM-Ascend environment by pulling the official, pre-built vLLM-Ascend Docker image.
 ```bash
-docker pull quay.io/ascend/vllm-ascend:v0.9.2rc1
+docker pull quay.io/ascend/vllm-ascend:v0.9.1
 ```
 Use the following command to run your own container:
 ```bash
 # Update DEVICE according to your device (/dev/davinci[0-7])
 export DEVICE=/dev/davinci7
 # Update the vllm-ascend image
-export IMAGE=quay.io/ascend/vllm-ascend:v0.9.2rc1
+export IMAGE=quay.io/ascend/vllm-ascend:v0.9.1
 docker run --rm \
   --name vllm-ascend-env \
   --device $DEVICE \
diff --git a/docs/source/getting-started/quick_start.md b/docs/source/getting-started/quick_start.md
index 098c2eeb7..ed8a4c361 100644
--- a/docs/source/getting-started/quick_start.md
+++ b/docs/source/getting-started/quick_start.md
@@ -54,19 +54,47 @@ python offline_inference.py
 
 For online inference , vLLM with our connector can also be deployed as a server that implements the OpenAI API protocol.
 
-First, specify the python hash seed by:
-```bash
-export PYTHONHASHSEED=123456
-```
+
 Create a config yaml like following and save it to your own directory:
 ```yaml
 # UCM Configuration File Example
-# Refer to file unified-cache-management/examples/ucm_config_example.yaml for more details
-ucm_connector_name: "UcmNfsStore"
-
-ucm_connector_config:
-  storage_backends: "/mnt/test"
+#
+# This file demonstrates how to configure UCM using YAML.
+# You can use this config file by setting its path in kv_connector_extra_config in your launch script or on the command line, like this:
+# kv_connector_extra_config={"UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"}
+#
+# Alternatively, you can still use kv_connector_extra_config in KVTransferConfig
+# for backward compatibility.
+
+# Connector name (e.g., "UcmNfsStore", "UcmDramStore")
+ucm_connectors:
+  - ucm_connector_name: "UcmNfsStore"
+    ucm_connector_config:
+      storage_backends: "/mnt/test"
+      use_direct: false
+
+load_only_first_rank: false
+
+# Enable UCM metrics so they can be monitored online via Grafana and Prometheus.
+# metrics_config_path: "/workspace/unified-cache-management/examples/metrics/metrics_configs.yaml"
+
+# Sparse attention configuration
+# Format 1: Dictionary format (for methods like ESA, KvComp)
+# ucm_sparse_config:
+#   ESA:
+#     init_window_sz: 1
+#     local_window_sz: 2
+#     min_blocks: 4
+#     sparse_ratio: 0.3
+#     retrieval_stride: 5
+#   # Or for GSA:
+#   GSA: {}
+
+
+# Whether to use layerwise loading/saving (optional, default: True for UnifiedCacheConnectorV1)
+# use_layerwise: true
+# hit_ratio: 0.9
 ```
 
 Run the following command to start the vLLM server with the Qwen/Qwen2.5-14B-Instruct model and your config file path:
diff --git a/docs/source/user-guide/prefix-cache/nfs_store.md b/docs/source/user-guide/prefix-cache/nfs_store.md
index 741fcedf7..dd7d36fe9 100644
--- a/docs/source/user-guide/prefix-cache/nfs_store.md
+++ b/docs/source/user-guide/prefix-cache/nfs_store.md
@@ -90,12 +90,44 @@ To use the NFS connector, you need to configure the `connector_config` dictionar
 Create a config yaml like following and save it to your own directory:
 ```yaml
 # UCM Configuration File Example
-# Refer to file unified-cache-management/examples/ucm_config_example.yaml for more details
-ucm_connector_name: "UcmNfsStore"
+#
+# This file demonstrates how to configure UCM using YAML.
+# You can use this config file by setting its path in kv_connector_extra_config in your launch script or on the command line, like this:
+# kv_connector_extra_config={"UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"}
+#
+# Alternatively, you can still use kv_connector_extra_config in KVTransferConfig
+# for backward compatibility.
+
+# Connector name (e.g., "UcmNfsStore", "UcmDramStore")
+ucm_connectors:
+  - ucm_connector_name: "UcmNfsStore"
+    ucm_connector_config:
+      storage_backends: "/mnt/test"
+      use_direct: false
+
+load_only_first_rank: false
+
+# Enable UCM metrics so they can be monitored online via Grafana and Prometheus.
+# metrics_config_path: "/workspace/unified-cache-management/examples/metrics/metrics_configs.yaml"
+
+# Sparse attention configuration
+# Format 1: Dictionary format (for methods like ESA, KvComp)
+# ucm_sparse_config:
+#   ESA:
+#     init_window_sz: 1
+#     local_window_sz: 2
+#     min_blocks: 4
+#     sparse_ratio: 0.3
+#     retrieval_stride: 5
+#   # Or for GSA:
+#   GSA: {}
+
+
+# Whether to use layerwise loading/saving (optional, default: True for UnifiedCacheConnectorV1)
+# use_layerwise: true
+# hit_ratio: 0.9
+
-ucm_connector_config:
-  storage_backends: "/mnt/test"
-  transferStreamNumber: 32
 ```
 
 ## Launching Inference
@@ -116,7 +148,6 @@ Then run the script as follows:
 ```bash
 cd examples/
-export PYTHONHASHSEED=123456
 python offline_inference.py
 ```
@@ -166,10 +197,9 @@ curl http://localhost:7800/v1/completions \
 ```
 
 To quickly experience the NFS Connector's effect:
-1. Start the service with:
-   `--no-enable-prefix-caching`
+1. Start the service with: `--no-enable-prefix-caching`
 2. Send the same request (exceed 128 tokens) twice consecutively
-3. Remember to enable prefix caching (do not add `--no-enable-prefix-caching`) in production environments.
+
 ### Log Message Structure
 ```text
 [UCMNFSSTORE] [I] Task(,,,) finished, elapsed
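
The comments in the YAML blocks this patch adds describe a precedence rule: a `UCM_CONFIG_FILE` entry in `kv_connector_extra_config` points at a YAML file, while plain inline keys remain supported for backward compatibility. A minimal sketch of that resolution order, under stated assumptions (`resolve_ucm_config` and `CONFIG_KEY` are illustrative names, not UCM's actual API):

```python
# Hypothetical sketch of the config-resolution order described in the
# YAML comments above. Not UCM's real implementation: the function name,
# CONFIG_KEY constant, and return shape are illustrative only.

CONFIG_KEY = "UCM_CONFIG_FILE"

def resolve_ucm_config(extra_config: dict) -> dict:
    """Return the effective UCM config source for a kv_connector_extra_config dict."""
    if CONFIG_KEY in extra_config:
        # File-based form: the YAML file at this path carries the full config.
        return {"source": "yaml", "path": extra_config[CONFIG_KEY]}
    # Backward-compatible form: the inline dict itself is the config.
    return {"source": "inline", "config": extra_config}

# File-based configuration, as in the launch examples in this patch.
file_based = resolve_ucm_config(
    {CONFIG_KEY: "/workspace/unified-cache-management/examples/ucm_config_example.yaml"}
)

# Inline configuration, the backward-compatible form.
inline = resolve_ucm_config(
    {"ucm_connector_name": "UcmNfsStore",
     "ucm_connector_config": {"storage_backends": "/mnt/test"}}
)
```

Under this reading, a deployment switches between the two forms by adding or removing the single `UCM_CONFIG_FILE` key, leaving the rest of the launch command unchanged.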