Skip to content

Commit 406c548

Browse files
HaoLi980405, HelenJia98, zbb200819
authored
[feature] GSA (#190)
* GSA Impl * nfs * async prefetch pre deal * nfs * 间隔计算topk * 修改编译方式 * GSA默认关闭预取 * execute finish process topk * GSA Impl * nfs * nfs * async prefetch pre deal * 间隔计算topk * 修改编译方式 * GSA默认关闭预取 * execute finish process topk * change fsstore ->nfsstore * clean code * CI * CI * CI * pip install with no-build-isolation --------- Co-authored-by: xujia <42216276@qq.com> Co-authored-by: zbb200819 <1130072360@qq.com>
1 parent 5c191a2 commit 406c548

30 files changed

+4100
-47
lines changed

.github/workflows/unifiedcache_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
6565

6666
- name: Install unified-cache-management
67-
run: pip install -v -e .
67+
run: pip install -v -e . --no-build-isolation
6868

6969
- name: Run ut
7070
run: python3 -m unittest discover -s test

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ COPY . /vllm-workspace/unified-cache-management
1111
RUN pip config set global.index-url ${PIP_INDEX_URL}
1212

1313
RUN export PLATFORM="cuda" && \
14-
pip install -v -e /vllm-workspace/unified-cache-management
14+
pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation
1515

1616
# Apply patch for vLLM
1717
RUN cd $(pip show vllm | grep Location | awk '{print $2}') \

docker/Dockerfile-NPU

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
1212

1313
RUN export PLATFORM="ascend" && \
1414
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
15-
pip install -v -e /vllm-workspace/unified-cache-management
15+
pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation
1616

1717
# Apply patch for vLLM
1818
RUN cd /vllm-workspace/vllm \

docs/source/getting-started/installation/installation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Follow commands below to install unified-cache-management:
4545
git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
4646
cd unified-cache-management
4747
export PLATFORM=cuda
48-
pip install -v -e .
48+
pip install -v -e . --no-build-isolation
4949
cd ..
5050
```
5151

docs/source/getting-started/installation/installation_npu.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Follow commands below to install unified-cache-management:
5555
git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
5656
cd unified-cache-management
5757
export PLATFORM=ascend
58-
pip install -v -e .
58+
pip install -v -e . --no-build-isolation
5959
cd ..
6060
```
6161

examples/offline_inference.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
3030
"max_cache_size": 5368709120,
3131
"kv_block_size": 262144,
3232
},
33-
"ucm_sparse_method": "ESA",
33+
"ucm_sparse_method": "GSA",
3434
},
3535
)
3636

@@ -39,6 +39,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
3939
kv_transfer_config=ktc,
4040
max_model_len=8000,
4141
gpu_memory_utilization=0.8,
42+
block_size=128,
4243
)
4344

4445
llm = LLM(**asdict(llm_args))

setup.py

Lines changed: 81 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,22 @@
2525
import os
2626
import shutil
2727
import subprocess
28+
import sys
2829

2930
from setuptools import Extension, find_packages, setup
3031
from setuptools.command.build_ext import build_ext
3132
from setuptools.command.develop import develop
3233

3334
ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
34-
SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
35-
INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
35+
FSSTORE_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
36+
GSA_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "gsaoffloadops")
37+
PREFETCH_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmprefetch")
38+
39+
FSSTORE_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
40+
GSA_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "ucm_sparse")
41+
3642
PLATFORM = os.getenv("PLATFORM")
43+
RUNTIME_ENVIRONMENT = os.getenv("RUNTIME_ENVIRONMENT")
3744

3845

3946
def _is_cuda() -> bool:
@@ -56,62 +63,95 @@ def run(self):
5663
self.build_cmake(ext)
5764

5865
def build_cmake(self, ext: CMakeExtension):
59-
build_dir = os.path.abspath(self.build_temp)
66+
build_dir = os.path.abspath(os.path.join(self.build_temp, ext.name))
6067
os.makedirs(build_dir, exist_ok=True)
68+
69+
cmake_args = [
70+
"cmake",
71+
f"-DCMAKE_BUILD_TYPE=Release",
72+
f"-DPYTHON_EXECUTABLE={sys.executable}",
73+
]
74+
75+
cmake_args.append("-DDOWNLOAD_DEPENDENCE=ON")
6176
if _is_cuda():
62-
subprocess.check_call(
63-
[
64-
"cmake",
65-
"-DDOWNLOAD_DEPENDENCE=ON",
66-
"-DRUNTIME_ENVIRONMENT=cuda",
67-
ext.sourcedir,
68-
],
69-
cwd=build_dir,
70-
)
77+
cmake_args.append("-DRUNTIME_ENVIRONMENT=cuda")
7178
elif _is_npu():
72-
subprocess.check_call(
73-
[
74-
"cmake",
75-
"-DDOWNLOAD_DEPENDENCE=ON",
76-
"-DRUNTIME_ENVIRONMENT=ascend",
77-
ext.sourcedir,
78-
],
79-
cwd=build_dir,
80-
)
79+
cmake_args.append("-DRUNTIME_ENVIRONMENT=ascend")
8180
else:
8281
raise RuntimeError(
8382
"No supported accelerator found. "
8483
"Please ensure either CUDA or NPU is available."
8584
)
8685

87-
subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
86+
cmake_args.append(ext.sourcedir)
8887

89-
so_file = None
88+
print(f"[INFO] Building {ext.name} module with CMake")
89+
print(f"[INFO] Source directory: {ext.sourcedir}")
90+
print(f"[INFO] Build directory: {build_dir}")
91+
92+
subprocess.check_call(cmake_args, cwd=build_dir)
93+
94+
if ext.name in ["nfsstore", "gsa_offload_ops"]:
95+
subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
96+
else:
97+
# 对于gsa_prefetch使用cmake --build
98+
subprocess.check_call(
99+
["cmake", "--build", ".", "--config", "Release", "--", "-j8"],
100+
cwd=build_dir,
101+
)
102+
103+
self._copy_so_files(ext)
104+
105+
def _copy_so_files(self, ext: CMakeExtension):
106+
"""复制编译好的.so文件"""
90107
so_search_dir = os.path.join(ext.sourcedir, "output", "lib")
91108
if not os.path.exists(so_search_dir):
92109
raise FileNotFoundError(f"{so_search_dir} does not exist!")
93110

94-
so_file = None
111+
so_files = []
112+
search_patterns = [ext.name]
113+
114+
if ext.name == "nfsstore":
115+
search_patterns.extend(["ucmnfsstore"])
116+
elif ext.name == "gsa_offload_ops":
117+
search_patterns.extend(["gsa_offload_ops"])
118+
elif ext.name == "gsa_prefetch":
119+
search_patterns.extend(["prefetch"])
120+
95121
for file in os.listdir(so_search_dir):
96-
if file.startswith("ucmnfsstore") and file.endswith(".so"):
97-
so_file = file
98-
break
122+
if file.endswith(".so") or ".so." in file:
123+
for pattern in search_patterns:
124+
if pattern in file:
125+
so_files.append(file)
126+
break
127+
128+
if ext.name == "nfsstore":
129+
install_dir = FSSTORE_INSTALL_DIR
130+
build_install_dir = "ucm/store"
131+
else:
132+
install_dir = GSA_INSTALL_DIR
133+
build_install_dir = "ucm_sparse"
134+
135+
for so_file in so_files:
136+
src_path = os.path.join(so_search_dir, so_file)
137+
dev_path = os.path.join(install_dir, so_file)
138+
dst_path = os.path.join(self.build_lib, build_install_dir, so_file)
139+
140+
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
141+
shutil.copy(src_path, dst_path)
142+
print(f"[INFO] Copied {so_file}{dst_path}")
143+
144+
if isinstance(self.distribution.get_command_obj("develop"), develop):
145+
os.makedirs(os.path.dirname(dev_path), exist_ok=True)
146+
shutil.copy(src_path, dev_path)
147+
print(f"[INFO] Copied in editable mode {so_file}{dev_path}")
99148

100-
if not so_file:
101-
raise FileNotFoundError(
102-
"Compiled .so file not found in output/lib directory."
103-
)
104149

105-
src_path = os.path.join(so_search_dir, so_file)
106-
dev_path = os.path.join(INSTALL_DIR, so_file)
107-
dst_path = os.path.join(self.build_lib, "ucm", "store", so_file)
108-
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
109-
shutil.copy(src_path, dst_path)
110-
print(f"[INFO] Copied {src_path}{dst_path}")
111-
if isinstance(self.distribution.get_command_obj("develop"), develop):
112-
shutil.copy(src_path, dev_path)
113-
print(f"[INFO] Copied in editable mode {src_path}{dev_path}")
150+
ext_modules = []
114151

152+
ext_modules.append(CMakeExtension(name="nfsstore", sourcedir=FSSTORE_SRC_DIR))
153+
ext_modules.append(CMakeExtension(name="gsa_offload_ops", sourcedir=GSA_SRC_DIR))
154+
ext_modules.append(CMakeExtension(name="gsa_prefetch", sourcedir=PREFETCH_SRC_DIR))
115155

116156
setup(
117157
name="ucm",
@@ -120,7 +160,7 @@ def build_cmake(self, ext: CMakeExtension):
120160
author="Unified Cache Team",
121161
packages=find_packages(),
122162
python_requires=">=3.10",
123-
ext_modules=[CMakeExtension(name="ucmnfsstore", sourcedir=SRC_DIR)],
163+
ext_modules=ext_modules,
124164
cmdclass={"build_ext": CMakeBuild},
125165
zip_safe=False,
126166
)
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
# -----------------------------------------------------------------------------
# Build script for the gsa_offload_ops pybind11 extension module.
#
# Inputs:
#   RUNTIME_ENVIRONMENT           Device runtime to build for. The value
#                                 "ascend" enables the torch_npu configuration;
#                                 any other value uses the default one.
#                                 Resolution order: -DRUNTIME_ENVIRONMENT=...,
#                                 then the RUNTIME_ENVIRONMENT environment
#                                 variable, then "cuda".
#   PYTHON_EXECUTABLE             Optional interpreter path. Forwarded to
#                                 FindPython so that torch/pybind11 are probed
#                                 with the same interpreter that drives the
#                                 build.
#   GSA_OFFLOAD_OPS_MARCH_NATIVE  Compile with -march=native (default ON).
#                                 Turn OFF when the binary must run on a
#                                 different CPU than the build host.
#
# Output:
#   The built module is written to <this directory>/output/lib.
# -----------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.16)
project(gsa_offload_ops)

# Require C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

option(GSA_OFFLOAD_OPS_MARCH_NATIVE "Compile gsa_offload_ops with -march=native" ON)

# Resolve the device runtime. An explicit -DRUNTIME_ENVIRONMENT on the command
# line wins; the environment variable is only a fallback, and "cuda" is the
# final default. (Re-setting the variable from the environment unconditionally
# would let a stale environment value shadow the command-line choice.)
if(NOT DEFINED RUNTIME_ENVIRONMENT OR "${RUNTIME_ENVIRONMENT}" STREQUAL "")
  if(NOT "$ENV{RUNTIME_ENVIRONMENT}" STREQUAL "")
    set(RUNTIME_ENVIRONMENT "$ENV{RUNTIME_ENVIRONMENT}")
  else()
    set(RUNTIME_ENVIRONMENT "cuda")
  endif()
endif()

message(STATUS "[INFO] Building gsa_offload_ops for device: ${RUNTIME_ENVIRONMENT}")

# Locate Python. FindPython ignores the legacy PYTHON_EXECUTABLE variable, so
# translate it into the Python_EXECUTABLE hint that FindPython does honour.
if(DEFINED PYTHON_EXECUTABLE AND NOT DEFINED Python_EXECUTABLE)
  set(Python_EXECUTABLE "${PYTHON_EXECUTABLE}")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)

# Ask the interpreter where the torch package is installed.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import torch; import os; print(os.path.dirname(os.path.abspath(torch.__file__)))"
  OUTPUT_VARIABLE PYTORCH_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
  RESULT_VARIABLE PYTORCH_RESULT
)

if(NOT PYTORCH_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to find PyTorch installation")
endif()

# Ask the interpreter for pybind11's CMake package directory, then load it.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pybind11; print(pybind11.get_cmake_dir())"
  OUTPUT_VARIABLE pybind11_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  RESULT_VARIABLE PYBIND11_RESULT
)

if(NOT PYBIND11_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to find pybind11")
endif()

find_package(pybind11 REQUIRED)

# OpenMP and threads are consumed through their imported targets, which carry
# the matching compile flags and runtime libraries for the active compiler.
find_package(OpenMP REQUIRED)
find_package(Threads REQUIRED)

# Default value of _GLIBCXX_USE_CXX11_ABI; the Ascend branch below overrides it.
set(CXX11_ABI "1")

# Default include paths, library search paths and libraries.
set(INCLUDE_DIRS
  "${PYTORCH_PATH}/include/torch/csrc/api/include"
  "${PYTORCH_PATH}/include"
  "${CMAKE_CURRENT_SOURCE_DIR}/include"
)

set(LIBRARY_DIRS
  "${PYTORCH_PATH}/lib"
  /usr/local/lib
)

set(LIBRARIES
  torch
  c10
  torch_cpu
  torch_python
)

# Ascend/NPU specific configuration.
if("${RUNTIME_ENVIRONMENT}" STREQUAL "ascend")
  message(STATUS "Configuring for NPU/Ascend device")

  # Ask the interpreter where the torch_npu package is installed.
  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c "import torch_npu; import os; print(os.path.dirname(os.path.abspath(torch_npu.__file__)))"
    OUTPUT_VARIABLE PYTORCH_NPU_PATH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    RESULT_VARIABLE NPU_RESULT
  )

  if(NPU_RESULT EQUAL 0)
    message(STATUS "Found torch_npu at: ${PYTORCH_NPU_PATH}")
    # torch_npu entries go first so they take precedence over the torch ones.
    list(INSERT INCLUDE_DIRS 0 "${PYTORCH_NPU_PATH}/include")
    list(INSERT LIBRARY_DIRS 0 "${PYTORCH_NPU_PATH}/lib")
    list(INSERT LIBRARIES 0 torch_npu)
    set(CXX11_ABI "0")
  else()
    # Best effort: keep going with the default configuration.
    message(WARNING "torch_npu not found, but RUNTIME_ENVIRONMENT is set to ascend")
  endif()
endif()

# The pybind11 extension module and its sources.
pybind11_add_module(gsa_offload_ops
  src/thread_safe_queue.cpp
  src/vec_product.cpp
  src/k_repre.cpp
  src/select_topk_block.cpp
  src/cal_kpre_and_topk.cpp
  src/pybinds.cpp
)

# Compile options are attached to the target instead of CMAKE_CXX_FLAGS.
target_compile_options(gsa_offload_ops PRIVATE -O3)
if(GSA_OFFLOAD_OPS_MARCH_NATIVE)
  target_compile_options(gsa_offload_ops PRIVATE -march=native)
endif()
target_compile_definitions(gsa_offload_ops PRIVATE _GLIBCXX_USE_CXX11_ABI=${CXX11_ABI})

target_include_directories(gsa_offload_ops PRIVATE ${INCLUDE_DIRS})
target_link_directories(gsa_offload_ops PRIVATE ${LIBRARY_DIRS})
target_link_libraries(gsa_offload_ops
  PRIVATE
    ${LIBRARIES}
    OpenMP::OpenMP_CXX
    Threads::Threads
)

# Place the built module in <source dir>/output/lib.
set(OUTPUT_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/output/lib")
file(MAKE_DIRECTORY "${OUTPUT_LIB_DIR}")

set_target_properties(gsa_offload_ops PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_LIB_DIR}"
  RUNTIME_OUTPUT_DIRECTORY "${OUTPUT_LIB_DIR}"
)

# Print a short summary once the module has been built.
add_custom_command(TARGET gsa_offload_ops POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E echo "Built gsa_offload_ops successfully for ${RUNTIME_ENVIRONMENT}"
  COMMAND ${CMAKE_COMMAND} -E echo "CXX11_ABI=${CXX11_ABI}"
  COMMAND ${CMAKE_COMMAND} -E echo "Output location: ${OUTPUT_LIB_DIR}"
  VERBATIM
)

0 commit comments

Comments
 (0)