Skip to content

Commit cc9d1bc

Browse files
author
LittleMouse
committed
[update] perf llm backend & add c tokenizer
1 parent a674665 commit cc9d1bc

File tree

16 files changed

+1276
-1370
lines changed

16 files changed

+1276
-1370
lines changed
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
# llm_cosy_voice
2+
3+
使用 NPU 加速的文字转语音单元,支持语音克隆,可提供多语言文字转语音服务。
4+
5+
## setup
6+
7+
配置单元工作。
8+
9+
发送 json:
10+
11+
```json
12+
cosy_voice
13+
{
14+
"request_id": "2",
15+
"work_id": "cosy_voice",
16+
"action": "setup",
17+
"object": "cosy_voice.setup",
18+
"data": {
19+
"model": "CosyVoice2-0.5B-ax650",
20+
"response_format": "file",
21+
"input": "tts.utf-8",
22+
"enoutput": false
23+
}
24+
}
25+
```
26+
27+
28+
- request_id:参考基本数据解释。
29+
- work_id:配置单元时,为 `cosy_voice`
30+
- action:调用的方法为 `setup`
31+
- object:传输的数据类型为 `cosy_voice.setup`
32+
- model:使用的模型为 `CosyVoice2-0.5B-ax650` 模型。
33+
- prompt_files:要克隆的音频信息文件。
34+
- response_format:为 `sys.pcm` 时,返回系统音频数据,并直接发送到 llm-audio 模块进行播放;为 `file` 时,将生成的音频写入 wav 文件,可用 `prompt_data` 指定路径或文件名。
35+
- input:输入为 `tts.utf-8`,表示输入来自用户。
36+
- enoutput:是否启用用户结果输出。
37+
38+
响应 json:
39+
40+
```json
41+
{
42+
"created": 1761791627,
43+
"data": "None",
44+
"error": {
45+
"code": 0,
46+
"message": ""
47+
},
48+
"object": "None",
49+
"request_id": "2",
50+
"work_id": "cosy_voice.1000"
51+
}
52+
```
53+
54+
- created:消息创建时间,unix 时间。
55+
- work_id:返回成功创建的 work_id 单元。
56+
57+
## inference
58+
59+
### 流式输入
60+
61+
```json
62+
{
63+
"request_id": "2",
64+
"work_id": "cosy_voice.1000",
65+
"action": "inference",
66+
"object": "cosy_voice.utf-8.stream",
67+
"data": {
68+
"delta": "今天天气真好!",
69+
"index": 0,
70+
"finish": true
71+
}
72+
}
73+
```
74+
- object:传输的数据类型为 `cosy_voice.utf-8.stream` 代表的是从用户 utf-8 的流式输入
75+
- delta:流式输入的分段数据
76+
- index:流式输入的分段索引
77+
- finish:流式输入是否完成的标志位
78+
79+
### 非流式输入
80+
81+
```json
82+
{
83+
"request_id": "2",
84+
"work_id": "cosy_voice.1000",
85+
"action": "inference",
86+
"object": "cosy_voice.utf-8",
87+
"data": "今天天气真好!"
88+
}
89+
```
90+
- object:传输的数据类型为 `cosy_voice.utf-8` 代表的是从用户 utf-8 的非流式输入
91+
- data:非流式输入的数据
92+
93+
## pause
94+
95+
暂停单元工作。
96+
97+
发送 json:
98+
99+
```json
100+
{
101+
"request_id": "5",
102+
"work_id": "cosy_voice.1000",
103+
"action": "pause"
104+
}
105+
```
106+
107+
响应 json:
108+
109+
```json
110+
{
111+
"created": 1761791706,
112+
"data": "None",
113+
"error": {
114+
"code": 0,
115+
"message": ""
116+
},
117+
"object": "None",
118+
"request_id": "5",
119+
"work_id": "cosy_voice.1000"
120+
}
121+
```
122+
123+
error::code 为 0 表示执行成功。
124+
125+
## exit
126+
127+
单元退出。
128+
129+
发送 json:
130+
131+
```json
132+
{
133+
"request_id": "7",
134+
"work_id": "cosy_voice.1000",
135+
"action": "exit"
136+
}
137+
```
138+
139+
响应 json:
140+
141+
```json
142+
{
143+
"created": 1761791854,
144+
"data": "None",
145+
"error": {
146+
"code": 0,
147+
"message": ""
148+
},
149+
"object": "None",
150+
"request_id": "7",
151+
"work_id": "cosy_voice.1000"
152+
}
153+
```
154+
155+
error::code 为 0 表示执行成功。
156+
157+
## taskinfo
158+
159+
获取任务列表。
160+
161+
发送 json:
162+
163+
```json
164+
{
165+
"request_id": "2",
166+
"work_id": "cosy_voice",
167+
"action": "taskinfo"
168+
}
169+
```
170+
171+
响应 json:
172+
173+
```json
174+
{
175+
"created": 1761791739,
176+
"data": [
177+
"cosy_voice.1000"
178+
],
179+
"error": {
180+
"code": 0,
181+
"message": ""
182+
},
183+
"object": "llm.tasklist",
184+
"request_id": "2",
185+
"work_id": "cosy_voice"
186+
}
187+
```
188+
189+
获取任务运行参数。
190+
191+
```json
192+
{
193+
"request_id": "2",
194+
"work_id": "cosy_voice.1000",
195+
"action": "taskinfo"
196+
}
197+
```
198+
199+
响应 json:
200+
201+
```json
202+
{
203+
"created": 1761791761,
204+
"data": {
205+
"enoutput": false,
206+
"inputs": [
207+
"tts.utf-8"
208+
],
209+
"model": "CosyVoice2-0.5B-ax650",
210+
"response_format": "sys.pcm"
211+
},
212+
"error": {
213+
"code": 0,
214+
"message": ""
215+
},
216+
"object": "cosy_voice.taskinfo",
217+
"request_id": "2",
218+
"work_id": "cosy_voice.1000"
219+
}
220+
```
221+
222+
> **注意:work_id 是按照单元的初始化注册顺序增加的,并不是固定的索引值。**
223+
> **同类型单元不能配置多个单元同时工作,否则会产生未知错误。例如 tts 和 melo tts 不能同时启用工作。**

ext_components/tokenizer/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
menuconfig AX_TOKENIZER_ENABLED
2+
bool "Enable tokenizer support"
3+
default n
4+
help
5+
enable tokenizer support
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# component2/SConscript
2+
Import("env")
3+
import os
4+
from pathlib import Path
5+
6+
with open(env["PROJECT_TOOL_S"]) as f:
7+
exec(f.read())
8+
9+
_SDK_PATH = os.path.normpath(
10+
os.environ.get("SDK_PATH", str(Path(os.getcwd()) / ".." / ".."))
11+
)
12+
13+
env["GIT_REPO_LISTS"]["tokenizer"] = {
14+
"url": "https://github.com/ZHEQIUSHUI/tokenizer.git",
15+
"commit": "83f41d4b5b9a135c167d44fcdf2a0c56ebacca6d",
16+
"path": str(Path(_SDK_PATH) / "github_source" / "tokenizer"),
17+
}
18+
19+
if "CONFIG_AX_TOKENIZER_ENABLED" in os.environ:
20+
check_component("tokenizer")
21+
SRCS = []
22+
INCLUDE = []
23+
PRIVATE_INCLUDE = []
24+
REQUIREMENTS = []
25+
STATIC_LIB = []
26+
DYNAMIC_LIB = []
27+
DEFINITIONS = []
28+
DEFINITIONS_PRIVATE = []
29+
LDFLAGS = []
30+
LINK_SEARCH_PATH = []
31+
32+
INCLUDE += [
33+
os.path.join(env["GIT_REPO_LISTS"]["tokenizer"]["path"], "include"),
34+
]
35+
print("AX-TOKENIZER INCLUDE:", INCLUDE)
36+
37+
env["COMPONENTS"].append(
38+
{
39+
"target": os.path.basename(env["component_dir"]),
40+
"SRCS": SRCS,
41+
"INCLUDE": INCLUDE,
42+
"PRIVATE_INCLUDE": PRIVATE_INCLUDE,
43+
"REQUIREMENTS": REQUIREMENTS,
44+
"STATIC_LIB": STATIC_LIB,
45+
"DYNAMIC_LIB": DYNAMIC_LIB,
46+
"DEFINITIONS": DEFINITIONS,
47+
"DEFINITIONS_PRIVATE": DEFINITIONS_PRIVATE,
48+
"LDFLAGS": LDFLAGS,
49+
"LINK_SEARCH_PATH": LINK_SEARCH_PATH,
50+
"REGISTER": "static",
51+
}
52+
)

projects/llm_framework/main_cosy_voice/src/runner/LLM.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,10 @@ class LLM {
246246
void Deinit()
247247
{
248248
for (int i = 0; i < _attr.axmodel_num; i++) {
249-
llama_layers[i].layer.release();
249+
llama_layers[i].layer.deinit();
250250
}
251-
llama_post.release();
252-
llm_decoder.release();
251+
llama_post.deinit();
252+
llm_decoder.deinit();
253253
embed_selector.Deinit();
254254
llm_embed_selector.Deinit();
255255
speech_embed_selector.Deinit();

projects/llm_framework/main_cosy_voice/src/runner/Token2wav.hpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,15 @@ class Token2Wav {
145145

146146
void Deinit()
147147
{
148-
flow_encoder_28.release();
149-
flow_encoder_53.release();
150-
flow_encoder_78.release();
151-
flow_encoder_50_final.release();
152-
flow_estimator_200.release();
153-
flow_estimator_250.release();
154-
flow_estimator_300.release();
155-
hift_p2_50_first.release();
156-
hift_p2_58.release();
148+
flow_encoder_28.deinit();
149+
flow_encoder_53.deinit();
150+
flow_encoder_78.deinit();
151+
flow_encoder_50_final.deinit();
152+
flow_estimator_200.deinit();
153+
flow_estimator_250.deinit();
154+
flow_estimator_300.deinit();
155+
hift_p2_50_first.deinit();
156+
hift_p2_58.deinit();
157157
flow_embed_selector.Deinit();
158158
}
159159

0 commit comments

Comments
 (0)