m5stack
diff --git a/‎doc/projects_llm_framework_doc/llm_cosyvoice2.md‎
Lines changed: 223 additions & 0 deletions b/‎doc/projects_llm_framework_doc/llm_cosyvoice2.md‎
Lines changed: 223 additions & 0 deletions
diff --git a/‎ext_components/tokenizer/Kconfig‎
Lines changed: 5 additions & 0 deletions b/‎ext_components/tokenizer/Kconfig‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎ext_components/tokenizer/SConstruct‎
Lines changed: 52 additions & 0 deletions b/‎ext_components/tokenizer/SConstruct‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎projects/llm_framework/main_cosy_voice/src/runner/LLM.hpp‎
Lines changed: 3 additions & 3 deletions b/‎projects/llm_framework/main_cosy_voice/src/runner/LLM.hpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎projects/llm_framework/main_cosy_voice/src/runner/Token2wav.hpp‎
Lines changed: 9 additions & 9 deletions b/‎projects/llm_framework/main_cosy_voice/src/runner/Token2wav.hpp‎
Lines changed: 9 additions & 9 deletions
@@ -0,0 +1,223 @@
+# llm_cosy_voice
+
+使用 NPU 加速的文字转语音单元，支持语音克隆，用于提供多语言文字转语音服务。
+
+## setup
+
+配置单元工作。
+
+发送 json：
+
+```json
+{
+  "request_id": "2",
+  "work_id": "cosy_voice",
+  "action": "setup",
+  "object": "cosy_voice.setup",
+  "data": {
+    "model": "CosyVoice2-0.5B-ax650",
+    "response_format": "file",
+    "input": "tts.utf-8",
+    "enoutput": false
+  }
+}
+```
+
+
+- request_id：参考基本数据解释。
+- work_id：配置单元时，为 `cosy_voice`。
+- action：调用的方法为 `setup`。
+- object：传输的数据类型为 `cosy_voice.setup`。
+- model：使用的模型为 `CosyVoice2-0.5B-ax650` 模型。
+- prompt_files：要克隆的音频信息文件。
+- response_format：为 `sys.pcm` 时，返回系统音频数据，并直接发送到 llm-audio 模块进行播放；为 `file` 时，生成的音频写入 wav 文件，可用 `prompt_data` 指定路径或文件名。
+- input：输入为 `tts.utf-8`，代表来自用户的输入。
+- enoutput：是否启用用户结果输出。
+
+响应 json：
+
+```json
+{
+    "created": 1761791627,
+    "data": "None",
+    "error": {
+        "code": 0,
+        "message": ""
+    },
+    "object": "None",
+    "request_id": "2",
+    "work_id": "cosy_voice.1000"
+}
+```
+
+- created：消息创建时间，unix 时间。
+- work_id：返回成功创建的 work_id 单元。
+
+## inference
+
+### 流式输入
+
+```json
+{
+    "request_id": "2",
+    "work_id": "cosy_voice.1000",
+    "action": "inference",
+    "object": "cosy_voice.utf-8.stream",
+    "data": {
+        "delta": "今天天气真好！",
+        "index": 0,
+        "finish": true
+    }
+}
+```
+- object：传输的数据类型为 `cosy_voice.utf-8.stream` 代表的是从用户 utf-8 的流式输入
+- delta：流式输入的分段数据
+- index：流式输入的分段索引
+- finish：流式输入是否完成的标志位
+
+### 非流式输入
+
+```json
+{
+    "request_id": "2",
+    "work_id": "cosy_voice.1000",
+    "action": "inference",
+    "object": "cosy_voice.utf-8",
+    "data": "今天天气真好！"
+}
+```
+- object：传输的数据类型为 `cosy_voice.utf-8` 代表的是从用户 utf-8 的非流式输入
+- data：非流式输入的数据
+
+## pause
+
+暂停单元工作。
+
+发送 json：
+
+```json
+{
+  "request_id": "5",
+  "work_id": "cosy_voice.1000",
+  "action": "pause"
+}
+```
+
+响应 json：
+
+```json
+{
+    "created": 1761791706,
+    "data": "None",
+    "error": {
+        "code": 0,
+        "message": ""
+    },
+    "object": "None",
+    "request_id": "5",
+    "work_id": "cosy_voice.1000"
+}
+```
+
+error::code 为 0 表示执行成功。
+
+## exit
+
+单元退出。
+
+发送 json：
+
+```json
+{
+  "request_id": "7",
+  "work_id": "cosy_voice.1000",
+  "action": "exit"
+}
+```
+
+响应 json：
+
+```json
+{
+    "created": 1761791854,
+    "data": "None",
+    "error": {
+        "code": 0,
+        "message": ""
+    },
+    "object": "None",
+    "request_id": "7",
+    "work_id": "cosy_voice.1000"
+}
+```
+
+error::code 为 0 表示执行成功。
+
+## taskinfo
+
+获取任务列表。
+
+发送 json：
+
+```json
+{
+  "request_id": "2",
+  "work_id": "cosy_voice",
+  "action": "taskinfo"
+}
+```
+
+响应 json：
+
+```json
+{
+    "created": 1761791739,
+    "data": [
+        "cosy_voice.1000"
+    ],
+    "error": {
+        "code": 0,
+        "message": ""
+    },
+    "object": "llm.tasklist",
+    "request_id": "2",
+    "work_id": "cosy_voice"
+}
+```
+
+获取任务运行参数。
+
+```json
+{
+  "request_id": "2",
+  "work_id": "cosy_voice.1000",
+  "action": "taskinfo"
+}
+```
+
+响应 json：
+
+```json
+{
+    "created": 1761791761,
+    "data": {
+        "enoutput": false,
+        "inputs": [
+            "tts.utf-8"
+        ],
+        "model": "CosyVoice2-0.5B-ax650",
+        "response_format": "sys.pcm"
+    },
+    "error": {
+        "code": 0,
+        "message": ""
+    },
+    "object": "cosy_voice.taskinfo",
+    "request_id": "2",
+    "work_id": "cosy_voice.1000"
+}
+```
+
+> **注意：work_id 是按照单元的初始化注册顺序增加的，并不是固定的索引值。**  
+> **同类型单元不能配置多个单元同时工作，否则会产生未知错误。例如 tts 和 melo tts 不能同时启用工作。**
@@ -0,0 +1,5 @@
+menuconfig AX_TOKENIZER_ENABLED
+    bool "Enable tokenizer support"
+    default n
+    help
+        enable tokenizer support
@@ -0,0 +1,52 @@
+# ext_components/tokenizer/SConstruct
+Import("env")
+import os
+from pathlib import Path
+
+with open(env["PROJECT_TOOL_S"]) as f:
+    exec(f.read())
+
+_SDK_PATH = os.path.normpath(
+    os.environ.get("SDK_PATH", str(Path(os.getcwd()) / ".." / ".."))
+)
+
+env["GIT_REPO_LISTS"]["tokenizer"] = {
+    "url": "https://github.com/ZHEQIUSHUI/tokenizer.git",
+    "commit": "83f41d4b5b9a135c167d44fcdf2a0c56ebacca6d",
+    "path": str(Path(_SDK_PATH) / "github_source" / "tokenizer"),
+}
+
+if "CONFIG_AX_TOKENIZER_ENABLED" in os.environ:
+    check_component("tokenizer")
+    SRCS = []
+    INCLUDE = []
+    PRIVATE_INCLUDE = []
+    REQUIREMENTS = []
+    STATIC_LIB = []
+    DYNAMIC_LIB = []
+    DEFINITIONS = []
+    DEFINITIONS_PRIVATE = []
+    LDFLAGS = []
+    LINK_SEARCH_PATH = []
+
+    INCLUDE += [
+        os.path.join(env["GIT_REPO_LISTS"]["tokenizer"]["path"], "include"),
+    ]
+    print("AX-TOKENIZER INCLUDE:", INCLUDE)
+
+    env["COMPONENTS"].append(
+        {
+            "target": os.path.basename(env["component_dir"]),
+            "SRCS": SRCS,
+            "INCLUDE": INCLUDE,
+            "PRIVATE_INCLUDE": PRIVATE_INCLUDE,
+            "REQUIREMENTS": REQUIREMENTS,
+            "STATIC_LIB": STATIC_LIB,
+            "DYNAMIC_LIB": DYNAMIC_LIB,
+            "DEFINITIONS": DEFINITIONS,
+            "DEFINITIONS_PRIVATE": DEFINITIONS_PRIVATE,
+            "LDFLAGS": LDFLAGS,
+            "LINK_SEARCH_PATH": LINK_SEARCH_PATH,
+            "REGISTER": "static",
+        }
+    )
@@ -246,10 +246,10 @@ class LLM {
     void Deinit()
     {
         for (int i = 0; i < _attr.axmodel_num; i++) {
-            llama_layers[i].layer.release();
+            llama_layers[i].layer.deinit();
         }
-        llama_post.release();
-        llm_decoder.release();
+        llama_post.deinit();
+        llm_decoder.deinit();
         embed_selector.Deinit();
         llm_embed_selector.Deinit();
         speech_embed_selector.Deinit();
 
@@ -145,15 +145,15 @@ class Token2Wav {
 
     void Deinit()
     {
-        flow_encoder_28.release();
-        flow_encoder_53.release();
-        flow_encoder_78.release();
-        flow_encoder_50_final.release();
-        flow_estimator_200.release();
-        flow_estimator_250.release();
-        flow_estimator_300.release();
-        hift_p2_50_first.release();
-        hift_p2_58.release();
+        flow_encoder_28.deinit();
+        flow_encoder_53.deinit();
+        flow_encoder_78.deinit();
+        flow_encoder_50_final.deinit();
+        flow_estimator_200.deinit();
+        flow_estimator_250.deinit();
+        flow_estimator_300.deinit();
+        hift_p2_50_first.deinit();
+        hift_p2_58.deinit();
         flow_embed_selector.Deinit();
     }
Original file line number	Diff line number	Diff line change
`@@ -246,10 +246,10 @@ class LLM {`
`246`	`246`	`void Deinit()`
`247`	`247`	`{`
`248`	`248`	`for (int i = 0; i < _attr.axmodel_num; i++) {`
`249`		`- llama_layers[i].layer.release();`
	`249`	`+ llama_layers[i].layer.deinit();`
`250`	`250`	`}`
`251`		`- llama_post.release();`
`252`		`- llm_decoder.release();`
	`251`	`+ llama_post.deinit();`
	`252`	`+ llm_decoder.deinit();`
`253`	`253`	`embed_selector.Deinit();`
`254`	`254`	`llm_embed_selector.Deinit();`
`255`	`255`	`speech_embed_selector.Deinit();`