Skip to content

Commit 1fac79e

Browse files
author
LittleMouse
committed
[update] update llm-tts supported kokoro tts
1 parent f82b01e commit 1fac79e

File tree

6 files changed

+391
-42
lines changed

6 files changed

+391
-42
lines changed

projects/llm_framework/SConstruct

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import shutil
55
os.environ['SDK_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'SDK'))
66
os.environ['EXT_COMPONENTS_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'ext_components'))
77

8-
version = 'v0.1.3'
8+
version = 'v0.1.5'
99
static_lib = 'static_lib'
1010
update = False
1111

projects/llm_framework/main_tts/SConstruct

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ with open(env['PROJECT_TOOL_S']) as f:
88
SRCS = Glob('src/*.c*')
99
INCLUDE = [ADir('include'), ADir('.')]
1010
PRIVATE_INCLUDE = []
11-
REQUIREMENTS = ['pthread', 'utilities', 'gomp', 'eventpp', 'StackFlow']
11+
REQUIREMENTS = ['pthread', 'utilities', 'ax_msp', 'gomp', 'eventpp', 'StackFlow']
1212
STATIC_LIB = []
1313
DYNAMIC_LIB = []
1414
DEFINITIONS = []
@@ -20,14 +20,18 @@ STATIC_FILES = []
2020
DEFINITIONS += ['-O3', '-fopenmp', '-std=c++17']
2121
LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./']
2222
LINK_SEARCH_PATH += [ADir('../static_lib')]
23-
REQUIREMENTS += ['tts']
23+
REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys']
24+
REQUIREMENTS += ['samplerate']
25+
REQUIREMENTS += ['tts', 'kokoro', 'libkokoro_backend']
2426

2527

2628
INCLUDE += [ADir('src/runner/eigen-3.4.0'), ADir('src/runner/src/tn/header'), ADir('src/runner/include'), ADir('src/runner/src/header')]
2729

30+
REQUIREMENTS += ['onnxruntime']
31+
2832
STATIC_FILES += Glob('mode_*.json')
2933

30-
env['COMPONENTS'].append({'target':'llm_tts-1.6',
34+
env['COMPONENTS'].append({'target':'llm_tts-1.7',
3135
'SRCS':SRCS,
3236
'INCLUDE':INCLUDE,
3337
'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"mode": "kokoro-ax650",
3+
"type": "tts",
4+
"homepage": "https://huggingface.co/AXERA-TECH/kokoro.axera",
5+
"capabilities": [
6+
"tts",
7+
"Chinese",
8+
"English"
9+
],
10+
"input_type": [
11+
"tts.utf-8"
12+
],
13+
"output_type": [
14+
"tts.wav",
15+
"sys.play.0_1"
16+
],
17+
"mode_param": {
18+
"axmodel_dir": "./models",
19+
"lang": "z",
20+
"voice_path": "./voices",
21+
"voice_name": "zf_xiaoxiao",
22+
"vocab_path": "./vocab.txt",
23+
"espeak_data_path": "./espeak-ng-data",
24+
"dict_dir": "./dict",
25+
"spacker_speed": 1.0,
26+
"max_len": 96,
27+
"mode_rate": 24000,
28+
"audio_rate": 48000,
29+
"speed": 1.0,
30+
"pause": 0.0,
31+
"fade_out": 0.0
32+
}
33+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
#pragma once
7+
8+
#include <memory>
#include <string>
#include <utility>
#include <vector>
11+
12+
namespace kokoro {

// Per-sentence record used by the Kokoro text front end.
//
// A record is built in one of two shapes, distinguished by `is_long`:
//   * leaf  (is_long == false): `input_ids`, `phonemes` and `content_len` are
//     filled in by the four-argument constructor and `sub_results` stays empty;
//   * split (is_long == true): only `sentence` and `sub_results` are filled in
//     by the two-argument constructor; every other field keeps its default.
//
// NOTE(review): the precise meaning of `content_len` and the id space of
// `input_ids` are defined by the implementation file, not by this header --
// confirm there before relying on them.
struct SentenceInfo {
    std::string sentence;                   // source text of this entry
    std::vector<int> input_ids;             // presumably model token ids -- TODO confirm
    std::string phonemes;                   // presumably the phoneme string for `sentence` -- TODO confirm
    int content_len = 0;                    // left at 0 for split entries
    bool is_long    = false;                // true only for entries built from sub-results
    std::vector<SentenceInfo> sub_results;  // child entries of a split sentence

    SentenceInfo() = default;

    // Builds a leaf entry.
    // Arguments are taken by value and moved into place, so temporaries and
    // std::move'd arguments are not deep-copied; lvalue arguments are copied
    // exactly once, as before.
    SentenceInfo(std::string s, std::vector<int> ids, std::string ph, int len)
        : sentence(std::move(s)),
          input_ids(std::move(ids)),
          phonemes(std::move(ph)),
          content_len(len),
          is_long(false)
    {
    }

    // Builds a split entry that owns the given sub-results.
    // `subs` is a sink parameter: moving it in avoids copying the whole
    // (recursive) vector of child entries.
    SentenceInfo(std::string s, std::vector<SentenceInfo> subs)
        : sentence(std::move(s)), is_long(true), sub_results(std::move(subs))
    {
    }
};

// A group of sentence data, in one of two shapes distinguished by
// `is_long_split`:
//   * is_long_split == false: carries `input_ids` and `phonemes`;
//   * is_long_split == true:  carries only `sub_results`.
struct MergedGroup {
    bool is_long_split = false;
    std::vector<int> input_ids;
    std::string phonemes;
    std::vector<SentenceInfo> sub_results;

    MergedGroup() = default;

    // Builds a regular (non-split) group. Arguments are sink parameters and
    // are moved into the members.
    MergedGroup(std::vector<int> ids, std::string ph)
        : is_long_split(false), input_ids(std::move(ids)), phonemes(std::move(ph))
    {
    }

    // Builds a split group that owns the given sub-results.
    // Deliberately left non-explicit: existing callers may rely on the implicit
    // conversion from std::vector<SentenceInfo>.
    MergedGroup(std::vector<SentenceInfo> subs) : is_long_split(true), sub_results(std::move(subs))
    {
    }
};

// Public handle for the Kokoro TTS engine.
//
// All state lives in the forward-declared `Impl`, held through a
// std::unique_ptr, so this header exposes no backend types. Because `Impl` is
// incomplete here, the constructor and destructor are declared only and must be
// defined where `Impl` is complete. The class is non-copyable.
class Kokoro {
public:
    Kokoro();
    ~Kokoro();
    Kokoro(const Kokoro &) = delete;
    Kokoro &operator=(const Kokoro &) = delete;

    // Initializes the engine; the bool result presumably reports success --
    // TODO confirm against the implementation.
    //
    // NOTE(review): the defaults pair lang_code "z" with voice_name "af_heart",
    // and vocab_path defaults to "dict/vocab.txt" while the other paths are
    // "./"-relative -- confirm these defaults are intended to be used together.
    bool init(const std::string &model_path, int max_seq_len = 96, const std::string &lang_code = "z",
              const std::string &voices_path = "./voices", const std::string &voice_name = "af_heart",
              const std::string &vocab_path       = "dict/vocab.txt",
              const std::string &espeak_data_path = "./espeak-ng-data", const std::string &dict_dir = "./dict");

    // Synthesizes `text`; `generated_audio` is the output parameter.
    // Presumably returns false on failure, and whether `generated_audio` is
    // cleared or appended to is decided by the implementation -- TODO confirm.
    bool tts(const std::string &text, const std::string &voice_name, float speed, int sample_rate, float fade_out,
             float pause_duration, std::vector<float> &generated_audio);

private:
    struct Impl;
    std::unique_ptr<Impl> impl_;
};
}  // namespace kokoro

0 commit comments

Comments
 (0)