Skip to content

Commit 2de874c

Browse files
author
LittleMouse
committed
[update] update llm & vlm
1 parent 57404bc commit 2de874c

File tree

3 files changed

+71
-28
lines changed

3 files changed

+71
-28
lines changed

projects/llm_framework/main_llm/src/main.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ using namespace StackFlows;
2929
int main_exit_flage = 0;
3030
static void __sigint(int iSigNo)
3131
{
32-
SLOGW("llm_sys will be exit!");
32+
SLOGW("llm_llm will be exit!");
3333
main_exit_flage = 1;
3434
}
3535

@@ -130,7 +130,7 @@ class llm_task {
130130
std::string base_model = base_model_path_ + model_ + "/";
131131
SLOGI("base_model %s", base_model.c_str());
132132

133-
CONFIG_AUTO_SET(file_body["mode_param"], system_prompt);
133+
CONFIG_AUTO_SET(file_body["mode_param"], system_prompt);
134134
CONFIG_AUTO_SET(file_body["mode_param"], tokenizer_type);
135135
CONFIG_AUTO_SET(file_body["mode_param"], filename_tokenizer_model);
136136
CONFIG_AUTO_SET(file_body["mode_param"], url_tokenizer_model);
@@ -325,6 +325,14 @@ class llm_task {
325325
}
326326

327327
if (lLaMa_ctx_) {
328+
if (msg == "reset") {
329+
lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
330+
lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
331+
last_reply.clear();
332+
if (out_callback_) out_callback_("Context has been reset.", true);
333+
return;
334+
}
335+
328336
lLaMa_ctx_->Encode(prompt_data, prompt_complete(msg), last_reply, tokens_ids, tokens_diff);
329337
if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
330338
ret != 0) {

projects/llm_framework/main_vlm/src/main.cpp

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class llm_task {
5858
std::vector<std::string> inputs_;
5959
std::vector<unsigned short> prompt_data_;
6060
std::vector<unsigned char> image_data_;
61+
std::vector<std::vector<unsigned char>> images_data;
62+
std::vector<cv::Mat> mats;
6163
std::vector<unsigned short> img_embed;
6264
std::string prompt_;
6365
std::string last_reply;
@@ -134,8 +136,8 @@ class llm_task {
134136
CONFIG_AUTO_SET(file_body["mode_param"], url_tokenizer_model);
135137
CONFIG_AUTO_SET(file_body["mode_param"], filename_tokens_embed);
136138
CONFIG_AUTO_SET(file_body["mode_param"], filename_post_axmodel);
137-
CONFIG_AUTO_SET(file_body["mode_param"], filename_vpm_resampler_axmodedl);
138-
CONFIG_AUTO_SET(file_body["mode_param"], filename_image_encoder_axmodedl);
139+
CONFIG_AUTO_SET(file_body["mode_param"], filename_vpm_resampler_axmodel);
140+
CONFIG_AUTO_SET(file_body["mode_param"], filename_image_encoder_axmodel);
139141
CONFIG_AUTO_SET(file_body["mode_param"], template_filename_axmodel);
140142
CONFIG_AUTO_SET(file_body["mode_param"], b_use_topk);
141143
CONFIG_AUTO_SET(file_body["mode_param"], b_vpm_two_stage);
@@ -215,11 +217,11 @@ class llm_task {
215217
SLOGE("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
216218
}
217219
}
218-
mode_config_.filename_tokens_embed = base_model + mode_config_.filename_tokens_embed;
219-
mode_config_.filename_post_axmodel = base_model + mode_config_.filename_post_axmodel;
220-
mode_config_.template_filename_axmodel = base_model + mode_config_.template_filename_axmodel;
221-
mode_config_.filename_vpm_resampler_axmodedl = base_model + mode_config_.filename_vpm_resampler_axmodedl;
222-
mode_config_.filename_image_encoder_axmodedl = base_model + mode_config_.filename_image_encoder_axmodedl;
220+
mode_config_.filename_tokens_embed = base_model + mode_config_.filename_tokens_embed;
221+
mode_config_.filename_post_axmodel = base_model + mode_config_.filename_post_axmodel;
222+
mode_config_.template_filename_axmodel = base_model + mode_config_.template_filename_axmodel;
223+
mode_config_.filename_vpm_resampler_axmodel = base_model + mode_config_.filename_vpm_resampler_axmodel;
224+
mode_config_.filename_image_encoder_axmodel = base_model + mode_config_.filename_image_encoder_axmodel;
223225
mode_config_.runing_callback = [this](int *p_token, int n_token, const char *p_str, float token_per_sec,
224226
void *reserve) {
225227
if (this->out_callback_) {
@@ -342,17 +344,38 @@ class llm_task {
342344
}
343345

344346
if (lLaMa_ctx_) {
347+
if (msg == "reset") {
348+
lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
349+
lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
350+
last_reply.clear();
351+
mats.clear();
352+
if (out_callback_) out_callback_("Context has been reset.", true);
353+
return;
354+
}
355+
345356
if (image_data_.empty()) {
346357
lLaMa_ctx_->Encode(prompt_data_, prompt_complete(msg), last_reply, tokens_ids, tokens_diff);
347358
if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
348359
ret != 0) {
349-
ALOGE("SetKVCache failed: %d,the context may be full,input \"reset\" to reset context", ret);
350-
return;
360+
ALOGW("The context full,Reset context");
361+
lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
362+
lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
363+
lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
351364
}
352365
last_reply = lLaMa_ctx_->Run(prompt_data_);
353366
lLaMa_ctx_->GetKVCache(k_caches, v_caches, precompute_len);
354367
if (out_callback_) out_callback_(last_reply, true);
355368
} else {
369+
for (const auto &img_buf : images_data) {
370+
cv::Mat src = cv::imdecode(img_buf, cv::IMREAD_COLOR);
371+
if (src.empty()) {
372+
std::cerr << "Decode failed!" << std::endl;
373+
continue;
374+
}
375+
mats.push_back(src);
376+
}
377+
if (mats.empty()) return;
378+
images_data.clear();
356379
cv::Mat src = cv::imdecode(image_data_, cv::IMREAD_COLOR);
357380
if (src.empty()) return;
358381
image_data_.clear();
@@ -361,6 +384,7 @@ class llm_task {
361384
ALOGE("lLaMaCtx.Encode failed");
362385
return;
363386
}
387+
mats.clear();
364388
if (auto ret =
365389
lLaMa_ctx_->Encode(img_embed, prompt_data_, prompt_complete(msg), tokens_ids, tokens_diff);
366390
ret != 0) {
@@ -369,8 +393,11 @@ class llm_task {
369393
}
370394
if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
371395
ret != 0) {
372-
ALOGE("SetKVCache failed: %d,the context may be full,input \"reset\" to reset context", ret);
373-
return;
396+
ALOGW("The context full,Reset context");
397+
lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
398+
lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
399+
lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
400+
lLaMa_ctx_->ClearImgsEmbed();
374401
}
375402
last_reply = lLaMa_ctx_->Run(prompt_data_);
376403
lLaMa_ctx_->GetKVCache(k_caches, v_caches, precompute_len);
@@ -549,6 +576,7 @@ class llm_vlm : public StackFlow {
549576
}
550577
if (object.find("jpeg") != std::string::npos) {
551578
llm_task_obj->image_data_.assign(next_data->begin(), next_data->end());
579+
llm_task_obj->images_data.emplace_back(next_data->begin(), next_data->end());
552580
return;
553581
}
554582
llm_task_obj->inference((*next_data));

projects/llm_framework/main_vlm/src/runner/LLM.hpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ struct LLMAttrType {
2323
std::string template_filename_axmodel = "tinyllama-int8/tinyllama_l%d.axmodel";
2424
int axmodel_num = 22;
2525

26-
std::string filename_post_axmodel = "tinyllama-int8/tinyllama_post.axmodel";
27-
std::string filename_image_encoder_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel";
28-
std::string filename_vpm_encoder_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel";
29-
std::string filename_vpm_resampler_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel";
26+
std::string filename_post_axmodel = "tinyllama-int8/tinyllama_post.axmodel";
27+
std::string filename_image_encoder_axmodel = "minicpmv/vpm_resampler_version0_fp16.axmodel";
28+
std::string filename_vpm_encoder_axmodel = "minicpmv/vpm_resampler_version0_fp16.axmodel";
29+
std::string filename_vpm_resampler_axmodel = "minicpmv/vpm_resampler_version0_fp16.axmodel";
3030

3131
int image_encoder_width = 448;
3232
int image_encoder_height = 448;
@@ -184,24 +184,24 @@ class LLM {
184184
update_cqdm(&cqdm, attr.axmodel_num + 2, "count", axmodel_path);
185185

186186
if (_attr.b_vpm_two_stage) {
187-
ret = vpm_encoder.init(attr.filename_vpm_encoder_axmodedl.c_str(), false);
187+
ret = vpm_encoder.init(attr.filename_vpm_encoder_axmodel.c_str(), false);
188188
if (ret != 0) {
189-
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_encoder_axmodedl.c_str());
189+
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_encoder_axmodel.c_str());
190190
return false;
191191
}
192192

193-
ret = vpm_resampler.init(attr.filename_vpm_resampler_axmodedl.c_str(), false);
193+
ret = vpm_resampler.init(attr.filename_vpm_resampler_axmodel.c_str(), false);
194194
if (ret != 0) {
195-
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_resampler_axmodedl.c_str());
195+
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_resampler_axmodel.c_str());
196196
return false;
197197
}
198198

199199
_attr.vpm_height = vpm_encoder.get_input(0).vShape[1];
200200
_attr.vpm_width = vpm_encoder.get_input(0).vShape[2];
201201
} else {
202-
ret = vpm_resampler.init(attr.filename_vpm_resampler_axmodedl.c_str(), false);
202+
ret = vpm_resampler.init(attr.filename_vpm_resampler_axmodel.c_str(), false);
203203
if (ret != 0) {
204-
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_resampler_axmodedl.c_str());
204+
ALOGE("init vpm axmodel(%s) failed", attr.filename_vpm_resampler_axmodel.c_str());
205205
return false;
206206
}
207207
_attr.vpm_height = vpm_resampler.get_input(0).vShape[1];
@@ -637,6 +637,7 @@ class LLM_CTX {
637637
private:
638638
std::shared_ptr<BaseTokenizer> tokenizer;
639639
LLaMaEmbedSelector embed_selector;
640+
std::vector<std::vector<unsigned short>> imgs_embed_;
640641

641642
LLMAttrType _attr;
642643

@@ -718,9 +719,9 @@ class LLM_CTX {
718719
sprintf(axmodel_path, "init post axmodel ok,remain_cmm(%d MB)", remain_cmm);
719720
update_cqdm(&cqdm, attr.axmodel_num + 2, "count", axmodel_path);
720721

721-
ret = image_encoder.init(attr.filename_image_encoder_axmodedl.c_str());
722+
ret = image_encoder.init(attr.filename_image_encoder_axmodel.c_str());
722723
if (ret != 0) {
723-
ALOGE("init vpm axmodel(%s) failed", attr.filename_image_encoder_axmodedl.c_str());
724+
ALOGE("init vpm axmodel(%s) failed", attr.filename_image_encoder_axmodel.c_str());
724725
return false;
725726
}
726727

@@ -1424,8 +1425,9 @@ class LLM_CTX {
14241425
int Encode(std::vector<unsigned short> &img_embed, std::vector<unsigned short> &out_embed, std::string prompt,
14251426
std::vector<int> &tokens_ids, std::vector<int> &tokens_diff)
14261427
{
1427-
std::vector<std::vector<unsigned short>> imgs_embed = {img_embed};
1428-
return Encode(imgs_embed, out_embed, prompt, tokens_ids, tokens_diff);
1428+
// std::vector<std::vector<unsigned short>> imgs_embed = {img_embed};
1429+
imgs_embed_.push_back(img_embed);
1430+
return Encode(imgs_embed_, out_embed, prompt, tokens_ids, tokens_diff);
14291431
}
14301432

14311433
int Encode(std::vector<unsigned short> &out_embed, std::string prompt, std::string last_reply,
@@ -1447,7 +1449,12 @@ class LLM_CTX {
14471449
return 0;
14481450
}
14491451

1450-
std::string Run(std::vector<unsigned short> test_embed)
1452+
void ClearImgsEmbed()
1453+
{
1454+
imgs_embed_.clear();
1455+
}
1456+
1457+
std::string Run(std::vector<unsigned short> &test_embed)
14511458
{
14521459
b_stop = false;
14531460
std::string final_out;

0 commit comments

Comments
 (0)