@@ -58,6 +58,8 @@ class llm_task {
     std::vector<std::string> inputs_;
     std::vector<unsigned short> prompt_data_;
     std::vector<unsigned char> image_data_;
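+    // New buffers for multi-image input: raw JPEG payloads and the frames decoded from them.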
+    std::vector<std::vector<unsigned char>> images_data;
+    std::vector<cv::Mat> mats;
     std::vector<unsigned short> img_embed;
     std::string prompt_;
     std::string last_reply;
@@ -134,8 +136,8 @@ class llm_task {
         CONFIG_AUTO_SET(file_body["mode_param"], url_tokenizer_model);
         CONFIG_AUTO_SET(file_body["mode_param"], filename_tokens_embed);
         CONFIG_AUTO_SET(file_body["mode_param"], filename_post_axmodel);
-        CONFIG_AUTO_SET(file_body["mode_param"], filename_vpm_resampler_axmodedl);
-        CONFIG_AUTO_SET(file_body["mode_param"], filename_image_encoder_axmodedl);
+        CONFIG_AUTO_SET(file_body["mode_param"], filename_vpm_resampler_axmodel);
+        CONFIG_AUTO_SET(file_body["mode_param"], filename_image_encoder_axmodel);
         CONFIG_AUTO_SET(file_body["mode_param"], template_filename_axmodel);
         CONFIG_AUTO_SET(file_body["mode_param"], b_use_topk);
         CONFIG_AUTO_SET(file_body["mode_param"], b_vpm_two_stage);
@@ -215,11 +217,11 @@ class llm_task {
                 SLOGE("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
             }
         }
-        mode_config_.filename_tokens_embed           = base_model + mode_config_.filename_tokens_embed;
-        mode_config_.filename_post_axmodel           = base_model + mode_config_.filename_post_axmodel;
-        mode_config_.template_filename_axmodel       = base_model + mode_config_.template_filename_axmodel;
-        mode_config_.filename_vpm_resampler_axmodedl = base_model + mode_config_.filename_vpm_resampler_axmodedl;
-        mode_config_.filename_image_encoder_axmodedl = base_model + mode_config_.filename_image_encoder_axmodedl;
+        mode_config_.filename_tokens_embed          = base_model + mode_config_.filename_tokens_embed;
+        mode_config_.filename_post_axmodel          = base_model + mode_config_.filename_post_axmodel;
+        mode_config_.template_filename_axmodel      = base_model + mode_config_.template_filename_axmodel;
+        mode_config_.filename_vpm_resampler_axmodel = base_model + mode_config_.filename_vpm_resampler_axmodel;
+        mode_config_.filename_image_encoder_axmodel = base_model + mode_config_.filename_image_encoder_axmodel;
         mode_config_.runing_callback = [this](int *p_token, int n_token, const char *p_str, float token_per_sec,
                                               void *reserve) {
             if (this->out_callback_) {
@@ -342,17 +344,38 @@ class llm_task {
         }
 
         if (lLaMa_ctx_) {
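+            // Explicit "reset": re-prime the KV cache from the system prompt and clear the cached reply/frames.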
+            if (msg == "reset") {
+                lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
+                lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
+                last_reply.clear();
+                mats.clear();
+                if (out_callback_) out_callback_("Context has been reset.", true);
+                return;
+            }
+
             if (image_data_.empty()) {
                 lLaMa_ctx_->Encode(prompt_data_, prompt_complete(msg), last_reply, tokens_ids, tokens_diff);
                 if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
                     ret != 0) {
-                    ALOGE("SetKVCache failed: %d,the context may be full,input \"reset\" to reset context", ret);
-                    return;
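+                    // KV cache is full: rebuild the prefill from the system prompt and re-apply the cache instead of aborting.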
+                    ALOGW("Context is full, resetting context");
+                    lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
+                    lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
+                    lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
                 }
                 last_reply = lLaMa_ctx_->Run(prompt_data_);
                 lLaMa_ctx_->GetKVCache(k_caches, v_caches, precompute_len);
                 if (out_callback_) out_callback_(last_reply, true);
             } else {
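+                // Decode every buffered JPEG into a cv::Mat; frames that fail to decode are skipped.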
+                for (const auto &img_buf : images_data) {
+                    cv::Mat src = cv::imdecode(img_buf, cv::IMREAD_COLOR);
+                    if (src.empty()) {
+                        std::cerr << "Decode failed!" << std::endl;
+                        continue;
+                    }
+                    mats.push_back(src);
+                }
+                if (mats.empty()) return;
+                images_data.clear();
                 cv::Mat src = cv::imdecode(image_data_, cv::IMREAD_COLOR);
                 if (src.empty()) return;
                 image_data_.clear();
@@ -361,6 +384,7 @@ class llm_task {
                     ALOGE("lLaMaCtx.Encode failed");
                     return;
                 }
+                mats.clear();
                 if (auto ret =
                         lLaMa_ctx_->Encode(img_embed, prompt_data_, prompt_complete(msg), tokens_ids, tokens_diff);
                     ret != 0) {
@@ -369,8 +393,11 @@ class llm_task {
                 }
                 if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
                     ret != 0) {
-                    ALOGE("SetKVCache failed: %d,the context may be full,input \"reset\" to reset context", ret);
-                    return;
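+                    // Context full on the image path: rebuild the prefill and drop the pending image embeddings (ClearImgsEmbed).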
+                    ALOGW("Context is full, resetting context");
+                    lLaMa_ctx_->SetSystemPrompt(mode_config_.system_prompt, _token_ids);
+                    lLaMa_ctx_->GenerateKVCachePrefill(_token_ids, k_caches, v_caches, precompute_len);
+                    lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());
+                    lLaMa_ctx_->ClearImgsEmbed();
                 }
                 last_reply = lLaMa_ctx_->Run(prompt_data_);
                 lLaMa_ctx_->GetKVCache(k_caches, v_caches, precompute_len);
@@ -549,6 +576,7 @@ class llm_vlm : public StackFlow {
             }
             if (object.find("jpeg") != std::string::npos) {
                 llm_task_obj->image_data_.assign(next_data->begin(), next_data->end());
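+                // Also queue the JPEG payload for the multi-image inference path.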
+                llm_task_obj->images_data.emplace_back(next_data->begin(), next_data->end());
                 return;
             }
             llm_task_obj->inference((*next_data));