
Commit 8cf311f

[update] main_llm & main_vlm.
1 parent a9a4464

5 files changed: +57 −33 lines changed

projects/llm_framework/main_llm/openbuddy-llama3.2-1b-ax630c_tokenizer.py

Lines changed: 29 additions & 28 deletions
@@ -32,7 +32,7 @@ def bos_id(self):
     @property
     def eos_id(self):
         return self.tokenizer.eos_token_id
-
+
     @property
     def bos_token(self):
         return self.tokenizer.bos_token
@@ -44,71 +44,72 @@ def eos_token(self):
 class Request(BaseHTTPRequestHandler):
     # define a new class by inheriting from BaseHTTPRequestHandler
     timeout = 5
-    server_version = 'Apache'
+    server_version = "Apache"
 
     def do_GET(self):
         print(self.path)
-        #define the GET behavior of this class (when a client sends a GET request, the server runs the code below)
+        # define the GET behavior of this class (when a client sends a GET request, the server runs the code below)
         self.send_response(200)
-        self.send_header("type", "get") #set response headers; optional, several may be set
+        self.send_header("type", "get")  # set response headers; optional, several may be set
         self.end_headers()
 
-        if self.path == '/bos_id':
+        if self.path == "/bos_id":
             bos_id = tokenizer.bos_id
             # print(bos_id)
             # to json
             if bos_id is None:
-                msg = json.dumps({'bos_id': -1})
+                msg = json.dumps({"bos_id": -1})
             else:
-                msg = json.dumps({'bos_id': bos_id})
-        elif self.path == '/eos_id':
+                msg = json.dumps({"bos_id": bos_id})
+        elif self.path == "/eos_id":
             eos_id = tokenizer.eos_id
             if eos_id is None:
-                msg = json.dumps({'eos_id': -1})
+                msg = json.dumps({"eos_id": -1})
             else:
-                msg = json.dumps({'eos_id': eos_id})
+                msg = json.dumps({"eos_id": eos_id})
         else:
-            msg = 'error'
+            msg = "error"
 
         print(msg)
-        msg = str(msg).encode() #convert to str, then to bytes
+        msg = str(msg).encode()  # convert to str, then to bytes
 
-        self.wfile.write(msg) #send the bytes back to the client
+        self.wfile.write(msg)  # send the bytes back to the client
 
     def do_POST(self):
-        #define the POST behavior of this class (when a client sends a POST request, the server runs the code below)
-        data = self.rfile.read(int(
-            self.headers['content-length'])) #read the request body sent by the client (bytes)
-        data = data.decode() #convert bytes to str
+        # define the POST behavior of this class (when a client sends a POST request, the server runs the code below)
+        data = self.rfile.read(
+            int(self.headers["content-length"])
+        )  # read the request body sent by the client (bytes)
+        data = data.decode()  # convert bytes to str
 
         self.send_response(200)
-        self.send_header("type", "post") #set response headers; optional, several may be set
+        self.send_header("type", "post")  # set response headers; optional, several may be set
         self.end_headers()
 
-        if self.path == '/encode':
+        if self.path == "/encode":
             req = json.loads(data)
             prompt = req['text']
 
             token_ids = tokenizer.encode(prompt, args.content)
             if token_ids is None:
-                msg = json.dumps({'token_ids': -1})
+                msg = json.dumps({"token_ids": -1})
             else:
-                msg = json.dumps({'token_ids': token_ids})
+                msg = json.dumps({"token_ids": token_ids})
 
-        elif self.path == '/decode':
+        elif self.path == "/decode":
             req = json.loads(data)
-            token_ids = req['token_ids']
+            token_ids = req["token_ids"]
             text = tokenizer.decode(token_ids)
             if text is None:
-                msg = json.dumps({'text': ""})
+                msg = json.dumps({"text": ""})
             else:
-                msg = json.dumps({'text': text})
+                msg = json.dumps({"text": text})
         else:
-            msg = 'error'
+            msg = "error"
         print(msg)
-        msg = str(msg).encode() #convert to str, then to bytes
+        msg = str(msg).encode()  # convert to str, then to bytes
 
-        self.wfile.write(msg) #send the bytes back to the client
+        self.wfile.write(msg)  # send the bytes back to the client
 
 
 if __name__ == "__main__":
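
For reference, a minimal client sketch (not part of this commit) showing how the GET and POST endpoints above can be exercised once the tokenizer server is running; the host and port are assumptions matching the default port_ = 8080 in main.cpp below.

# Hypothetical client for the tokenizer HTTP server above.
# Assumes it listens on localhost:8080 (the default port_ in main.cpp).
import json
import urllib.request

BASE = "http://localhost:8080"

def get_json(path):
    # GET endpoints: /bos_id and /eos_id return {"bos_id": ...} / {"eos_id": ...}
    with urllib.request.urlopen(BASE + path, timeout=5) as resp:
        return json.loads(resp.read().decode())

def post_json(path, payload):
    # POST endpoints: /encode expects {"text": ...}, /decode expects {"token_ids": ...}
    req = urllib.request.Request(BASE + path, data=json.dumps(payload).encode())
    with urllib.request.urlopen(req, timeout=5) as resp:
        return json.loads(resp.read().decode())

if __name__ == "__main__":
    print(get_json("/bos_id"))                     # e.g. {"bos_id": -1} if unset
    print(get_json("/eos_id"))
    ids = post_json("/encode", {"text": "hello"})  # {"token_ids": [...]}
    print(post_json("/decode", {"token_ids": ids["token_ids"]}))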

projects/llm_framework/main_llm/src/main.cpp

Lines changed: 2 additions & 3 deletions
@@ -133,14 +133,13 @@ class llm_task {
                     exit(1);
                 }
                 tokenizer_server_flage_ = true;
-                // std::this_thread::sleep_for(std::chrono::seconds(10));
                 SLOGI("port_=%s model_id=%s content=%s", std::to_string(port_).c_str(), (base_model + "tokenizer").c_str(), ("'" + prompt_ + "'").c_str());
-                return -1;
+                std::this_thread::sleep_for(std::chrono::seconds(10));
             }
         } else {
             mode_config_.filename_tokenizer_model = base_model + mode_config_.filename_tokenizer_model;
         }
-
+        SLOGI("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
         mode_config_.filename_tokens_embed = base_model + mode_config_.filename_tokens_embed;
         mode_config_.filename_post_axmodel = base_model + mode_config_.filename_post_axmodel;
         mode_config_.template_filename_axmodel = base_model + mode_config_.template_filename_axmodel;
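
The change above replaces the early return -1 with a fixed 10-second wait, so config loading continues after the tokenizer server is forked. For illustration only, the equivalent of the fork/execl spawn in Python's subprocess module would look roughly like this; model, base_model, and prompt are placeholders, while the script path and flags are copied from the diff.

# Rough Python equivalent of the fork/execl spawn in main.cpp (sketch only).
import subprocess
import time

model = "openbuddy-llama3.2-1b-ax630c"   # placeholder model name
base_model = "/opt/m5stack/data/model/"  # placeholder base path
port = 8080
prompt = "..."                            # placeholder system prompt

# Spawn the tokenizer HTTP server as a child process, mirroring the execl args.
proc = subprocess.Popen([
    "/usr/bin/python3",
    f"/opt/m5stack/scripts/{model}_tokenizer.py",
    "--host", "localhost",
    "--port", str(port),
    "--model_id", base_model + "tokenizer",
    "--content", f"'{prompt}'",
])
time.sleep(10)  # same fixed grace period the C++ code uses before first use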

projects/llm_framework/main_vlm/SConstruct

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ static_file += [AFile('../static_lib/libopencv-4.6-aarch64-none/lib/libzlib.a')]
 STATIC_LIB += static_file * 4
 
 STATIC_FILES += [AFile('internvl2-1b-ax630c.json'),
-                 AFile('internvl2_tokenizer.py')
+                 AFile('internvl2-1b-ax630c_tokenizer.py')
                 ]
 
 env['COMPONENTS'].append({'target':'llm_vlm',

projects/llm_framework/main_vlm/internvl2_tokenizer.py renamed to projects/llm_framework/main_vlm/internvl2-1b-ax630c_tokenizer.py

File renamed without changes.

projects/llm_framework/main_vlm/src/main.cpp

Lines changed: 25 additions & 1 deletion
@@ -50,6 +50,8 @@ class llm_task {
     task_callback_t out_callback_;
     bool enoutput_;
     bool enstream_;
+    std::atomic_bool tokenizer_server_flage_;
+    unsigned int port_ = 8080;
 
     void set_output(task_callback_t out_callback) {
         out_callback_ = out_callback;
@@ -122,6 +124,29 @@ class llm_task {
         CONFIG_AUTO_SET(file_body["mode_param"], b_dynamic_load_axmodel_layer);
         CONFIG_AUTO_SET(file_body["mode_param"], max_token_len);
 
+        if (mode_config_.filename_tokenizer_model.find("http:") != std::string::npos) {
+            if (!tokenizer_server_flage_) {
+                pid_t pid = fork();
+                if (pid == 0) {
+                    execl("/usr/bin/python3", "python3",
+                          ("/opt/m5stack/scripts/" + model_ + "_tokenizer.py").c_str(),
+                          "--host", "localhost",
+                          "--port", std::to_string(port_).c_str(),
+                          "--model_id", (base_model + "tokenizer").c_str(),
+                          "--content", ("'" + prompt_ + "'").c_str(),
+                          nullptr);
+                    perror("execl failed");
+                    exit(1);
+                }
+                tokenizer_server_flage_ = true;
+                SLOGI("port_=%s model_id=%s content=%s", std::to_string(port_).c_str(), (base_model + "tokenizer").c_str(), ("'" + prompt_ + "'").c_str());
+                std::this_thread::sleep_for(std::chrono::seconds(10));
+                // return -1;
+            }
+        } else {
+            mode_config_.filename_tokenizer_model = base_model + mode_config_.filename_tokenizer_model;
+        }
+        SLOGI("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
         mode_config_.filename_tokens_embed = base_model + mode_config_.filename_tokens_embed;
         mode_config_.filename_post_axmodel = base_model + mode_config_.filename_post_axmodel;
         mode_config_.filename_vpm_resampler_axmodedl = base_model + mode_config_.filename_vpm_resampler_axmodedl;
@@ -406,7 +431,6 @@ class llm_llm : public StackFlow {
         for (auto it = llm_task_obj->inputs_.begin(); it != llm_task_obj->inputs_.end();) {
             if (*it == data) {
                 it = llm_task_obj->inputs_.erase(it);
-                break;
             } else {
                 ++it;
             }
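
The 10-second sleep in both main.cpp files is a fixed grace period for the forked tokenizer server to come up. A polling loop against one of its GET endpoints would make the wait adaptive; a minimal sketch (not part of this commit), assuming the server from the first file is listening on localhost:8080:

# Poll the tokenizer server's /bos_id endpoint until it answers, instead of
# sleeping a fixed 10 seconds. Sketch only; URL and timeouts are assumptions.
import time
import urllib.error
import urllib.request

def wait_for_tokenizer(url="http://localhost:8080/bos_id", deadline_s=10.0):
    start = time.monotonic()
    while time.monotonic() - start < deadline_s:
        try:
            with urllib.request.urlopen(url, timeout=1) as resp:
                resp.read()
                return True   # server is up and answering
        except (urllib.error.URLError, OSError):
            time.sleep(0.2)   # not ready yet; retry shortly
    return False              # gave up after the deadline

if __name__ == "__main__":
    print("tokenizer ready:", wait_for_tokenizer())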
