Skip to content

Commit 324f04d

Browse files
author
LittleMouse
committed
[update] update llm-vlm version & model config
1 parent 0d3e36f commit 324f04d

File tree

8 files changed

+26
-13
lines changed

8 files changed

+26
-13
lines changed

projects/llm_framework/main_cosy_voice/src/runner/Tokenizer/Tokenizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ class Tokenizer_Http : public BaseTokenizer {
389389
bool Init(std::string model_path = "http://localhost:8080") override
390390
{
391391
base_url = model_path;
392-
if (!test_connect_http(base_url, 10)) {
392+
if (!test_connect_http(base_url, 20)) {
393393
ALOGE("connect %s failed", base_url.c_str());
394394
return false;
395395
} else {

projects/llm_framework/main_llm/src/runner/Tokenizer/Tokenizer.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,11 +349,17 @@ class Tokenizer_Http : public BaseTokenizer {
349349
bool Init(std::string model_path = "http://localhost:8080", bool b_bos = true, bool b_eos = false) override
350350
{
351351
base_url = model_path;
352+
if (!test_connect_http(base_url, 20)) {
353+
ALOGE("connect %s failed", base_url.c_str());
354+
return false;
355+
} else {
356+
ALOGI("connect %s ok", base_url.c_str());
357+
}
352358
try {
353359
cli = std::make_shared<httplib::Client>(base_url);
354-
cli->set_connection_timeout(1);
355-
cli->set_read_timeout(1);
356-
cli->set_write_timeout(1);
360+
cli->set_connection_timeout(10);
361+
cli->set_read_timeout(10);
362+
cli->set_write_timeout(10);
357363
{
358364
auto ret = cli->Get("/bos_id");
359365
auto rep = ret.value();
@@ -389,7 +395,7 @@ class Tokenizer_Http : public BaseTokenizer {
389395
bool Init(std::string model_path = "http://localhost:8080") override
390396
{
391397
base_url = model_path;
392-
if (!test_connect_http(base_url, 10)) {
398+
if (!test_connect_http(base_url, 20)) {
393399
ALOGE("connect %s failed", base_url.c_str());
394400
return false;
395401
} else {

projects/llm_framework/main_vlm/SConstruct

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ LDFLAGS = []
1717
LINK_SEARCH_PATH = []
1818
STATIC_FILES = []
1919

20-
python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-vlm-python-venv_v1.6.tar.gz", 'm5stack_llm-vlm-python-venv_v1.6.tar.gz')
20+
if 'CONFIG_AX_620E_MSP_ENABLED' in os.environ:
21+
python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-vlm-python-venv_v1.6.tar.gz", 'm5stack_llm-vlm-python-venv_v1.6.tar.gz')
22+
else:
23+
python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-vlm-python-venv_v1.7.tar.gz", 'm5stack_llm-vlm-python-venv_v1.7.tar.gz')
2124

2225
DEFINITIONS += ['-O2']
2326
DEFINITIONS += ['-std=c++17']
@@ -73,7 +76,7 @@ ignore['ignore'] = list(set(ignore['ignore']))
7376
with open('../dist/fileignore', 'w') as f:
7477
json.dump(ignore, f, indent=4)
7578

76-
env['COMPONENTS'].append({'target':'llm_vlm-1.9',
79+
env['COMPONENTS'].append({'target':'llm_vlm-1.10',
7780
'SRCS':SRCS,
7881
'INCLUDE':INCLUDE,
7982
'PRIVATE_INCLUDE':PRIVATE_INCLUDE,

projects/llm_framework/main_vlm/models/mode_qwen3-vl-2B-Int4-ax650.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
"video_token_id": 151656,
4747
"vision_start_token_id": 151652,
4848
"precompute_len": 0,
49-
"cmm_size": 1919044,
49+
"cmm_size": 3582336,
5050
"ext_scripts": [
5151
"tokenizer_qwen3-vl-2B-Int4-ax650.py"
5252
]

projects/llm_framework/main_vlm/src/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,13 +480,15 @@ class llm_task {
480480
std::vector<std::vector<unsigned short>> all_embeds;
481481
if (auto ret = lLaMa_ctx_->Encode(mats, all_embeds); ret != 0) {
482482
ALOGE("lLaMaCtx.Encode failed");
483+
if (out_callback_) out_callback_("Encode failed", true);
483484
return;
484485
}
485486
mats.clear();
486487
if (auto ret =
487488
lLaMa_ctx_->Encode(all_embeds, prompt_data_, prompt_complete(msg), tokens_ids, tokens_diff);
488489
ret != 0) {
489490
ALOGE("lLaMaCtx.Encode failed");
491+
if (out_callback_) out_callback_("Encode failed", true);
490492
return;
491493
}
492494
if (auto ret = lLaMa_ctx_->SetKVCache(k_caches, v_caches, precompute_len, tokens_diff.size());

projects/llm_framework/main_vlm/src/runner/LLM.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#include <algorithm>
44
#include <cmath>
55
#include <numeric>
6+
#include <atomic>
7+
68
#include "bfloat16.hpp"
79
#include "image_processor.hpp"
810
#include "mrope.hpp"
@@ -49,7 +51,7 @@ struct LLMAttrType {
4951
TokenizerType tokenizer_type = TKT_LLaMa;
5052
std::string filename_tokenizer_model = "tokenizer.model";
5153
std::string url_tokenizer_model;
52-
bool b_bos = true;
54+
bool b_bos = false;
5355
bool b_eos = false;
5456
std::string filename_tokens_embed = "tinyllama.model.embed_tokens.weight.bfloat16.bin";
5557
int tokens_embed_num = 32000;

projects/llm_framework/main_vlm/src/runner/Tokenizer/Tokenizer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class Tokenizer_Http : public BaseTokenizer {
2525
bool Init(std::string model_path) override
2626
{
2727
base_url = model_path;
28-
if (!test_connect_http(base_url, 10)) {
28+
if (!test_connect_http(base_url, 20)) {
2929
ALOGE("connect %s failed", base_url.c_str());
3030
return false;
3131
} else {
@@ -153,7 +153,7 @@ class Tokenizer_Http : public BaseTokenizer {
153153
bool Init(std::string model_path = "http://localhost:8080", bool b_bos = true, bool b_eos = false) override
154154
{
155155
base_url = model_path;
156-
if (!test_connect_http(base_url, 10)) {
156+
if (!test_connect_http(base_url, 30)) {
157157
ALOGE("connect %s failed", base_url.c_str());
158158
return false;
159159
} else {
@@ -200,7 +200,7 @@ class Tokenizer_Http : public BaseTokenizer {
200200
bool Init_new(std::string model_path, bool b_bos, bool b_eos) override
201201
{
202202
base_url = model_path;
203-
if (!test_connect_http(base_url, 10)) {
203+
if (!test_connect_http(base_url, 30)) {
204204
ALOGE("connect %s failed", base_url.c_str());
205205
return false;
206206
} else {

projects/llm_framework/tools/llm_pack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', dep
381381
'llm-tts':[create_bin_deb,'llm-tts', '1.6', src_folder, revision],
382382
'llm-melotts':[create_bin_deb,'llm-melotts', '1.9', src_folder, revision],
383383
'llm-camera':[create_bin_deb,'llm-camera', '1.9', src_folder, revision, 'lib-llm'],
384-
'llm-vlm':[create_bin_deb,'llm-vlm', '1.9', src_folder, revision],
384+
'llm-vlm':[create_bin_deb,'llm-vlm', '1.10', src_folder, revision],
385385
'llm-yolo':[create_bin_deb,'llm-yolo', '1.9', src_folder, revision],
386386
'llm-skel':[create_bin_deb,'llm-skel', version, src_folder, revision],
387387
'llm-depth-anything':[create_bin_deb,'llm-depth-anything', '1.7', src_folder, revision],

0 commit comments

Comments (0)