Commit 236eff5

feat: implement GLM41VChatHandler for GLM-4.1V-9B-Thinking Model
- Patch stop tokens in __call__ to handle </answer> and EOS truncation.

Signed-off-by: JamePeng <jame_peng@sina.com>
1 parent: 2789d34

File tree

2 files changed: 78 additions & 0 deletions

README.md

Lines changed: 1 addition & 0 deletions
@@ -496,6 +496,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
 | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` |
 | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6`, `minicpm-v-4.0` |
 | [gemma3](https://huggingface.co/unsloth/gemma-3-27b-it-GGUF) | `Gemma3ChatHandler` | `gemma3` |
+| [glm4.1v](https://huggingface.co/unsloth/GLM-4.1V-9B-Thinking-GGUF) | `GLM41VChatHandler` | `glm4.1v` |
 | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
 | [qwen3-vl](https://huggingface.co/unsloth/Qwen3-VL-8B-Thinking-GGUF) | `Qwen3VLChatHandler` | `qwen3-vl` |
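
For orientation, here is a minimal usage sketch in the style of the README's other multi-modal examples. The GGUF and mmproj filenames are hypothetical placeholders; `clip_model_path` is the standard mmproj argument for `Llava15ChatHandler`-derived handlers.

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import GLM41VChatHandler

# Hypothetical local filenames; convert or download F16/F32 GGUF files first.
chat_handler = GLM41VChatHandler(clip_model_path="mmproj-GLM-4.1V-9B-Thinking-F16.gguf")
llm = Llama(
    model_path="GLM-4.1V-9B-Thinking-F16.gguf",
    chat_handler=chat_handler,
    n_ctx=4096,  # leave room for the image embeddings in the prompt
)
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
)
print(response["choices"][0]["message"]["content"])
```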

llama_cpp/llama_chat_format.py

Lines changed: 77 additions & 0 deletions
@@ -3717,6 +3717,83 @@ class Gemma3ChatHandler(Llava15ChatHandler):
         )
 
 
+class GLM41VChatHandler(Llava15ChatHandler):
+    # Note: Make sure the GGUF files of your converted model and mmproj are F16 or F32.
+
+    GLM41V_EOS_TOKEN = "<|endoftext|>"
+    GLM41V_PAD_TOKEN = "<|endoftext|>"
+    GLM41V_IMAGE_START_TOKEN = "<|begin_of_image|>"
+    GLM41V_IMAGE_END_TOKEN = "<|end_of_image|>"
+
+    CHAT_FORMAT = (
+        "[gMASK]<sop>\n"
+        "{%- for msg in messages -%}"
+        "{%- if msg.role == 'system' -%}"
+        "<|system|>\n{{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
+        "{%- elif msg.role == 'user' -%}"
+        "<|user|>\n"
+        "{%- if msg.content is string -%}"
+        "{{ msg.content }}"
+        "{%- else -%}"
+        "{%- for item in msg.content -%}"
+        "{%- if item.type == 'image_url' or 'image_url' in item -%}"
+        "<|begin_of_image|>"
+        "{%- if item.image_url is string -%}"
+        "{{- item.image_url -}}"
+        "{%- else -%}"
+        "{{- item.image_url.url -}}"
+        "{%- endif -%}"
+        "<|end_of_image|>"
+        "{%- elif item.type == 'text' -%}"
+        "{{ item.text }}"
+        "{%- endif -%}"
+        "{%- endfor -%}"
+        "{%- endif -%}{{ GLM41V_EOS_TOKEN }}"
+        "{%- elif msg.role == 'assistant' -%}"
+        "{%- if msg.metadata -%}"
+        "<|assistant|>{{ msg.metadata }}\n{{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
+        "{%- else -%}"
+        "<|assistant|>\n{{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
+        "{%- endif -%}"
+        "{%- endif -%}"
+        "{%- endfor -%}"
+        "{%- if add_generation_prompt -%}"
+        "<|assistant|>\n"
+        "{%- endif -%}"
+    )
+
+    def __call__(self, **kwargs):
+        self.extra_template_arguments["GLM41V_EOS_TOKEN"] = self.GLM41V_EOS_TOKEN
+        stop_tokens = [self.GLM41V_EOS_TOKEN, "</answer>"]  # Stop token patch
+        kwargs['stop'] = stop_tokens
+
+        llama = kwargs['llama']
+
+        # Clear state for multiple runs
+        llama.reset()
+        llama._ctx.memory_clear(True)
+        llama.n_tokens = 0
+
+        if hasattr(llama, 'input_ids'):
+            llama.input_ids.fill(0)
+
+        # Clear any handler state
+        if hasattr(self, '_last_image_embed'):
+            self._last_image_embed = None
+            self._last_image_hash = None
+
+        if self.verbose:
+            messages = kwargs.get('messages', [])
+            try:
+                image_count = len(self.get_image_urls(messages))
+                print(f"GLM41VChatHandler - Processing {image_count} images", file=sys.stderr)
+            except Exception:
+                print("GLM41VChatHandler - State reset", file=sys.stderr)
+
+        # Use parent implementation
+        return super().__call__(**kwargs)
+
+
 class Qwen25VLChatHandler(Llava15ChatHandler):
     DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
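
As a sanity check on the template above, the following sketch (not part of the commit) renders `CHAT_FORMAT` with plain `jinja2`; passing `GLM41V_EOS_TOKEN` explicitly mirrors what `__call__` injects via `extra_template_arguments`:

```python
import jinja2

from llama_cpp.llama_chat_format import GLM41VChatHandler

template = jinja2.Template(GLM41VChatHandler.CHAT_FORMAT)
prompt = template.render(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    add_generation_prompt=True,
    GLM41V_EOS_TOKEN=GLM41VChatHandler.GLM41V_EOS_TOKEN,
)
print(prompt)
# [gMASK]<sop>
# <|system|>
# You are a helpful assistant.<|endoftext|><|user|>
# Hello!<|endoftext|><|assistant|>
```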

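Because generation now stops at "</answer>", a completion from the Thinking model typically arrives as "<think>...</think><answer>..." with the closing tag trimmed off. The handler does not split these parts for you; below is a hedged post-processing sketch, assuming that tag layout:

```python
import re

def split_thinking(text: str) -> tuple[str, str]:
    """Split a GLM-4.1V-Thinking completion into (reasoning, answer)."""
    match = re.search(r"<think>(.*?)</think>", text, flags=re.DOTALL)
    reasoning = match.group(1).strip() if match else ""
    # Generation halts on the "</answer>" stop token, so only the opening tag remains.
    answer = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    answer = answer.replace("<answer>", "").strip()
    return reasoning, answer

reasoning, answer = split_thinking(
    "<think>The user wants a description; the image shows a cat.</think>"
    "<answer>A cat sitting on a windowsill."
)
print(answer)  # A cat sitting on a windowsill.
```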