@@ -3717,6 +3717,83 @@ class Gemma3ChatHandler(Llava15ChatHandler):
37173717 )
37183718
37193719
class GLM41VChatHandler(Llava15ChatHandler):
    """Chat handler for GLM-4.1V multimodal (vision) models.

    Note: Make sure the GGUF files of your converted model and mmproj are
    F16 or F32.
    """

    # Special tokens used by the GLM-4.1V tokenizer.
    GLM41V_EOS_TOKEN = "<|endoftext|>"
    GLM41V_PAD_TOKEN = "<|endoftext|>"
    GLM41V_IMAGE_START_TOKEN = "<|begin_of_image|>"
    GLM41V_IMAGE_END_TOKEN = "<|end_of_image|>"

    # Jinja2 chat template. `GLM41V_EOS_TOKEN` is resolved at render time via
    # the extra template arguments injected in __call__ below; image URLs are
    # wrapped in <|begin_of_image|>/<|end_of_image|> so the base handler can
    # locate and embed them.
    CHAT_FORMAT = (
        "[gMASK]<sop>\n "
        "{%- for msg in messages -%}"
        "{%- if msg.role == 'system' -%}"
        "<|system|>\n {{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
        "{%- elif msg.role == 'user' -%}"
        "<|user|>\n "
        "{%- if msg.content is string -%}"
        "{{ msg.content }}"
        "{%- else -%}"
        "{%- for item in msg.content -%}"
        "{%- if item.type == 'image_url' or 'image_url' in item -%}"
        "<|begin_of_image|>"
        "{%- if item.image_url is string -%}"
        "{{- item.image_url -}}"
        "{%- else -%}"
        "{{- item.image_url.url -}}"
        "{%- endif -%}"
        "<|end_of_image|>"
        "{%- elif item.type == 'text' -%}"
        "{{ item.text }}"
        "{%- endif -%}"
        "{%- endfor -%}"
        "{%- endif -%}{{ GLM41V_EOS_TOKEN }}"
        "{%- elif msg.role == 'assistant' -%}"
        "{%- if msg.metadata -%}"
        "<|assistant|>{{ msg.metadata }}\n {{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
        "{%- else -%}"
        "<|assistant|>\n {{ msg.content }}{{ GLM41V_EOS_TOKEN }}"
        "{%- endif -%}"
        "{%- endif -%}"
        "{%- endfor -%}"
        "{%- if add_generation_prompt -%}"
        "<|assistant|>\n "
        "{%- endif -%}"
    )

    def __call__(self, **kwargs):
        """Run a chat completion, resetting llama state between calls.

        Delegates to the base ``Llava15ChatHandler`` after (a) exposing
        ``GLM41V_EOS_TOKEN`` to the Jinja2 template, (b) forcing the stop
        tokens GLM-4.1V needs, and (c) clearing llama/KV-cache state so
        repeated calls do not leak context from a previous run.
        """
        # NOTE(review): assumes the base handler defines
        # `extra_template_arguments` and forwards it to the template
        # renderer — confirm against Llava15ChatHandler.
        self.extra_template_arguments["GLM41V_EOS_TOKEN"] = self.GLM41V_EOS_TOKEN

        # Stop token patch: GLM-4.1V requires these stops; any caller-supplied
        # `stop` is intentionally overridden.
        kwargs["stop"] = [self.GLM41V_EOS_TOKEN, "</answer>"]

        llama = kwargs["llama"]

        # Clear state for multiple runs: reset sampling state, wipe the
        # KV-cache/memory, and zero the token bookkeeping.
        llama.reset()
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, "input_ids"):
            llama.input_ids.fill(0)

        # Clear any cached image embedding held by the handler so stale
        # embeds from a previous request are never reused.
        if hasattr(self, "_last_image_embed"):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            # Use the real class name in diagnostics (the original logged
            # "GLM4VChatHandler", which does not match this class).
            name = type(self).__name__
            messages = kwargs.get("messages", [])
            try:
                image_count = len(self.get_image_urls(messages))
                print(f"{name} - Processing {image_count} images", file=sys.stderr)
            except Exception:
                print(f"{name} - State reset", file=sys.stderr)

        # Use parent implementation for the actual completion.
        return super().__call__(**kwargs)
3795+
3796+
37203797class Qwen25VLChatHandler (Llava15ChatHandler ):
37213798 DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
37223799
0 commit comments