From 8445aafea75ff12e720cb36d3835427edefd8142 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Tue, 2 Dec 2025 16:35:37 +0000 Subject: [PATCH 1/7] add non-approximated gelu --- fast_llm/functional/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fast_llm/functional/config.py b/fast_llm/functional/config.py index 684193848..77fbefe37 100644 --- a/fast_llm/functional/config.py +++ b/fast_llm/functional/config.py @@ -39,6 +39,7 @@ class ActivationType(enum.StrEnum): An enum for the available activation types for the MLP layer. """ + gelu_gaussian = "gelu_gaussian" gelu = "gelu" silu = "silu" relu = "relu" @@ -67,6 +68,7 @@ def _set_activation_fn_map() -> None: global _ACTIVATION_FN_MAP _ACTIVATION_FN_MAP = { + ActivationType.gelu_gaussian: torch.nn.functional.gelu, ActivationType.gelu: lambda x: torch.nn.functional.gelu(x, approximate="tanh"), ActivationType.silu: torch.nn.functional.silu, ActivationType.relu: torch.nn.functional.relu, @@ -78,6 +80,7 @@ def _set_activation_fn_map() -> None: _ACTIVATION_FN_MAP: dict[ActivationType, typing.Callable[["torch.Tensor"], "torch.Tensor"]] = {} _ACTIVATION_HF_NAMES = { + ActivationType.gelu_gaussian: "gelu", ActivationType.gelu: "gelu_pytorch_tanh", ActivationType.silu: "silu", ActivationType.relu: "relu", From aa46283dec1954af08134b870539aa15bd408ae6 Mon Sep 17 00:00:00 2001 From: Raymond Li Date: Tue, 2 Dec 2025 13:24:03 -0500 Subject: [PATCH 2/7] remove projector_intermediate_size --- fast_llm/models/multimodal/conversion/llava.py | 3 --- .../llava_hybrid/configuration_llava_hybrid.py | 3 --- .../llava_hybrid/modeling_llava_hybrid.py | 4 ++-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/fast_llm/models/multimodal/conversion/llava.py b/fast_llm/models/multimodal/conversion/llava.py index 9657d71b6..098514f51 100644 --- a/fast_llm/models/multimodal/conversion/llava.py +++ b/fast_llm/models/multimodal/conversion/llava.py @@ -184,8 +184,6 @@ def export_config(cls, config: MLPConfig) -> dict: 
return { "projector_hidden_act": config.activation.hf_name, "multimodal_projector_bias": config.add_linear_biases, - # Not in LlavaConfig, but needed for consistency check in LlavaBaseModelConverter. - "projector_intermediate_size": config.intermediate_size, } @classmethod @@ -311,7 +309,6 @@ def export_config(cls, config: MultiModalBaseModelConfig) -> dict: "vision_feature_layer": -1, }, ) - Assert.eq(out.pop("projector_intermediate_size"), out["text_config"]["hidden_size"]) return out @classmethod diff --git a/fast_llm_external_models/llava_hybrid/configuration_llava_hybrid.py b/fast_llm_external_models/llava_hybrid/configuration_llava_hybrid.py index 9d1f014d8..eeeb0bca5 100644 --- a/fast_llm_external_models/llava_hybrid/configuration_llava_hybrid.py +++ b/fast_llm_external_models/llava_hybrid/configuration_llava_hybrid.py @@ -59,7 +59,6 @@ def __init__( text_config=None, image_token_index=32000, projector_hidden_act="gelu", - projector_intermediate_size=4096, vision_feature_select_strategy="default", vision_feature_layer=-2, image_seq_length=576, @@ -68,8 +67,6 @@ def __init__( ): self.image_token_index = image_token_index self.projector_hidden_act = projector_hidden_act - # projector_intermediate_size is an addition to the original Llava config - self.projector_intermediate_size = projector_intermediate_size self.image_seq_length = image_seq_length if vision_feature_select_strategy not in ["default", "full"]: diff --git a/fast_llm_external_models/llava_hybrid/modeling_llava_hybrid.py b/fast_llm_external_models/llava_hybrid/modeling_llava_hybrid.py index 243413a33..e51915321 100644 --- a/fast_llm_external_models/llava_hybrid/modeling_llava_hybrid.py +++ b/fast_llm_external_models/llava_hybrid/modeling_llava_hybrid.py @@ -22,12 +22,12 @@ def __init__(self, config: LlavaHybridConfig): num_feature_layers = 1 if isinstance(config.vision_feature_layer, int) else len(config.vision_feature_layer) self.linear_1 = nn.Linear( config.vision_config.hidden_size * 
num_feature_layers, - config.projector_intermediate_size, + config.text_config.hidden_size, bias=config.multimodal_projector_bias, ) self.act = ACT2FN[config.projector_hidden_act] self.linear_2 = nn.Linear( - config.projector_intermediate_size, config.text_config.hidden_size, bias=config.multimodal_projector_bias + config.text_config.hidden_size, config.text_config.hidden_size, bias=config.multimodal_projector_bias ) def forward(self, image_features): From 17c99706b0cca59e951e01876364389badd97700 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Tue, 2 Dec 2025 21:24:08 +0000 Subject: [PATCH 3/7] fix llava hf weight prefixes --- fast_llm/models/multimodal/conversion/llava.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fast_llm/models/multimodal/conversion/llava.py b/fast_llm/models/multimodal/conversion/llava.py index 098514f51..76596a450 100644 --- a/fast_llm/models/multimodal/conversion/llava.py +++ b/fast_llm/models/multimodal/conversion/llava.py @@ -266,11 +266,11 @@ def get_converters( *cls.normalization_converter_class.get_converters( config.normalization, f"{fast_llm_prefix}.final_norm", - f"model.language_model.norm", + f"language_model.model.norm", ), get_parameter_converter( f"{fast_llm_prefix}.output_weights", - "lm_head.weight", + "language_model.lm_head.weight", drop_on_import=exported_config["tie_word_embeddings"], ), ] @@ -316,10 +316,10 @@ def get_converters(cls, config: MultiModalBaseModelConfig, exported_config: dict return [ *cls.vision_model_converter_class.get_converters(config.vision_encoder), *cls.language_model_converter_class.embeddings_converter_class.get_converters( - config.embeddings, "embeddings", "model.language_model" + config.embeddings, "embeddings", "language_model.model" ), *cls.language_model_converter_class.decoder_converter_class.get_converters( - config.decoder, "decoder", "model.language_model.layers" + config.decoder, "decoder", "language_model.model.layers" ), 
*cls.language_model_converter_class.head_converter_class.get_converters( config.head, {"tie_word_embeddings": False}, "head" From 6e5da16d3f4c5df75967dbc37b767296c365b0b9 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Wed, 3 Dec 2025 20:38:03 +0000 Subject: [PATCH 4/7] fix vision tower hf prefix --- fast_llm/models/multimodal/conversion/llava.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fast_llm/models/multimodal/conversion/llava.py b/fast_llm/models/multimodal/conversion/llava.py index 76596a450..556e38f4a 100644 --- a/fast_llm/models/multimodal/conversion/llava.py +++ b/fast_llm/models/multimodal/conversion/llava.py @@ -243,13 +243,13 @@ def export_config(cls, config: VisionEncoderConfig) -> dict: def get_converters(cls, config: VisionEncoderConfig) -> list[WeightConverter]: return [ *cls.embeddings_converter_class.get_converters( - config.embeddings, "vision_encoder.embeddings", "model.vision_tower" + config.embeddings, "vision_encoder.embeddings", "vision_tower" ), *cls.encoder_converter_class.get_converters( - config.encoder, "vision_encoder.encoder", "model.vision_tower.transformer.layers" + config.encoder, "vision_encoder.encoder", "vision_tower.transformer.layers" ), *cls.vision_adapter_converter_class.get_converters( - config.adapter, "vision_encoder.adapter", "model.multi_modal_projector" + config.adapter, "vision_encoder.adapter", "multi_modal_projector" ), ] From f26027747aa31b6816dbdfbc05bf7329becfe585 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Wed, 3 Dec 2025 21:30:11 +0000 Subject: [PATCH 5/7] fix intermediate size import --- fast_llm/models/multimodal/conversion/llava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast_llm/models/multimodal/conversion/llava.py b/fast_llm/models/multimodal/conversion/llava.py index 556e38f4a..a489444ae 100644 --- a/fast_llm/models/multimodal/conversion/llava.py +++ b/fast_llm/models/multimodal/conversion/llava.py @@ -168,7 +168,7 @@ class 
LlavaVisionAdapterConverter: @classmethod def import_config(cls, config: dict) -> dict: return { - "intermediate_size": config["vision_config"]["hidden_size"], + "intermediate_size": config["text_config"]["hidden_size"], "add_linear_biases": config["multimodal_projector_bias"], "gated": False, "activation": ActivationType.from_hf_name(config["projector_hidden_act"]), From 98b6283c797b2c750a540edc4c8a30cfd273f192 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Wed, 3 Dec 2025 21:59:05 +0000 Subject: [PATCH 6/7] remove gelu_gaussian --- fast_llm/functional/config.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fast_llm/functional/config.py b/fast_llm/functional/config.py index 77fbefe37..d7ceb8d6d 100644 --- a/fast_llm/functional/config.py +++ b/fast_llm/functional/config.py @@ -39,7 +39,6 @@ class ActivationType(enum.StrEnum): An enum for the available activation types for the MLP layer. """ - gelu_gaussian = "gelu_gaussian" gelu = "gelu" silu = "silu" relu = "relu" @@ -68,7 +67,6 @@ def _set_activation_fn_map() -> None: global _ACTIVATION_FN_MAP _ACTIVATION_FN_MAP = { - ActivationType.gelu_gaussian: torch.nn.functional.gelu, ActivationType.gelu: lambda x: torch.nn.functional.gelu(x, approximate="tanh"), ActivationType.silu: torch.nn.functional.silu, ActivationType.relu: torch.nn.functional.relu, @@ -80,14 +78,21 @@ def _set_activation_fn_map() -> None: _ACTIVATION_FN_MAP: dict[ActivationType, typing.Callable[["torch.Tensor"], "torch.Tensor"]] = {} _ACTIVATION_HF_NAMES = { - ActivationType.gelu_gaussian: "gelu", ActivationType.gelu: "gelu_pytorch_tanh", ActivationType.silu: "silu", ActivationType.relu: "relu", ActivationType.squared_relu: "relu2", ActivationType.identity: "identity", } -_ACTIVATION_HF_NAMES_INV = {value: key for key, value in _ACTIVATION_HF_NAMES.items()} +# gelu and gelu_pytorch_tanh both map to our standard gelu +_ACTIVATION_HF_NAMES_INV = { + "gelu": ActivationType.gelu, + "gelu_pytorch_tanh": 
ActivationType.gelu, + "silu": ActivationType.silu, + "relu": ActivationType.relu, + "relu2": ActivationType.squared_relu, + "identity": ActivationType.identity, +} MAX_DROPLESS_BLOCK_SIZE_ROW = 128 From 9cce2aa7f2d6833d59af017fad4ce4eaab115be9 Mon Sep 17 00:00:00 2001 From: Toolkit User Date: Fri, 5 Dec 2025 20:33:33 +0000 Subject: [PATCH 7/7] derive HF activation inverse map from _ACTIVATION_HF_NAMES, add "gelu" alias --- fast_llm/functional/config.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fast_llm/functional/config.py b/fast_llm/functional/config.py index d7ceb8d6d..713bc5321 100644 --- a/fast_llm/functional/config.py +++ b/fast_llm/functional/config.py @@ -84,15 +84,9 @@ def _set_activation_fn_map() -> None: ActivationType.squared_relu: "relu2", ActivationType.identity: "identity", } +_ACTIVATION_HF_NAMES_INV = {value: key for key, value in _ACTIVATION_HF_NAMES.items()} # gelu and gelu_pytorch_tanh both map to our standard gelu -_ACTIVATION_HF_NAMES_INV = { - "gelu": ActivationType.gelu, - "gelu_pytorch_tanh": ActivationType.gelu, - "silu": ActivationType.silu, - "relu": ActivationType.relu, - "relu2": ActivationType.squared_relu, - "identity": ActivationType.identity, -} +_ACTIVATION_HF_NAMES_INV["gelu"] = ActivationType.gelu MAX_DROPLESS_BLOCK_SIZE_ROW = 128