1 parent 81b8417 commit ca1698e
src/transformers/integrations/finegrained_fp8.py
@@ -606,7 +606,7 @@ def replace_with_fp8_linear(
        module_kwargs = {} if pre_quantized else {"dtype": None}
        new_module = None
        with init_empty_weights():
-           if "gate_up_proj" in module_name or "down_proj" in module_name and "experts" in module_name:
+           if module_name.endswith(".experts"):
                new_module = FP8Expert(
                    config=model.config, block_size=quantization_config.weight_block_size, **module_kwargs
                )
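
Why the change helps (a sketch, not part of the commit): in Python, "and" binds tighter than "or", so the old condition parsed as `"gate_up_proj" in module_name or ("down_proj" in module_name and "experts" in module_name)`. That matches any module whose name contains "gate_up_proj", even in a dense MLP outside an expert block, while never matching the experts container module itself. The module names below are hypothetical examples:

# Sketch of the two predicates; the module names are made-up examples.
def old_check(module_name: str) -> bool:
    # Parses as: A or (B and C) -- the precedence pitfall.
    return "gate_up_proj" in module_name or "down_proj" in module_name and "experts" in module_name

def new_check(module_name: str) -> bool:
    return module_name.endswith(".experts")

# A dense-MLP projection the old check wrongly matched:
assert old_check("model.layers.0.mlp.gate_up_proj")
assert not new_check("model.layers.0.mlp.gate_up_proj")

# The experts container module the commit targets:
assert not old_check("model.layers.0.mlp.experts")
assert new_check("model.layers.0.mlp.experts")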