
Commit ca1698e

MekkCyber and SunMarc authored

[Quantization] Fix FP8 experts replacing (#42654)

small fix

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

1 parent 81b8417 commit ca1698e

File tree

1 file changed (+1, -1 lines changed)


src/transformers/integrations/finegrained_fp8.py

Lines changed: 1 addition & 1 deletion

@@ -606,7 +606,7 @@ def replace_with_fp8_linear(
             module_kwargs = {} if pre_quantized else {"dtype": None}
             new_module = None
             with init_empty_weights():
-                if "gate_up_proj" in module_name or "down_proj" in module_name and "experts" in module_name:
+                if module_name.endswith(".experts"):
                     new_module = FP8Expert(
                         config=model.config, block_size=quantization_config.weight_block_size, **module_kwargs
                     )
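The bug the diff fixes is one of operator precedence: in Python, `and` binds tighter than `or`, so the old condition parsed as `"gate_up_proj" in module_name or ("down_proj" in module_name and "experts" in module_name)`. Any module whose name contained "gate_up_proj" therefore matched, whether or not it belonged to an MoE experts block. A minimal sketch of the two checks (the module names below are illustrative, not taken from a real model):

```python
def old_check(module_name: str) -> bool:
    # Parsed as: A or (B and C) -- `and` binds tighter than `or`,
    # so "gate_up_proj" alone is enough to match.
    return "gate_up_proj" in module_name or "down_proj" in module_name and "experts" in module_name

def new_check(module_name: str) -> bool:
    # Only the experts container module itself matches.
    return module_name.endswith(".experts")

# A dense (non-expert) projection wrongly matched the old check:
assert old_check("model.layers.0.mlp.gate_up_proj") is True
assert new_check("model.layers.0.mlp.gate_up_proj") is False

# The experts container is what the new check targets:
assert new_check("model.layers.0.mlp.experts") is True
```

With the new check, `FP8Expert` replacement is driven solely by the module path ending in `.experts`, so dense MLP projections fall through to the regular FP8 linear path.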
