We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a25fd8d, commit e98380b (Copy full SHA for e98380b)
paddleformers/trainer/trainer.py
@@ -2007,8 +2007,6 @@ def hybrid_parallel_scale_param_grad(paramlist, hcg):
2007
cp_worldsize = hcg.get_context_parallel_world_size()
2008
2009
for p in paramlist:
2010
- if not getattr(p, "no_sync", False):
2011
- continue
2012
color = getattr(p, "color", -1)
2013
is_expert = isinstance(color, dict) and color.get("color", -1) == "moe_expert"
2014
disable_scale_grad = getattr(p, "context_parallel_disable_scale_grad", False)
0 commit comments