Commit facbe9b

rename run_single_model and remove redundant code
1 parent 68fe785 commit facbe9b

File tree

3 files changed (+5, -13 lines)


paddleformers/cli/train/auto_parallel/workflow.py

Lines changed: 1 addition & 9 deletions
@@ -268,7 +268,7 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
         training_args.no_recompute_layers.sort()
 
     if training_args.use_intermediate_api:
-        config.run_single_model = True
+        config.use_single_model_implementation = True
         config.tensor_parallel_degree = 1
         config.sharding_parallel_degree = 1
         config.sep_parallel_degree = 1
@@ -288,13 +288,6 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
 
     if not training_args.enable_auto_parallel and training_args.pipeline_parallel_degree > 1:
         model_class = AutoModelForCausalLMPipe
-        if "LLama" in str(config.architectures):
-            try:
-                from utils.register_reshard import register_pp_reshard_information
-
-                register_pp_reshard_information(config.num_hidden_layers)
-            except:
-                print("Not register llama pp reshard information.")
 
     architectures_to_check = {"Qwen2Moe", "DeepseekV2", "DeepseekV3"}
     if (
@@ -304,7 +297,6 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
         training_args.use_expert_parallel = True
 
     if model_args.continue_training:
-        # NOTE(gongenlei): new add
         if training_args.autotuner_benchmark:
             model = model_class.from_config(config, dtype=dtype)
         else:
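A minimal sketch of the behavior the first hunk encodes, using `types.SimpleNamespace` stand-ins for the real `training_args` and `config` objects (the stand-ins are assumptions for illustration, not paddleformers types): when the intermediate API is enabled, the config is switched to the single-model implementation and every parallel degree is forced back to 1.

    from types import SimpleNamespace

    # Hypothetical stand-ins for the real training_args/config objects.
    training_args = SimpleNamespace(use_intermediate_api=True)
    config = SimpleNamespace(
        use_single_model_implementation=False,
        tensor_parallel_degree=8,
        sharding_parallel_degree=2,
        sep_parallel_degree=2,
    )

    if training_args.use_intermediate_api:
        # Renamed flag: was `config.run_single_model` before this commit.
        config.use_single_model_implementation = True
        config.tensor_parallel_degree = 1
        config.sharding_parallel_degree = 1
        config.sep_parallel_degree = 1

    assert config.use_single_model_implementation is True
    assert config.tensor_parallel_degree == 1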

paddleformers/transformers/configuration_utils.py

Lines changed: 3 additions & 3 deletions
@@ -539,7 +539,7 @@ class PretrainedConfig:
             Whether the model's input and output word embeddings should be tied. Note that this is only relevant if the
             model has a output word embedding layer.
 
-        run_single_model (`bool`, *optional*, defaults to `False`):
+        use_single_model_implementation (`bool`, *optional*, defaults to `False`):
             Whether to run the model in single card mode. When enabled, all parallel degree configurations will be disabled.
 
         dtype (`str`, *optional*):
@@ -613,8 +613,8 @@ def __init__(self, **kwargs):
         self.tie_word_embeddings = kwargs.pop("tie_word_embeddings", True)
 
         # for run model in single card mode
-        self.run_single_model = kwargs.pop("run_single_model", False)
-        if self.run_single_model:
+        self.use_single_model_implementation = kwargs.pop("use_single_model_implementation", False)
+        if self.use_single_model_implementation:
             self.tensor_parallel_degree = 1
             self.sep_parallel_degree = 1
             self.context_parallel_degree = 1
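A short sketch of how a downstream caller passes the renamed kwarg, assuming `PretrainedConfig` can be constructed directly and that no later logic overrides the degrees pinned in the `__init__` hunk above:

    from paddleformers.transformers.configuration_utils import PretrainedConfig

    # The old `run_single_model` kwarg is gone after this commit; use the new name.
    config = PretrainedConfig(use_single_model_implementation=True)

    # Per the __init__ hunk, single-model mode pins these degrees to 1.
    print(config.tensor_parallel_degree)   # 1
    print(config.sep_parallel_degree)      # 1
    print(config.context_parallel_degree)  # 1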

paddleformers/transformers/llama/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -693,7 +693,7 @@ def forward(
         )
 
     def auto_dist_config(self, prefix=""):
-        assert self.config.run_single_model, "Use `get_dist_config` only in single card mode."
+        assert self.config.use_single_model_implementation, "Use `get_dist_config` only in single card mode."
         return get_dist_config(self, prefix)
 
 
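A self-contained sketch that mirrors the renamed guard, using a hypothetical config stub and a placeholder return value instead of the real model class and `get_dist_config`: callers must enable single-model mode on the config before calling `auto_dist_config`, otherwise the assert fires.

    class _ConfigStub:
        # Hypothetical stand-in for PretrainedConfig; only the renamed flag matters here.
        use_single_model_implementation = False

    def auto_dist_config(config, prefix=""):
        # Mirrors the guard in the hunk above after the rename.
        assert config.use_single_model_implementation, "Use `get_dist_config` only in single card mode."
        return {"prefix": prefix}  # placeholder for get_dist_config(self, prefix)

    cfg = _ConfigStub()
    cfg.use_single_model_implementation = True   # required before calling
    print(auto_dist_config(cfg))                 # {'prefix': ''}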
