Commit 8d6ca3a

Author: root (committed)

rename run_single_model and remove redundant code

1 parent 8e83c57 · commit 8d6ca3a

File tree: 3 files changed (+5, -13 lines)


paddleformers/cli/train/auto_parallel/workflow.py

Lines changed: 1 addition & 9 deletions
@@ -270,7 +270,7 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
     training_args.no_recompute_layers.sort()
 
     if training_args.use_intermediate_api:
-        config.run_single_model = True
+        config.use_single_model_implementation = True
         config.tensor_parallel_degree = 1
         config.sharding_parallel_degree = 1
         config.sep_parallel_degree = 1
@@ -290,13 +290,6 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
 
     if not training_args.enable_auto_parallel and training_args.pipeline_parallel_degree > 1:
         model_class = AutoModelForCausalLMPipe
-        if "LLama" in str(config.architectures):
-            try:
-                from utils.register_reshard import register_pp_reshard_information
-
-                register_pp_reshard_information(config.num_hidden_layers)
-            except:
-                print("Not register llama pp reshard information.")
 
     architectures_to_check = {"Qwen2Moe", "DeepseekV2", "DeepseekV3"}
     if (
@@ -306,7 +299,6 @@ def run_auto_parallel(model_args, data_args, generating_args, training_args):
         training_args.use_expert_parallel = True
 
     if model_args.continue_training:
-        # NOTE(gongenlei): new add
         if training_args.autotuner_benchmark:
             model = model_class.from_config(config, dtype=dtype)
         else:
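The net effect of the first hunk: when training_args.use_intermediate_api is set, the workflow flags single-card execution under the new attribute name and pins every parallel degree to 1 before the model is built. A minimal sketch of that path, where the wrapper function is hypothetical and only the attribute names come from the hunk above:

    # Hypothetical helper; only the attribute names are taken from the diff above.
    def force_single_model_mode(config, training_args):
        if training_args.use_intermediate_api:
            # renamed from config.run_single_model in this commit
            config.use_single_model_implementation = True
            config.tensor_parallel_degree = 1
            config.sharding_parallel_degree = 1
            config.sep_parallel_degree = 1
        return config

After the rename, only the new attribute name triggers the single-card handling in PretrainedConfig.__init__ (see the next file).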

paddleformers/transformers/configuration_utils.py

Lines changed: 3 additions & 3 deletions
@@ -537,7 +537,7 @@ class PretrainedConfig:
             Whether the model's input and output word embeddings should be tied. Note that this is only relevant if the
             model has a output word embedding layer.
 
-        run_single_model (`bool`, *optional*, defaults to `False`):
+        use_single_model_implementation (`bool`, *optional*, defaults to `False`):
             Whether to run the model in single card mode. When enabled, all parallel degree configurations will be disabled.
 
         dtype (`str`, *optional*):
@@ -605,8 +605,8 @@ def __init__(self, **kwargs):
         self.tie_word_embeddings = kwargs.pop("tie_word_embeddings", True)
 
         # for run model in single card mode
-        self.run_single_model = kwargs.pop("run_single_model", False)
-        if self.run_single_model:
+        self.use_single_model_implementation = kwargs.pop("use_single_model_implementation", False)
+        if self.use_single_model_implementation:
             self.tensor_parallel_degree = 1
             self.sep_parallel_degree = 1
             self.context_parallel_degree = 1
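A minimal usage sketch of the renamed constructor kwarg, assuming PretrainedConfig can be instantiated directly with keyword arguments only; the import path simply mirrors the file path shown above:

    from paddleformers.transformers.configuration_utils import PretrainedConfig

    # __init__ pops the renamed kwarg; when truthy, the parallel degrees shown
    # in the hunk are pinned to 1, i.e. single card mode.
    config = PretrainedConfig(use_single_model_implementation=True)
    assert config.tensor_parallel_degree == 1
    assert config.sep_parallel_degree == 1
    assert config.context_parallel_degree == 1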

paddleformers/transformers/llama/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -2160,5 +2160,5 @@ def forward(
         )
 
     def auto_dist_config(self, prefix=""):
-        assert self.config.run_single_model, "Use `get_dist_config` only in single card mode."
+        assert self.config.use_single_model_implementation, "Use `get_dist_config` only in single card mode."
        return get_dist_config(self, prefix)
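Because auto_dist_config now asserts the renamed flag, single-card mode has to be enabled on the config before asking the model for its distributed configuration. A rough sketch of the calling convention, where LlamaConfig and LlamaForCausalLM are assumed class names and not part of this diff:

    # Assumed class names; only auto_dist_config and the renamed flag appear in the diff.
    config = LlamaConfig(use_single_model_implementation=True)
    model = LlamaForCausalLM(config)
    dist_config = model.auto_dist_config()  # passes the assert, delegates to get_dist_config

    # Without the flag (it defaults to False) the same call raises AssertionError:
    # LlamaForCausalLM(LlamaConfig()).auto_dist_config()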
