Browse Source

!10165 Performance optimization of BERT on GPU using graph_kernel

From: @hanhuifeng2020
Reviewed-by: @gaoxiong1,@ryanww
Signed-off-by: @ryanww
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
1631765922
4 changed files with 20 additions and 18 deletions
  1. +1
    -1
      akg
  2. +4
    -2
      model_zoo/official/nlp/bert/run_pretrain.py
  3. +14
    -14
      model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh
  4. +1
    -1
      model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh

+ 1
- 1
akg

@@ -1 +1 @@
Subproject commit 72b359ad457ed8f4f254c8a3bd2bde88967202fb
Subproject commit 065ca5353077903828bebc1baedd4d1c0b052bb6

+ 4
- 2
model_zoo/official/nlp/bert/run_pretrain.py View File

@@ -94,7 +94,8 @@ def _get_optimizer(args_opt, network):
def _auto_enable_graph_kernel(device_target, graph_kernel_mode):
"""Judge whether it is suitable to enable graph kernel."""
return graph_kernel_mode in ("auto", "true") and device_target == 'GPU' and \
cfg.bert_network == 'base' and cfg.batch_size == 32 and cfg.optimizer == 'AdamWeightDecay'
cfg.bert_network == 'base' and (cfg.batch_size == 32 or cfg.batch_size == 64) and \
cfg.optimizer == 'AdamWeightDecay'


def run_pretrain():
@@ -148,7 +149,8 @@ def run_pretrain():
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
device_num=device_num)
_set_bert_all_reduce_split()
if args_opt.device_target == 'Ascend':
_set_bert_all_reduce_split()
else:
rank = 0
device_num = 1


+ 14
- 14
model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh View File

@@ -27,18 +27,18 @@ DATA_DIR=$3
SCHEMA_DIR=$4

mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
python run_pretrain.py \
--device_target="GPU" \
--distribute="true" \
--epoch_size=$EPOCH_SIZE \
--enable_save_ckpt="true" \
--enable_lossscale="false" \
--do_shuffle="true" \
--enable_data_sink="true" \
--data_sink_steps=1 \
--load_checkpoint_path="" \
--save_checkpoint_steps=10000 \
--save_checkpoint_num=1 \
--data_dir=$DATA_DIR \
--schema_dir=$SCHEMA_DIR > log.txt 2>&1 &
python run_pretrain.py \
--device_target="GPU" \
--distribute="true" \
--epoch_size=$EPOCH_SIZE \
--enable_save_ckpt="true" \
--enable_lossscale="false" \
--do_shuffle="true" \
--enable_data_sink="true" \
--data_sink_steps=20 \
--load_checkpoint_path="" \
--save_checkpoint_steps=10000 \
--save_checkpoint_num=1 \
--data_dir=$DATA_DIR \
--schema_dir=$SCHEMA_DIR > log.txt 2>&1 &


+ 1
- 1
model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh View File

@@ -39,7 +39,7 @@ python run_pretrain.py \
--enable_lossscale="false" \
--do_shuffle="true" \
--enable_data_sink="true" \
--data_sink_steps=1 \
--data_sink_steps=20 \
--load_checkpoint_path="" \
--save_checkpoint_path="" \
--save_checkpoint_steps=10000 \


Loading…
Cancel
Save