@@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/)
 +-- Readme.md                              # descriptions about ShuffleNetV2
 +-- scripts
 ¦   +--run_distribute_train_for_gpu.sh     # shell script for distributed training
-¦   +--run_eval_for_multi_gpu.sh           # shell script for evaluation
+¦   +--run_eval_for_gpu.sh                 # shell script for evaluation
 ¦   +--run_standalone_train_for_gpu.sh     # shell script for standalone training
 +-- src
 ¦   +--config.py                           # parameter configuration
@@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/)
 You can start training using python or shell scripts. The usage of shell scripts as follows:
-- Ditributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR]
-- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR]
+- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
+- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH]

 ### Launch

 ```
 # training example
 python:
-GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 &
+GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 &
 shell:
-GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/
+GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```

 ### Result

-Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log will be redirected to `./train/train.log`.
+Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`.

 ## [Eval process](#contents)
@@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a
 You can start evaluation using python or shell scripts. The usage of shell scripts as follows:
-- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH]
+- GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]

 ### Launch

 ```
 # infer example
 python:
-GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 &
+GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' --checkpoint='checkpoint_file' > eval.log 2>&1 &
 shell:
-GPU: sh run_eval_for_multi_gpu.sh 0 250
+GPU: cd scripts && sh run_eval_for_gpu.sh '~/imagenet/val/' 'checkpoint_file'
 ```

 > checkpoint can be produced in training process.

 ### Result

-Inference result will be stored in the example path, you can find result in `val.log`.
+Inference result will be stored in the example path; you can find the result in `eval.log`.
@@ -31,7 +31,6 @@ if __name__ == '__main__':
     parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)')
     parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
     parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
-    parser.add_argument('--epoch', type=str, default='')
     args_opt = parser.parse_args()

     if args_opt.platform == 'Ascend':
@@ -43,7 +42,7 @@ if __name__ == '__main__':
     ckpt = load_checkpoint(args_opt.checkpoint)
     load_param_into_net(net, ckpt)
     net.set_train(False)
-    dataset = create_dataset(args_opt.dataset_path, cfg, False)
+    dataset = create_dataset(args_opt.dataset_path, False, 0, 1)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False,
                                             smooth_factor=0.1, num_classes=cfg.num_classes)
     eval_metrics = {'Loss': nn.Loss(),
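For orientation (this note is not part of the diff): the loss and metrics configured above are normally handed to a MindSpore `Model`, which then runs the validation pass. A minimal sketch of that step follows, assuming the usual model-zoo pattern; the `Model` import path, the completion of `eval_metrics` with Top-1/Top-5 accuracy, and the shape of the printed result are assumptions rather than lines taken from this PR.

```
# Hedged sketch of the evaluation step that typically follows the snippet above.
# `net`, `loss`, `dataset` and the completed `eval_metrics` dict come from the
# surrounding eval.py; the Model import path is the MindSpore 1.x location.
from mindspore.train.model import Model

model = Model(net, loss_fn=loss, metrics=eval_metrics)
result = model.eval(dataset)   # single pass over the validation set
print("metric:", result)       # e.g. {'Loss': ..., 'Top_1_Acc': ..., 'Top_5_Acc': ...}
```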
@@ -13,5 +13,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DATA_DIR=$1
-mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+if [ $# -lt 3 ]
+then
+    echo "Usage: \
+sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \
+"
+    exit 1
+fi
+
+if [ $1 -lt 1 ] || [ $1 -gt 8 ]
+then
+    echo "error: DEVICE_NUM=$1 is not in (1-8)"
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $3 ]
+then
+    echo "error: DATASET_PATH=$3 is not a directory"
+    exit 1
+fi
+
+export DEVICE_NUM=$1
+export RANK_SIZE=$1
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+
+if [ -d "../train" ];
+then
+    rm -rf ../train
+fi
+mkdir ../train
+cd ../train || exit
+
+export CUDA_VISIBLE_DEVICES="$2"
+
+if [ $1 -gt 1 ]
+then
+    mpirun -n $1 --allow-run-as-root \
+           python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 &
+else
+    python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 &
+fi
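As a side note on what `--is_distributed=True` usually triggers inside train.py (not shown in this diff): a typical MindSpore data-parallel setup on GPU looks roughly like the sketch below. The exact context arguments used by this repository may differ by MindSpore version, so treat this as an assumption-laden illustration rather than the PR's code.

```
# Hedged sketch of a typical MindSpore GPU data-parallel setup; argument names
# (e.g. gradients_mean) follow the MindSpore 1.x API and may differ per version.
from mindspore import context
from mindspore.context import ParallelMode
from mindspore.communication.management import init, get_group_size

context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
init()  # sets up NCCL communication for the processes launched by mpirun
context.set_auto_parallel_context(device_num=get_group_size(),
                                  parallel_mode=ParallelMode.DATA_PARALLEL,
                                  gradients_mean=True)
```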
@@ -13,6 +13,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
-EPOCH=$2
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 &
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $1 ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+# check checkpoint file
+if [ ! -f $2 ]
+then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+export DEVICE_ID=0
+
+if [ -d "../eval" ];
+then
+    rm -rf ../eval
+fi
+mkdir ../eval
+cd ../eval || exit
+
+python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 &
@@ -13,6 +13,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
-DATA_DIR=$2
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+if [ $# -lt 1 ]
+then
+    echo "Usage: \
+sh run_standalone_train_for_gpu.sh [DATASET_PATH] \
+"
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $1 ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+
+if [ -d "../train" ];
+then
+    rm -rf ../train
+fi
+mkdir ../train
+cd ../train || exit
+
+python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 &
@@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
     ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
     # apply batch operations
     ds = ds.batch(cfg.batch_size, drop_remainder=True)
-    # apply dataset repeat operation
-    ds = ds.repeat(repeat_num)
     return ds
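Editor's note: with the repeat call removed, `create_dataset` now yields exactly one epoch of data, presumably so that the epoch count passed to the training loop, rather than `repeat_num`, controls how many passes are made. A minimal call-site sketch for the signature shown in the hunk header above; the module import path and the example paths are assumptions for illustration, while the evaluation call mirrors the `create_dataset(args_opt.dataset_path, False, 0, 1)` line in eval.py.

```
# Hypothetical call sites for create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1).
from src.dataset import create_dataset            # assumed module location
from mindspore.communication.management import init, get_rank, get_group_size

# Standalone evaluation: rank 0 in a group of size 1, evaluation transforms.
eval_ds = create_dataset('/path/to/imagenet/val/', do_train=False, rank=0, group_size=1)

# Distributed training: after init(), each process shards the data by its own rank,
# as the get_rank()/get_group_size() imports in train.py suggest.
# init()
# train_ds = create_dataset('/path/to/imagenet/train/', do_train=True,
#                           rank=get_rank(), group_size=get_group_size())
```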
@@ -14,6 +14,7 @@
 # ============================================================================
 """train_imagenet."""
 import argparse
+import ast
 import os
 import random
 import numpy as np
@@ -23,7 +24,7 @@ from network import ShuffleNetV2
 import mindspore.nn as nn
 from mindspore import context
 from mindspore import dataset as de
-from mindspore import ParallelMode
+from mindspore.context import ParallelMode
 from mindspore import Tensor
 from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.nn.optim.momentum import Momentum
@@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed)

 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='image classification training')
-    parser.add_argument('--dataset_path', type=str, default='/home/data/imagenet_jpeg/train/', help='Dataset path')
+    parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
     parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
-    parser.add_argument('--is_distributed', action='store_true', default=False,
-                        help='distributed training')
+    parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training')
     parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
     parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter')
     args_opt = parser.parse_args()
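Editor's note on the `--is_distributed` change: `action='store_true'` only supports the bare flag form (`--is_distributed`), whereas the scripts above now pass `--is_distributed=True`. Parsing the value with `ast.literal_eval` accepts an explicit `True`/`False`; a plain `type=bool` would not, since `bool('False')` is truthy. A small self-contained sketch (standard library only) illustrating the behavior:

```
# Minimal demonstration of ast.literal_eval as an argparse boolean parser.
import argparse
import ast

parser = argparse.ArgumentParser()
# type=bool would turn any non-empty string (even "False") into True;
# ast.literal_eval parses the literal "True"/"False" into a real bool.
parser.add_argument('--is_distributed', type=ast.literal_eval, default=False)

print(parser.parse_args(['--is_distributed=True']).is_distributed)    # True
print(parser.parse_args(['--is_distributed=False']).is_distributed)   # False
print(parser.parse_args([]).is_distributed)                           # False (default)
```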