add mobilenetv2 and mobilenetv3

6 years ago · 82fc2f7ebd
--- a/example/mobilenetv2_imagenet2012/README.md
+++ b/example/mobilenetv2_imagenet2012/README.md
@@ -1,101 +0,0 @@
 # MobileNetV2 Example

 ## Description

 This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore. 

 ## Requirements

 * Install [MindSpore](https://www.mindspore.cn/install/en). 

 * Download the dataset [ImageNet2012](http://www.image-net.org/).

 > Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows:
 > ```
 > .  
 > ├── train  # train dataset
 > └── val   # infer dataset
 > ```

 ## Example structure

 ``` shell
 .
 ├── config.py               # parameter configuration
 ├── dataset.py              # data preprocessing
 ├── eval.py                 # infer script
 ├── launch.py               # launcher for distributed training
 ├── lr_generator.py         # generate learning rate for each step
 ├── run_infer.sh            # launch infering
 ├── run_train.sh            # launch training
 └── train.py                # train script
 ```

 ## Parameter configuration

 Parameters for both training and inference can be set in 'config.py'. 

 ``` 
 "num_classes": 1000,                    # dataset class num
 "image_height": 224,                    # image height
 "image_width": 224,                     # image width
 "batch_size": 256,                      # training or infering batch size
 "epoch_size": 200,                      # total training epochs, including warmup_epochs
 "warmup_epochs": 4,                     # warmup epochs
 "lr": 0.4,                              # base learning rate
 "momentum": 0.9,                        # momentum
 "weight_decay": 4e-5,                   # weight decay
 "loss_scale": 1024,                     # loss scale
 "save_checkpoint": True,                # whether save checkpoint
 "save_checkpoint_epochs": 1,            # the epoch interval between two checkpoints
 "keep_checkpoint_max": 200,             # only keep the last keep_checkpoint_max checkpoint
 "save_checkpoint_path": "./checkpoint"  # path to save checkpoint
 ```

 ## Running the example

 ### Train

 #### Usage
 Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]

 #### Launch

 ``` 
 # training example
 sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet
 ```

 #### Result

 Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below.

 ``` 
 epoch: [  0/200], step:[  624/  625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
 epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
 epoch: [  1/200], step:[  624/  625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
 epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
 ```

 ### Infer

 #### Usage

 Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]

 #### Launch

 ``` 
 # infer example
 sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt
 ```

 > checkpoint can be produced in training process. 

 #### Result

 Inference results will be stored in the example path; you can find results like the following in `./eval/infer.log`.

 ``` 
 result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
 ```
--- a/example/mobilenetv2_imagenet2012/run_infer.sh
+++ b/example/mobilenetv2_imagenet2012/run_infer.sh
@@ -1,33 +0,0 @@
 #!/usr/bin/env bash
 # Launch single-device evaluation of a checkpoint in the background.
 #
 # Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
 #   DATASET_PATH     directory that holds the validation images
 #   CHECKPOINT_PATH  checkpoint file to evaluate
 #
 # The script recreates ./eval, changes into it and redirects all output of
 # eval.py to ./eval/infer.log.
 if [ $# != 2 ]
 then
    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
 exit 1
 fi

 # Arguments are quoted so that paths containing spaces are tested as one word.
 if [ ! -d "$1" ]
 then
    echo "error: DATASET_PATH=$1 is not a directory"
 exit 1
 fi

 if [ ! -f "$2" ]
 then
    echo "error: CHECKPOINT_PATH=$2 is not a file"
 exit 1
 fi

 # Directory of this script; eval.py is expected next to it.
 BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 # Single-device run: one rank on device 0.
 export DEVICE_ID=0
 export RANK_ID=0
 export RANK_SIZE=1
 # Start from an empty working directory on every run.
 if [ -d "eval" ];
 then
    rm -rf ./eval
 fi
 mkdir ./eval
 cd ./eval || exit
 # "> file 2>&1" instead of the bash-only "&>": the documented invocation is
 # "sh run_infer.sh", and a POSIX sh would parse "&>" as "&" followed by ">".
 python "${BASEPATH}/eval.py" \
        --checkpoint_path="$2" \
        --dataset_path="$1" > infer.log 2>&1 &  # dataset val folder path
--- a/example/mobilenetv2_imagenet2012/run_train.sh
+++ b/example/mobilenetv2_imagenet2012/run_train.sh
@@ -1,33 +0,0 @@
 #!/usr/bin/env bash
 # Launch (distributed) training in the background through launch.py.
 #
 # Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
 #
 # The script recreates ./train, changes into it and redirects all output of
 # the launcher to ./train/train.log.
 if [ $# != 4 ]
 then
    echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
 exit 1
 fi

 # Reject a device count outside 1..8. The two tests must be OR-ed: a number
 # cannot be below 1 and above 8 at the same time, so "&&" never fired.
 if [ "$1" -lt 1 ] || [ "$1" -gt 8 ]
 then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
 exit 1
 fi

 # Quoted so that a path containing spaces is tested as one word.
 if [ ! -d "$4" ]
 then
    echo "error: DATASET_PATH=$4 is not a directory"
 exit 1
 fi

 # Directory of this script; launch.py and train.py are expected next to it.
 BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 # Start from an empty working directory on every run.
 if [ -d "train" ];
 then
    rm -rf ./train
 fi
 mkdir ./train
 cd ./train || exit
 # "> file 2>&1" instead of the bash-only "&>": the documented invocation is
 # "sh run_train.sh", and a POSIX sh would parse "&>" as "&" followed by ">".
 python "${BASEPATH}/launch.py" \
        --nproc_per_node="$1" \
        --visible_devices="$3" \
        --server_id="$2" \
        --training_script="${BASEPATH}/train.py" \
        --dataset_path="$4" > train.log 2>&1 &  # dataset train folder
--- a/example/mobilenetv2_imagenet2012/train.py
+++ b/example/mobilenetv2_imagenet2012/train.py
@@ -1,188 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """train_imagenet."""
 import os
 import time
 import argparse
 import random
 import numpy as np
 from dataset import create_dataset
 from lr_generator import get_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
 from mindspore import nn
 from mindspore.model_zoo.mobilenet import mobilenet_v2
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.loss.loss import _Loss
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.common import dtype as mstype

 from mindspore.train.model import Model, ParallelMode

 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 import mindspore.dataset.engine as de
 from mindspore.communication.management import init

 # Seed the Python, NumPy and MindSpore dataset-engine random generators with
 # the fixed value 1.
 random.seed(1)
 np.random.seed(1)
 de.config.set_seed(1)

 # Command-line arguments; note that they are parsed at import time.
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
 args_opt = parser.parse_args()

 # Device and rank layout are read from the environment. os.getenv returns None
 # for an unset variable, so int(...) raises TypeError if any of them is missing.
 device_id = int(os.getenv('DEVICE_ID'))
 rank_id = int(os.getenv('RANK_ID'))
 rank_size = int(os.getenv('RANK_SIZE'))
 # More than one rank switches the main block to its distributed setup.
 run_distribute = rank_size > 1

 # Graph mode on the Ascend device selected by DEVICE_ID.
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)

 class CrossEntropyWithLabelSmooth(_Loss):
    """
    Softmax cross entropy evaluated against label-smoothed one-hot targets.

    The true class receives `1 - smooth_factor`; every other class receives
    `smooth_factor / (num_classes - 1)`.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes, default=1000.

    Returns:
        Tensor, the cross entropy reduced with ReduceMean over axis 0.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        # Target value of the labelled class and of each remaining class.
        target_value = 1.0 - smooth_factor
        other_value = 1.0 * smooth_factor / (num_classes - 1)
        self.on_value = Tensor(target_value, mstype.float32)
        self.off_value = Tensor(other_value, mstype.float32)
        # Operators used by construct.
        self.cast = P.Cast()
        self.onehot = P.OneHot()
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        # Depth of the one-hot encoding is taken from dimension 1 of the logits.
        class_count = F.shape(logit)[1]
        int_label = self.cast(label, mstype.int32)
        smoothed_target = self.onehot(int_label, class_count, self.on_value, self.off_value)
        raw_loss = self.ce(logit, smoothed_target)
        return self.mean(raw_loss, 0)

 class Monitor(Callback):
    """
    Monitor loss and time.

    Prints loss, elapsed time and learning rate after every step, and the
    elapsed time and average loss after every epoch.

    Args:
        lr_init (numpy array): learning rate of every training step, indexed by
            global step number minus one. May be None; the learning rate is then
            reported as nan.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # len(None) would raise, so the documented default is handled explicitly.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        # Defined here so that every hook finds the attributes it reads, even
        # if a *_begin hook was not called first.
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        """Reset the per-epoch loss history and start the epoch timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print the epoch duration, the per-step duration (both in ms) and the average loss."""
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        # np.mean([]) already yields nan but emits a RuntimeWarning first.
        avg_loss = np.mean(self.losses) if self.losses else float("nan")
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      avg_loss))

    def step_begin(self, run_context):
        """Start the step timer."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """Record the step loss and print progress, timing (ms) and learning rate."""
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        # The network output may be a Tensor or a tuple/list whose first item is one.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        # A missing or too short schedule must not abort training from a logging hook.
        lr_index = cb_params.cur_step_num - 1
        if 0 <= lr_index < self.lr_init_len:
            cur_lr = self.lr_init[lr_index]
        else:
            cur_lr = float("nan")

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, cur_lr))


 # Training entry point: build the network, loss, dataset, optimizer and
 # callbacks from `config` and the command-line arguments, then train.
 if __name__ == '__main__':
    # Multi-device runs use data parallelism across rank_size devices.
    if run_distribute:
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          parameter_broadcast=True, mirror_mean=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
        init()

    epoch_size = config.epoch_size
    net = mobilenet_v2(num_classes=config.num_classes)
    # Cast the network to float16, then flag every Dense cell with fp32=True
    # (presumably so those layers keep computing in float32 -- confirm).
    net.to_float(mstype.float16)
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Dense):
            cell.add_flags_recursive(fp32=True)
    # Label smoothing is used only when the configured factor is positive.
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')

    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
                             repeat_num=epoch_size, batch_size=config.batch_size)
    # step_size feeds the learning-rate schedule (steps_per_epoch) and the
    # checkpoint interval below.
    step_size = dataset.get_dataset_size()
    # Optionally start from a pre-trained checkpoint.
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)

    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    # Learning-rate schedule generated by get_lr and wrapped in a Tensor.
    lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size))
    # Only parameters with requires_grad set are handed to the optimizer.
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

    # Logging and checkpointing callbacks are attached on rank 0 only; other
    # ranks train with callbacks=None.
    cb = None
    if rank_id == 0:
        cb = [Monitor(lr_init=lr.asnumpy())]
        if config.save_checkpoint:
            # Checkpoint interval is expressed in steps: epochs * steps per epoch.
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=cb)
--- a/mindspore/model_zoo/mobilenetv2/Readme.md
+++ b/mindspore/model_zoo/mobilenetv2/Readme.md
@@ -0,0 +1,151 @@
 # MobileNetV2 Description


 MobileNetV2 is a convolutional network designed for mobile and resource-constrained devices. It is built from inverted residual blocks with linear bottlenecks: each block expands a low-dimensional input with a 1x1 convolution, filters it with a lightweight depthwise convolution, and projects it back to a low-dimensional output.

 [Paper](https://arxiv.org/pdf/1801.04381) Sandler, Mark, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. "MobileNetV2: Inverted Residuals and Linear Bottlenecks." In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510-4520. 2018.

 # Model architecture

 The overall network architecture of MobileNetV2 is shown below:

 [Link](https://arxiv.org/pdf/1801.04381)

 # Dataset

 Dataset used: [imagenet](http://www.image-net.org/)

 - Dataset size: ~125G, about 1.28 million training images and 50000 validation images in 1000 classes
 	- Train: 120G, about 1.28 million images
 	- Test: 5G, 50000 images
 - Data format: RGB images.
 	- Note: Data will be processed in src/dataset.py 


 # Features


 # Environment Requirements

 - Hardware（Ascend/GPU）
  - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend  , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
 - Framework
  - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below：
  - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html) 
  - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html)


 # Script description

 ## Script and sample code

 ```shell
 ├── MobileNetV2
  ├── Readme.md
  ├── scripts
  │   ├──run_train.sh
  │   ├──run_infer.sh
  ├── src
  │   ├──config.py
  │   ├──dataset.py
  │   ├──launch.py
  │   ├──lr_generator.py
  │   ├──mobilenetV2.py
  ├── train.py
  ├── eval.py
 ```

 ## Training process

 ### Usage

 - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
 - GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]

 ### Launch

 ``` 
 # training example
  Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
  GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```

 ### Result

 Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below.

 ``` 
 epoch: [  0/200], step:[  624/  625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
 epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
 epoch: [  1/200], step:[  624/  625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
 epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
 ```

 ## Eval process

 ### Usage

 - Ascend: sh run_infer.sh Ascend [DATASET_PATH] [CHECKPOINT_PATH]
 - GPU: sh run_infer.sh GPU [DATASET_PATH] [CHECKPOINT_PATH]

 ### Launch

 ``` 
 # infer example
    Ascend: sh run_infer.sh Ascend ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt
    GPU: sh run_infer.sh GPU ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt
 ```

 > checkpoint can be produced in training process. 

 ### Result

 Inference results will be stored in the example path; you can find results like the following in `infer.log` inside the `eval` directory.

 ``` 
 result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
 ```

 # Model description

 ## Performance

 ### Training Performance

 | Parameters                 | MobilenetV2                                                |                           |
 | -------------------------- | ---------------------------------------------------------- | ------------------------- |
 | Model Version              |                                                            | large                     |
 | Resource                   | Ascend 910, cpu:2.60GHz 56cores, memory:314G               | NV SMX2 V100-32G          |
 | uploaded Date              | 05/06/2020                                                 | 05/06/2020                |
 | MindSpore Version          | 0.3.0                                                      | 0.3.0                     |
 | Dataset                    | ImageNet                                                   | ImageNet                  |
 | Training Parameters        | src/config.py                                              | src/config.py             |
 | Optimizer                  | Momentum                                                   | Momentum                  |
 | Loss Function              | SoftmaxCrossEntropy                                        | SoftmaxCrossEntropy       |
 | outputs                    |                                                            |                           |
 | Loss                       |                                                            | 1.913                     |
 | Accuracy                   |                                                            | ACC1[77.09%] ACC5[92.57%] |
 | Total time                 |                                                            |                           |
 | Params (M)                 |                                                            |                           |
 | Checkpoint for Fine tuning |                                                            |                           |
 | Model for inference        |                                                            |                           |

 ### Inference Performance

 | Parameters                 | MobilenetV2                   |                           |                      |
 | -------------------------- | ----------------------------- | ------------------------- | -------------------- |
 | Model Version              | V1                            |                           |                      |
 | Resource                   | Huawei 910                    | NV SMX2 V100-32G          | Huawei 310           |
 | uploaded Date              | 05/06/2020                    | 05/22/2020                |                      |
 | MindSpore Version          | 0.2.0                         | 0.2.0                     | 0.2.0                | 
 | Dataset                    | ImageNet, 1.2W                | ImageNet, 1.2W            | ImageNet, 1.2W       |
 | batch_size                 |                               | 130(8P)                   |                      |
 | outputs                    |                               |                           |                      |
 | Accuracy                   |                               | ACC1[72.07%] ACC5[90.90%] |                      |
 | Speed                      |                               |                           |                      |
 | Total time                 |                               |                           |                      |
 | Model for inference        |                               |                           |                      |

 # ModelZoo Homepage  
 [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)  
--- a/mindspore/model_zoo/mobilenetv2/eval.py
+++ b/mindspore/model_zoo/mobilenetv2/eval.py
@@ -0,0 +1,75 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """
 eval.
 """
 import os
 import argparse
 from mindspore import context
 from mindspore import nn
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.common import dtype as mstype
 from src.dataset import create_dataset
 from src.config import config_ascend, config_gpu
 from src.mobilenetV2 import mobilenet_v2


 # Command-line arguments; note that they are parsed at import time.
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--platform', type=str, default=None, help='run platform')
 args_opt = parser.parse_args()


 # Evaluation entry point: pick the platform configuration, build the network
 # and dataset, optionally load a checkpoint and print the accuracy.
 if __name__ == '__main__':
    config_platform = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        # os.getenv returns None when DEVICE_ID is unset, which makes int() raise TypeError.
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
    elif args_opt.platform == "GPU":
        config_platform = config_gpu
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU", save_graphs=False)
    else:
        # Covers both an unknown value and the default None.
        raise ValueError("Unsupport platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2(num_classes=config_platform.num_classes)

    # On Ascend the network is cast to float16 while Dense cells are cast back
    # to float32; on GPU the network is left as constructed.
    if args_opt.platform == "Ascend":
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size)
    # NOTE(review): step_size is not used after this assignment.
    step_size = dataset.get_dataset_size()

    # Without --checkpoint_path the network is evaluated with the parameters
    # it was constructed with.
    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
--- a/mindspore/model_zoo/mobilenetv2/scripts/run_infer.sh
+++ b/mindspore/model_zoo/mobilenetv2/scripts/run_infer.sh
@@ -0,0 +1,55 @@
 #!/usr/bin/env bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 # Launch single-device evaluation of a checkpoint in the background.
 #   $1 PLATFORM         passed to eval.py as --platform
 #   $2 DATASET_PATH     directory that holds the validation images
 #   $3 CHECKPOINT_PATH  checkpoint file to evaluate
 # The script recreates ../eval (relative to the current directory), changes
 # into it and redirects all output of eval.py to infer.log there.
 if [ $# != 3 ]
 then
    echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH] \
          GPU: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
 exit 1
 fi

 # check dataset path (quoted so that paths with spaces are tested as one word)
 if [ ! -d "$2" ]
 then
    echo "error: DATASET_PATH=$2 is not a directory"
 exit 1
 fi

 # check checkpoint file
 if [ ! -f "$3" ]
 then
    echo "error: CHECKPOINT_PATH=$3 is not a file"
 exit 1
 fi

 # set environment
 # BASEPATH is the directory of this script; eval.py lives one level above it.
 BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 export DEVICE_ID=0
 export RANK_ID=0
 export RANK_SIZE=1
 # Test the directory that is actually removed and recreated. Testing "eval"
 # left a stale ../eval in place and made the mkdir below fail on a rerun.
 if [ -d "../eval" ];
 then
    rm -rf ../eval
 fi
 mkdir ../eval
 cd ../eval || exit

 # launch
 # "> file 2>&1" instead of the bash-only "&>": the documented invocation is
 # "sh run_infer.sh", and a POSIX sh would parse "&>" as "&" followed by ">".
 python "${BASEPATH}/../eval.py" \
        --platform="$1" \
        --dataset_path="$2" \
        --checkpoint_path="$3" \
        > infer.log 2>&1 &  # dataset val folder path
--- a/mindspore/model_zoo/mobilenetv2/scripts/run_train.sh
+++ b/mindspore/model_zoo/mobilenetv2/scripts/run_train.sh
@@ -0,0 +1,95 @@
 #!/usr/bin/env bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 # Launch training on Ascend devices through the launcher script.
 #   $1 platform name, $2 DEVICE_NUM, $3 SERVER_IP, $4 VISIABLE_DEVICES, $5 DATASET_PATH
 # Recreates ../train (relative to the current directory), changes into it and
 # redirects all launcher output to train.log there.
 run_ascend()
 {
    # Reject a device count outside 1..8. The two tests must be OR-ed: a number
    # cannot be below 1 and above 8 at the same time, so "&&" never fired.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
    exit 1
    fi

    # Quoted so that a missing or space-containing path is tested as one word.
    if [ ! -d "$5" ]
    then
        echo "error: DATASET_PATH=$5 is not a directory"
    exit 1
    fi

    # BASEPATH is the directory of this script (scripts/).
    BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    # Test the directory that is actually removed and recreated. Testing
    # "train" left a stale ../train in place and made the mkdir below fail.
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit
    # launch.py lives in src/ and train.py one level above scripts/.
    # "> file 2>&1" replaces the bash-only "&>" because the documented
    # invocation is "sh run_train.sh".
    python "${BASEPATH}/../src/launch.py" \
            --nproc_per_node="$2" \
            --visible_devices="$4" \
            --server_id="$3" \
            --training_script="${BASEPATH}/../train.py" \
            --dataset_path="$5" \
            --platform="$1" > train.log 2>&1 &  # dataset train folder
 }

 # Launch training on GPUs through mpirun.
 #   $1 platform name, $2 DEVICE_NUM, $3 VISIABLE_DEVICES, $4 DATASET_PATH
 # Recreates ../train (relative to the current directory), changes into it and
 # redirects all output to train.log there.
 run_gpu()
 {
    # Reject a device count outside 1..8. The two tests must be OR-ed: a number
    # cannot be below 1 and above 8 at the same time, so "&&" never fired.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
    exit 1
    fi

    # Quoted so that a missing or space-containing path is tested as one word.
    if [ ! -d "$4" ]
    then
        echo "error: DATASET_PATH=$4 is not a directory"
    exit 1
    fi

    # BASEPATH is the directory of this script; train.py lives one level above it.
    BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    # Test the directory that is actually removed and recreated. Testing
    # "train" left a stale ../train in place and made the mkdir below fail.
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit

    # One process per device, restricted to the listed GPUs.
    export CUDA_VISIBLE_DEVICES="$3"
    # "> file 2>&1" replaces the bash-only "&>" because the documented
    # invocation is "sh run_train.sh".
    mpirun -n "$2" --allow-run-as-root \
    python "${BASEPATH}/../train.py" \
        --dataset_path="$4" \
        --platform="$1" \
        > train.log 2>&1 &  # dataset train folder
 }

 # Entry point: check the argument count (4 for GPU, 5 for Ascend) and hand
 # all arguments to the launcher that matches the platform named in $1.
 if [ $# -gt 5 ] || [ $# -lt 4 ]
 then
    echo "Usage:\n \
          Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
          GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
          "
 exit 1
 fi

 # $1 is quoted so that an empty or space-containing value cannot break the test.
 if [ "$1" = "Ascend" ] ; then
    run_ascend "$@"
 elif [ "$1" = "GPU" ] ; then
    run_gpu "$@"
 else
    echo "not support platform"
    # Report the failure to the caller instead of exiting with status 0.
    exit 1
 fi;

--- a/mindspore/model_zoo/mobilenetv2/src/config.py
+++ b/mindspore/model_zoo/mobilenetv2/src/config.py
@@ -17,7 +17,7 @@ network config setting, will be used in train.py and eval.py
 """
 from easydict import EasyDict as ed

 config = ed({
 config_ascend = ed({
    "num_classes": 1000,
    "image_height": 224,
    "image_width": 224,
@@ -34,3 +34,21 @@ config = ed({
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
 })

 # Training and evaluation settings used when the platform is "GPU".
 config_gpu = ed({
    "num_classes": 1000,                     # dataset class num
    "image_height": 224,                     # image height
    "image_width": 224,                      # image width
    "batch_size": 64,                        # training or inference batch size
    "epoch_size": 200,                       # total training epochs, including warmup_epochs
    "warmup_epochs": 4,                      # warmup epochs
    "lr": 0.5,                               # base learning rate
    "momentum": 0.9,                         # momentum
    "weight_decay": 4e-5,                    # weight decay
    "label_smooth": 0.1,                     # label smoothing factor (presumably 0 disables it -- confirm in train.py)
    "loss_scale": 1024,                      # loss scale
    "save_checkpoint": True,                 # whether to save checkpoints
    "save_checkpoint_epochs": 1,             # the epoch interval between two checkpoints
    "keep_checkpoint_max": 200,              # only keep the last keep_checkpoint_max checkpoints
    "save_checkpoint_path": "./checkpoint",  # path to save checkpoints
 })
--- a/mindspore/model_zoo/mobilenetv2/src/dataset.py
+++ b/mindspore/model_zoo/mobilenetv2/src/dataset.py
@@ -20,10 +20,9 @@ import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
 from config import config


 def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
 def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

@@ -36,14 +35,18 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    Returns:
        dataset
    """
    rank_size = int(os.getenv("RANK_SIZE"))
    rank_id = int(os.getenv("RANK_ID"))

    if rank_size == 1:
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if rank_size == 1:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=rank_size, shard_id=rank_id)
    elif platform == "GPU":
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=rank_size, shard_id=rank_id)
        raise ValueError("Unsupport platform.")

    resize_height = config.image_height
    resize_width = config.image_width
--- a/mindspore/model_zoo/mobilenetv2/src/launch.py
+++ b/mindspore/model_zoo/mobilenetv2/src/launch.py
--- a/mindspore/model_zoo/mobilenetv2/src/lr_generator.py
+++ b/mindspore/model_zoo/mobilenetv2/src/lr_generator.py
--- a/mindspore/model_zoo/mobilenetv2/src/mobilenetV2.py
+++ b/mindspore/model_zoo/mobilenetv2/src/mobilenetV2.py
@@ -20,20 +20,10 @@ from mindspore.ops.operations import TensorAdd
 from mindspore import Parameter, Tensor
 from mindspore.common.initializer import initializer

 __all__ = ['MobileNetV2', 'mobilenet_v2']
 __all__ = ['mobilenet_v2']


 def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -55,6 +45,7 @@ class GlobalAvgPooling(nn.Cell):
    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = P.ReduceMean(keep_dims=False)
@@ -82,6 +73,7 @@ class DepthwiseConv(nn.Cell):
    Examples:
        >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1)
    """

    def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False):
        super(DepthwiseConv, self).__init__()
        self.has_bias = has_bias
@@ -126,14 +118,19 @@ class ConvBNReLU(nn.Cell):
    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):

    def __init__(self, platform, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        padding = (kernel_size - 1) // 2
        if groups == 1:
            conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad',
                             padding=padding)
            conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding)
        else:
            conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding)
            if platform == "Ascend":
                conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding)
            elif platform == "GPU":
                conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                                 group=in_planes, pad_mode='pad', padding=padding)

        layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()]
        self.features = nn.SequentialCell(layers)

@@ -158,7 +155,8 @@ class InvertedResidual(nn.Cell):
    Examples:
        >>> ResidualBlock(3, 256, 1, 1)
    """
    def __init__(self, inp, oup, stride, expand_ratio):

    def __init__(self, platform, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

@@ -167,12 +165,14 @@ class InvertedResidual(nn.Cell):

        layers = []
        if expand_ratio != 1:
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
            layers.append(ConvBNReLU(platform, inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            ConvBNReLU(platform, hidden_dim, hidden_dim,
                       stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False),
            nn.Conv2d(hidden_dim, oup, kernel_size=1,
                      stride=1, has_bias=False),
            nn.BatchNorm2d(oup),
        ])
        self.conv = nn.SequentialCell(layers)
@@ -203,7 +203,8 @@ class MobileNetV2(nn.Cell):
    Examples:
        >>> MobileNetV2(num_classes=1000)
    """
    def __init__(self, num_classes=1000, width_mult=1.,

    def __init__(self, platform, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
@@ -226,16 +227,16 @@ class MobileNetV2(nn.Cell):
        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        features = [ConvBNReLU(platform, 3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                features.append(block(platform, input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1))
        features.append(ConvBNReLU(platform, input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
@@ -268,14 +269,19 @@ class MobileNetV2(nn.Cell):
                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                                    m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
                    m.bias.set_parameter_data(
                        Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
                m.gamma.set_parameter_data(
                    Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
                m.beta.set_parameter_data(
                    Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32")))
                m.weight.set_parameter_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
                    m.bias.set_parameter_data(
                        Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))


 def mobilenet_v2(**kwargs):
--- a/mindspore/model_zoo/mobilenetv2/train.py
+++ b/mindspore/model_zoo/mobilenetv2/train.py
@@ -0,0 +1,267 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """train_imagenet."""
 import os
 import time
 import argparse
 import random
 import numpy as np
 from mindspore import context
 from mindspore import Tensor
 from mindspore import nn
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.loss.loss import _Loss
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.common import dtype as mstype
 from mindspore.train.model import Model, ParallelMode
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.communication.management import init
 import mindspore.dataset.engine as de
 from src.dataset import create_dataset
 from src.lr_generator import get_lr
 from src.config import config_gpu, config_ascend
 from src.mobilenetV2 import mobilenet_v2

 # Seed the Python, NumPy and MindSpore dataset-engine RNGs with a fixed value.
 random.seed(1)
 np.random.seed(1)
 de.config.set_seed(1)

 # Command-line interface shared by the GPU and Ascend code paths.
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
 parser.add_argument('--platform', type=str, default=None, help='run platform')
 args_opt = parser.parse_args()

 # Configure the MindSpore execution context once, at import time.
 if args_opt.platform == "Ascend":
    # DEVICE_ID, RANK_ID and RANK_SIZE are read from the environment; int(None)
    # raises TypeError when one of them is not exported.
    device_id = int(os.getenv('DEVICE_ID'))
    rank_id = int(os.getenv('RANK_ID'))
    rank_size = int(os.getenv('RANK_SIZE'))
    # More than one rank means data-parallel training is set up in __main__.
    run_distribute = rank_size > 1
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id, save_graphs=False)
 elif args_opt.platform == "GPU":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="GPU", save_graphs=False)
 else:
    raise ValueError("Unsupport platform.")


 class CrossEntropyWithLabelSmooth(_Loss):
    """
    Softmax cross entropy evaluated against label-smoothed one-hot targets.

    The labelled class receives weight ``1 - smooth_factor``; every other
    class receives ``smooth_factor / (num_classes - 1)``.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes, used to spread the smoothing mass.

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        on_weight = 1.0 - smooth_factor
        off_weight = 1.0 * smooth_factor / (num_classes - 1)
        self.onehot = P.OneHot()
        self.on_value = Tensor(on_weight, mstype.float32)
        self.off_value = Tensor(off_weight, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        # Labels are cast to int32 before being expanded to one-hot form; the
        # one-hot depth is taken from the second dimension of the logits.
        int_label = self.cast(label, mstype.int32)
        depth = F.shape(logit)[1]
        smoothed_target = self.onehot(int_label, depth, self.on_value, self.off_value)
        per_sample_loss = self.ce(logit, smoothed_target)
        # Reduce over axis 0 to obtain a single loss value.
        return self.mean(per_sample_loss, 0)


 class Monitor(Callback):
    """
    Callback that prints loss and timing after every step and every epoch.

    Args:
        lr_init (numpy array): learning rate of every training step, indexed
            by ``cur_step_num - 1``. When it is None (the default), or shorter
            than the number of steps run, the learning rate is printed as nan.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # len(None) would raise TypeError, so the documented default is guarded.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        # Created here so the hooks never touch an undefined attribute, even
        # when step_end fires without a preceding epoch_begin/step_begin.
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        # Reset the per-epoch loss history and start the epoch timer.
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        # net_outputs may be a tuple/list whose first element is the loss tensor.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        # Look the learning rate up by global step; fall back to nan instead of
        # failing when no schedule was supplied or it is too short.
        lr_index = cb_params.cur_step_num - 1
        if 0 <= lr_index < self.lr_init_len:
            cur_lr = self.lr_init[lr_index]
        else:
            cur_lr = float('nan')

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num -
            1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, cur_lr))


 # Entry point: build the network, loss, dataset, optimizer and callbacks for
 # the platform chosen on the command line, then run training.
 if __name__ == '__main__':
    if args_opt.platform == "GPU":
        # train on gpu
        print("train args: ", args_opt, "\ncfg: ", config_gpu)

        # define net
        net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU")
        # define loss
        if config_gpu.label_smooth > 0:
            loss = CrossEntropyWithLabelSmooth(
                smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
        # define dataset
        epoch_size = config_gpu.epoch_size
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 config=config_gpu,
                                 platform=args_opt.platform,
                                 repeat_num=epoch_size,
                                 batch_size=config_gpu.batch_size)
        step_size = dataset.get_dataset_size()
        # resume
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        # define optimizer
        loss_scale = FixedLossScaleManager(
            config_gpu.loss_scale, drop_overflow_update=False)
        # One learning-rate value per training step, including warmup.
        lr = Tensor(get_lr(global_step=0,
                           lr_init=0,
                           lr_end=0,
                           lr_max=config_gpu.lr,
                           warmup_epochs=config_gpu.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_gpu.momentum,
                       config_gpu.weight_decay, config_gpu.loss_scale)
        # define model
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)

        cb = [Monitor(lr_init=lr.asnumpy())]
        if config_gpu.save_checkpoint:
            # save_checkpoint_epochs is converted to a step interval.
            config_ck = CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config_gpu.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(
                prefix="mobilenet", directory=config_gpu.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
        # begin train
        model.train(epoch_size, dataset, callbacks=cb)
    elif args_opt.platform == "Ascend":
        # train on ascend
        print("train args: ", args_opt, "\ncfg: ", config_ascend,
              "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

        if run_distribute:
            context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                              parameter_broadcast=True, mirror_mean=True)
            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
            init()

        epoch_size = config_ascend.epoch_size
        # define net
        # MobileNetV2.__init__ takes `platform` as a required argument (it is
        # presumably forwarded by mobilenet_v2(**kwargs)), so it must be passed
        # here exactly as the GPU branch does.
        net = mobilenet_v2(num_classes=config_ascend.num_classes, platform="Ascend")
        # Run the network in float16 but keep the Dense layers in float32.
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)
        # define loss
        if config_ascend.label_smooth > 0:
            # Only config_gpu/config_ascend are imported; the bare name `config`
            # used here previously does not exist and raised NameError.
            loss = CrossEntropyWithLabelSmooth(
                smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
        # define dataset
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 config=config_ascend,
                                 platform=args_opt.platform,
                                 repeat_num=epoch_size,
                                 batch_size=config_ascend.batch_size)
        step_size = dataset.get_dataset_size()
        # resume
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)

        # define optimizer
        loss_scale = FixedLossScaleManager(
            config_ascend.loss_scale, drop_overflow_update=False)
        lr = Tensor(get_lr(global_step=0,
                           lr_init=0,
                           lr_end=0,
                           lr_max=config_ascend.lr,
                           warmup_epochs=config_ascend.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_ascend.momentum,
                       config_ascend.weight_decay, config_ascend.loss_scale)

        # define model
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)

        # Only rank 0 logs progress and writes checkpoints.
        cb = None
        if rank_id == 0:
            cb = [Monitor(lr_init=lr.asnumpy())]
            if config_ascend.save_checkpoint:
                config_ck = CheckpointConfig(save_checkpoint_steps=config_ascend.save_checkpoint_epochs * step_size,
                                             keep_checkpoint_max=config_ascend.keep_checkpoint_max)
                ckpt_cb = ModelCheckpoint(
                    prefix="mobilenet", directory=config_ascend.save_checkpoint_path, config=config_ck)
                cb += [ckpt_cb]
        # begin train
        model.train(epoch_size, dataset, callbacks=cb)
    else:
        raise ValueError("Unsupport platform.")
--- a/mindspore/model_zoo/mobilenetv3/Readme.md
+++ b/mindspore/model_zoo/mobilenetv3/Readme.md
@@ -0,0 +1,152 @@
 # MobileNetV3 Description


 MobileNetV3 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm, and then subsequently improved through novel architecture advances (Nov 20, 2019).

 [Paper](https://arxiv.org/pdf/1905.02244) Howard, Andrew, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang et al. "Searching for mobilenetv3." In Proceedings of the IEEE International Conference on Computer Vision, pp. 1314-1324. 2019.

 # Model architecture

 The overall network architecture of MobileNetV3 is shown below:

 [Link](https://arxiv.org/pdf/1905.02244)

 # Dataset

 Dataset used: [imagenet](http://www.image-net.org/)

 - Dataset size: ~125G, about 1.2 million color images in 1000 classes
 	- Train: 120G, about 1.2 million images
 	- Test: 5G, 50000 images
 - Data format: RGB images.
 	- Note: Data will be processed in src/dataset.py 


 # Features


 # Environment Requirements

 - Hardware (Ascend/GPU)
  - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
 - Framework
  - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below:
  - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html) 
  - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html)


 # Script description

 ## Script and sample code

 ```python
 ├── MobilenetV3        
  ├── Readme.md                      
  ├── scripts 
  │   ├──run_train.sh                  
  │   ├──run_infer.sh
  ├── src                              
  │   ├──config.py                     
  │   ├──dataset.py
  │   ├──launch.py
  │   ├──lr_generator.py                                 
  │   ├──mobilenetV3.py
  ├── train.py
  ├── eval.py
 ```

 ## Training process

 ### Usage

 - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
 - GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]

 ### Launch

 ``` 
 # training example
  Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
  GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```

 ### Result

 Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log` as follows.

 ``` 
 epoch: [  0/200], step:[  624/  625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
 epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
 epoch: [  1/200], step:[  624/  625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
 epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
 ```

 ## Eval process

 ### Usage

 - Ascend: sh run_infer.sh Ascend [DATASET_PATH] [CHECKPOINT_PATH]
 - GPU: sh run_infer.sh GPU [DATASET_PATH] [CHECKPOINT_PATH]

 ### Launch

 ``` 
 # infer example
    Ascend: sh run_infer.sh Ascend ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt
    GPU: sh run_infer.sh GPU ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt
 ```

 > checkpoint can be produced in training process. 

 ### Result

 Inference results will be stored in the example path; you can find results like the following in `./eval/infer.log`.

 ``` 
 result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
 ```

 # Model description

 ## Performance

 ### Training Performance

 | Parameters                 | MobilenetV3                                                |                           |
 | -------------------------- | ---------------------------------------------------------- | ------------------------- |
 | Model Version              |                                                            | large                     |
 | Resource                   | Ascend 910, cpu:2.60GHz 56cores, memory:314G               | NV SMX2 V100-32G          |
 | uploaded Date              | 05/06/2020                                                 | 05/06/2020                |
 | MindSpore Version          | 0.3.0                                                      | 0.3.0                     |
 | Dataset                    | ImageNet                                                   | ImageNet                  |
 | Training Parameters        | src/config.py                                              | src/config.py             |
 | Optimizer                  | Momentum                                                   | Momentum                  |
 | Loss Function              | SoftmaxCrossEntropy                                        | SoftmaxCrossEntropy       |
 | outputs                    |                                                            |                           |
 | Loss                       |                                                            | 1.913                     |
 | Accuracy                   |                                                            | ACC1[77.57%] ACC5[92.51%] |
 | Total time                 |                                                            |                           |
 | Params (M)                 |                                                            |                           |
 | Checkpoint for Fine tuning |                                                            |                           |
 | Model for inference        |                                                            |                           |

 #### Inference Performance

 | Parameters                 | MobilenetV3                   |                           |                      |
 | -------------------------- | ----------------------------- | ------------------------- | -------------------- |
 | Model Version              | V1                            |                           |                      |
 | Resource                   | Huawei 910                    | NV SMX2 V100-32G          | Huawei 310           |
 | uploaded Date              | 05/06/2020                    | 05/22/2020                |                      |
 | MindSpore Version          | 0.2.0                         | 0.2.0                     | 0.2.0                | 
 | Dataset                    | ImageNet, 1.2W                | ImageNet, 1.2W            | ImageNet, 1.2W       |
 | batch_size                 |                               | 130(8P)                   |                      |
 | outputs                    |                               |                           |                      |
 | Accuracy                   |                               | ACC1[75.43%] ACC5[92.51%] |                      |
 | Speed                      |                               |                           |                      |
 | Total time                 |                               |                           |                      |
 | Model for inference        |                               |                           |                      |


 # ModelZoo Homepage  
 [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)  
--- a/example/mobilenetv2_imagenet2012/eval.py
+++ b/example/mobilenetv2_imagenet2012/eval.py
@@ -17,33 +17,51 @@ eval.
 """
 import os
 import argparse
 from dataset import create_dataset
 from config import config
 from mindspore import context
 from mindspore.model_zoo.mobilenet import mobilenet_v2
 from mindspore import nn
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.common import dtype as mstype
 from src.dataset import create_dataset
 from src.config import config_ascend, config_gpu
 from src.mobilenetV2 import mobilenet_v2

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--platform', type=str, default=None, help='run platform')
 args_opt = parser.parse_args()

 device_id = int(os.getenv('DEVICE_ID'))

 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)

 if __name__ == '__main__':
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2(num_classes=config.num_classes)
    net.to_float(mstype.float16)
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Dense):
            cell.add_flags_recursive(fp32=True)

    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
    config_platform = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
    elif args_opt.platform == "GPU":
        config_platform = config_gpu
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU", save_graphs=False)
    else:
        raise ValueError("Unsupport platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2(num_classes=config_platform.num_classes)

    if args_opt.platform == "Ascend":
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size)
    step_size = dataset.get_dataset_size()

    if args_opt.checkpoint_path:
--- a/mindspore/model_zoo/mobilenetv3/scripts/run_infer.sh
+++ b/mindspore/model_zoo/mobilenetv3/scripts/run_infer.sh
@@ -0,0 +1,55 @@
 #!/usr/bin/env bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 # Launch evaluation in the background.
 # Arguments: $1 platform, $2 dataset directory, $3 checkpoint file.
 if [ $# -ne 3 ]
 then
    echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH] \
          GPU: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
 fi

 # check dataset path (quoted so that paths containing spaces are tested as one word)
 if [ ! -d "$2" ]
 then
    echo "error: DATASET_PATH=$2 is not a directory"
    exit 1
 fi

 # check checkpoint file
 if [ ! -f "$3" ]
 then
    echo "error: CHECKPOINT_PATH=$3 is not a file"
    exit 1
 fi

 # The script changes into ./eval before starting python, so relative
 # arguments are resolved to absolute paths first.
 DATASET_PATH=$(cd "$2" > /dev/null && pwd)
 CHECKPOINT_PATH=$(cd "$(dirname "$3")" > /dev/null && pwd)/$(basename "$3")

 # set environment
 BASEPATH=$(cd "$(dirname "$0")" > /dev/null || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 export DEVICE_ID=0
 export RANK_ID=0
 export RANK_SIZE=1

 # eval.py is looked up next to this script first; when the script lives in a
 # scripts/ sub-directory, fall back to the parent directory.
 EVAL_SCRIPT=${BASEPATH}/eval.py
 if [ ! -f "${EVAL_SCRIPT}" ] && [ -f "${BASEPATH}/../eval.py" ]
 then
    EVAL_SCRIPT=${BASEPATH}/../eval.py
 fi

 # start from an empty ./eval working directory
 if [ -d "eval" ];
 then
    rm -rf ./eval
 fi
 mkdir ./eval
 cd ./eval || exit

 # launch
 # "> file 2>&1" is used instead of "&>" because the latter is a bashism: under
 # a POSIX sh it backgrounds the command without redirecting its output.
 python "${EVAL_SCRIPT}" \
        --platform="$1" \
        --dataset_path="${DATASET_PATH}" \
        --checkpoint_path="${CHECKPOINT_PATH}" \
        > infer.log 2>&1 &  # dataset val folder path
--- a/mindspore/model_zoo/mobilenetv3/scripts/run_train.sh
+++ b/mindspore/model_zoo/mobilenetv3/scripts/run_train.sh
@@ -0,0 +1,94 @@
 #!/usr/bin/env bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 # Start distributed training on Ascend through launch.py, in the background.
 # Arguments: $1 platform, $2 device number, $3 server ip,
 #            $4 visible devices, $5 dataset directory.
 run_ascend()
 {
    # "||" is required here: a number cannot be both < 1 and > 8, so the
    # former "&&" check could never reject anything.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
        exit 1
    fi

    # Quoted so that a missing or space-containing argument is tested correctly.
    if [ ! -d "$5" ]
    then
        echo "error: DATASET_PATH=$5 is not a directory"
        exit 1
    fi
    # Resolve to an absolute path because the function changes into ./train below.
    DATASET_PATH=$(cd "$5" > /dev/null && pwd)

    BASEPATH=$(cd "$(dirname "$0")" > /dev/null || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH

    # launch.py / train.py are looked up next to this script first; when the
    # script lives in a scripts/ sub-directory, fall back to ../src/launch.py
    # and ../train.py.
    LAUNCH_SCRIPT=${BASEPATH}/launch.py
    if [ ! -f "${LAUNCH_SCRIPT}" ] && [ -f "${BASEPATH}/../src/launch.py" ]
    then
        LAUNCH_SCRIPT=${BASEPATH}/../src/launch.py
    fi
    TRAIN_SCRIPT=${BASEPATH}/train.py
    if [ ! -f "${TRAIN_SCRIPT}" ] && [ -f "${BASEPATH}/../train.py" ]
    then
        TRAIN_SCRIPT=${BASEPATH}/../train.py
    fi

    # start from an empty ./train working directory
    if [ -d "train" ];
    then
        rm -rf ./train
    fi
    mkdir ./train
    cd ./train || exit
    # "> file 2>&1" instead of the bash-only "&>" so that "sh run_train.sh" works.
    python "${LAUNCH_SCRIPT}" \
            --nproc_per_node="$2" \
            --visible_devices="$4" \
            --server_id="$3" \
            --training_script="${TRAIN_SCRIPT}" \
            --dataset_path="${DATASET_PATH}" \
            --platform="$1" > train.log 2>&1 &  # dataset train folder
 }

 # Start training on GPU through mpirun, in the background.
 # Arguments: $1 platform, $2 device number, $3 visible devices,
 #            $4 dataset directory.
 run_gpu()
 {
    # "||" is required here: a number cannot be both < 1 and > 8, so the
    # former "&&" check could never reject anything.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
        exit 1
    fi

    # Quoted so that a missing or space-containing argument is tested correctly.
    if [ ! -d "$4" ]
    then
        echo "error: DATASET_PATH=$4 is not a directory"
        exit 1
    fi
    # Resolve to an absolute path because the function changes into ./train below.
    DATASET_PATH=$(cd "$4" > /dev/null && pwd)

    BASEPATH=$(cd "$(dirname "$0")" > /dev/null || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH

    # train.py is looked up next to this script first; when the script lives
    # in a scripts/ sub-directory, fall back to the parent directory.
    TRAIN_SCRIPT=${BASEPATH}/train.py
    if [ ! -f "${TRAIN_SCRIPT}" ] && [ -f "${BASEPATH}/../train.py" ]
    then
        TRAIN_SCRIPT=${BASEPATH}/../train.py
    fi

    # start from an empty ./train working directory
    if [ -d "train" ];
    then
        rm -rf ./train
    fi
    mkdir ./train
    cd ./train || exit

    export CUDA_VISIBLE_DEVICES="$3"
    # "> file 2>&1" instead of the bash-only "&>" so that "sh run_train.sh" works.
    mpirun -n "$2" --allow-run-as-root \
    python "${TRAIN_SCRIPT}" \
        --dataset_path="${DATASET_PATH}" \
        --platform="$1" \
        > train.log 2>&1 &  # dataset train folder
 }

 # Argument-count check: the Ascend form takes 5 arguments, the GPU form 4.
 if [ $# -gt 5 ] || [ $# -lt 4 ]
 then
    # printf is used because "echo" does not expand "\n" portably (bash prints
    # it literally unless -e is given).
    printf '%s\n' \
        "Usage:" \
        "  Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]" \
        "  GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
    exit 1
 fi

 # Dispatch on the platform name given as first argument.
 if [ "$1" = "Ascend" ] ; then
    run_ascend "$@"
 elif [ "$1" = "GPU" ] ; then
    run_gpu "$@"
 else
    echo "not support platform"
    # Report the failure to the caller instead of exiting with status 0.
    exit 1
 fi;

--- a/mindspore/model_zoo/mobilenetv3/src/config.py
+++ b/mindspore/model_zoo/mobilenetv3/src/config.py
@@ -0,0 +1,54 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """
 network config setting, will be used in train.py and eval.py
 """
 from easydict import EasyDict as ed

 # Settings for the Ascend platform (attribute-style access through EasyDict).
 config_ascend = ed({
    "num_classes": 1000,  # dataset class num
    "image_height": 224,  # image height
    "image_width": 224,  # image width
    "batch_size": 256,  # training or inference batch size
    "epoch_size": 200,  # total training epochs, including warmup_epochs
    "warmup_epochs": 4,  # warmup epochs
    "lr": 0.4,  # base learning rate
    "momentum": 0.9,  # momentum
    "weight_decay": 4e-5,  # weight decay
    "label_smooth": 0.1,  # presumably the label smoothing factor of the loss -- TODO confirm in train.py
    "loss_scale": 1024,  # loss scale
    "save_checkpoint": True,  # whether to save checkpoints
    "save_checkpoint_epochs": 1,  # the epoch interval between two checkpoints
    "keep_checkpoint_max": 200,  # only keep the last keep_checkpoint_max checkpoints
    "save_checkpoint_path": "./checkpoint",  # presumably the checkpoint output directory -- TODO confirm in train.py
 })

 # Settings for the GPU platform; same keys as config_ascend, with a different
 # batch_size, epoch_size, lr and keep_checkpoint_max.
 config_gpu = ed({
    "num_classes": 1000,  # dataset class num
    "image_height": 224,  # image height
    "image_width": 224,  # image width
    "batch_size": 64,  # training or inference batch size
    "epoch_size": 300,  # total training epochs, including warmup_epochs
    "warmup_epochs": 4,  # warmup epochs
    "lr": 0.5,  # base learning rate
    "momentum": 0.9,  # momentum
    "weight_decay": 4e-5,  # weight decay
    "label_smooth": 0.1,  # presumably the label smoothing factor of the loss -- TODO confirm in train.py
    "loss_scale": 1024,  # loss scale
    "save_checkpoint": True,  # whether to save checkpoints
    "save_checkpoint_epochs": 1,  # the epoch interval between two checkpoints
    "keep_checkpoint_max": 500,  # only keep the last keep_checkpoint_max checkpoints
    "save_checkpoint_path": "./checkpoint",  # presumably the checkpoint output directory -- TODO confirm in train.py
 })
--- a/mindspore/model_zoo/mobilenetv3/src/dataset.py
+++ b/mindspore/model_zoo/mobilenetv3/src/dataset.py
@@ -0,0 +1,85 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """
 create train or eval dataset.
 """
 import os
 import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2


 def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    Build the image pipeline used for training or evaluation.

    Args:
        dataset_path(string): folder handed to ImageFolderDatasetV2.
        do_train(bool): True selects the random training transforms, False the
            decode / resize / center-crop evaluation transforms.
        config: settings object; image_height and image_width are read from it.
        platform(string): "Ascend" or "GPU".
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset

    Raises:
        ValueError: if platform is neither "Ascend" nor "GPU".
    """
    if platform not in ("Ascend", "GPU"):
        raise ValueError("Unsupport platform.")

    shard_args = {}
    if platform == "Ascend":
        # The shard layout is taken from the RANK_SIZE / RANK_ID environment variables.
        world_size = int(os.getenv("RANK_SIZE"))
        shard_index = int(os.getenv("RANK_ID"))
        if world_size != 1:
            shard_args = {"num_shards": world_size, "shard_id": shard_index}
    ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, **shard_args)

    target_height = config.image_height
    target_width = config.image_width

    # Every transform is instantiated up front, whichever list ends up being used.
    decode = C.Decode()
    crop_decode_resize = C.RandomCropDecodeResize(target_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    random_flip = C.RandomHorizontalFlip(prob=0.5)

    resize_256 = C.Resize((256, 256))
    crop_center = C.CenterCrop(target_width)
    color_adjust = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    normalize = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
    to_chw = C.HWC2CHW()

    image_ops = (
        [crop_decode_resize, random_flip, color_adjust, normalize, to_chw]
        if do_train else
        [decode, resize_256, crop_center, normalize, to_chw]
    )
    label_cast = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=image_ops, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=label_cast, num_parallel_workers=8)

    # shuffle with a 1000-sample buffer, batch (dropping the remainder), then repeat
    ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds.repeat(repeat_num)
--- a/mindspore/model_zoo/mobilenetv3/src/launch.py
+++ b/mindspore/model_zoo/mobilenetv3/src/launch.py
@@ -0,0 +1,163 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """launch train script"""
 import os
 import sys
 import json
 import subprocess
 import shutil
 from argparse import ArgumentParser

 def parse_args():
    """
    Parse the launcher's command line.

    Options the launcher does not know are not rejected; they are collected, in
    order, in ``training_script_args`` so they can be forwarded to the
    training script.

    Args:

    Returns:
        args, the parsed namespace with the extra ``training_script_args`` list.

    Examples:
        >>> parse_args()
    """
    arg_parser = ArgumentParser(description="mindspore distributed training launch "
                                            "helper utilty that will spawn up "
                                            "multiple distributed processes")
    arg_parser.add_argument(
        "--nproc_per_node", type=int, default=1,
        help="The number of processes to launch on each node, "
             "for D training, this is recommended to be set "
             "to the number of D in your system so that "
             "each process can be bound to a single D.")
    arg_parser.add_argument(
        "--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
        help="will use the visible devices sequentially")
    arg_parser.add_argument(
        "--server_id", type=str, default="",
        help="server ip")
    arg_parser.add_argument(
        "--training_script", type=str,
        help="The full path to the single D training "
             "program/script to be launched in parallel, "
             "followed by all the arguments for the "
             "training script")

    # everything argparse does not recognise belongs to the training program
    known, passthrough = arg_parser.parse_known_args()
    known.training_script_args = passthrough
    return known


 def main():
    """
    Write a rank table for the selected devices and run one training process per device.

    Steps:
        1. Parse the command line and check the training script / device list.
        2. Read the ``address_<id>=<ip>`` entries of /etc/hccn.conf.
        3. Dump the rank table as JSON into the current working directory.
        4. Start ``nproc_per_node`` copies of the training script, each in its own
           ``device<rank>`` directory with RANK_SIZE, RANK_ID and DEVICE_ID set
           (plus the rank table path when more than one process is used), and
           wait for them.

    Raises:
        AssertionError: if the training script is not a file or fewer devices
            are visible than processes requested.
        SystemExit: with status 1 if no server ip was given.
        subprocess.CalledProcessError: if a training process exits with a
            non-zero return code.
    """
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('pleaser input server ip!!!')
        # A missing server ip is an error: exit non-zero, and through sys.exit
        # rather than the interactive-only exit() helper.
        sys.exit(1)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table
    with open('/etc/hccn.conf', 'r') as hccn_file:
        hccn_configs = hccn_file.readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file
    table_path = os.getcwd()
    if not os.path.exists(table_path):
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    cur_path = os.getcwd()
    try:
        for rank_id in range(0, args.nproc_per_node):
            os.chdir(cur_path)
            device_id = visible_devices[rank_id]
            device_dir = os.path.join(cur_path, 'device{}'.format(rank_id))
            env['RANK_ID'] = str(rank_id)
            env['DEVICE_ID'] = str(device_id)
            if args.nproc_per_node > 1:
                env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn
                env['RANK_TABLE_FILE'] = table_fn
            # each rank gets a fresh working directory of its own
            if os.path.exists(device_dir):
                shutil.rmtree(device_dir)
            os.mkdir(device_dir)
            os.chdir(device_dir)
            cmd = [sys.executable, '-u']
            cmd.append(args.training_script)
            cmd.extend(args.training_script_args)
            log_file = open('{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w')
            # registered right away so the handle is closed even if Popen fails
            log_files.append(log_file)
            process = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, env=env)
            processes.append(process)
            cmds.append(cmd)
        for process, cmd in zip(processes, cmds):
            process.wait()
            if process.returncode != 0:
                # report the numeric exit status, not the Popen object itself
                raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
    finally:
        # close the parent's log handles on success and on failure alike
        for log_file in log_files:
            log_file.close()


 if __name__ == "__main__":
    main()
--- a/mindspore/model_zoo/mobilenetv3/src/lr_generator.py
+++ b/mindspore/model_zoo/mobilenetv3/src/lr_generator.py
@@ -0,0 +1,54 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """learning rate generator"""
 import math
 import numpy as np


 def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Build the per-step learning rate schedule: linear warmup, then cosine decay.

    During warmup the rate moves linearly from lr_init towards lr_max; after it
    the rate follows half a cosine from lr_max down to lr_end. Negative values
    are clamped to 0.

    Args:
       global_step(int): index of the first step to return; earlier steps are dropped
       lr_init(float): init learning rate
       lr_end(float): end learning rate
       lr_max(float): max learning rate
       warmup_epochs(int): number of warmup epochs
       total_epochs(int): total epoch of training
       steps_per_epoch(int): steps of one epoch

    Returns:
       np.array, float32 learning rates from global_step to the last step
    """
    n_total = steps_per_epoch * total_epochs
    n_warmup = steps_per_epoch * warmup_epochs

    def rate_at(step):
        """learning rate of a single step"""
        if step < n_warmup:
            value = lr_init + (lr_max - lr_init) * step / n_warmup
        else:
            angle = math.pi * (step - n_warmup) / (n_total - n_warmup)
            value = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        return max(value, 0.0)

    schedule = [rate_at(step) for step in range(n_total)]
    return np.array(schedule).astype(np.float32)[global_step:]
--- a/mindspore/model_zoo/mobilenetv3/src/mobilenetV3.py
+++ b/mindspore/model_zoo/mobilenetv3/src/mobilenetV3.py
@@ -0,0 +1,390 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """MobileNetV3 model define"""
 from functools import partial
 import numpy as np
 import mindspore.nn as nn
 from mindspore.ops import operations as P
 from mindspore import Tensor


 # Names exported by ``from <module> import *``: the two model factories.
 __all__ = ['mobilenet_v3_large',
           'mobilenet_v3_small']


 def _make_divisible(x, divisor=8):
    return int(np.ceil(x * 1. / divisor) * divisor)


 class Activation(nn.Cell):
    """
    Wrap one of the supported activation cells, selected by name.

    Args:
        act_func(string): activation name, one of 'relu', 'relu6',
            'hsigmoid' / 'hard_sigmoid', 'hswish' / 'hard_swish'.

    Returns:
         Tensor, output tensor.

    Raises:
        NotImplementedError: if act_func is not one of the names above.
    """

    def __init__(self, act_func):
        super().__init__()
        # accepted names -> cell type; aliases share one entry
        choices = (
            (('relu',), nn.ReLU),
            (('relu6',), nn.ReLU6),
            (('hsigmoid', 'hard_sigmoid'), nn.HSigmoid),
            (('hswish', 'hard_swish'), nn.HSwish),
        )
        for names, cell_type in choices:
            if act_func in names:
                self.act = cell_type()
                break
        else:
            raise NotImplementedError

    def construct(self, x):
        return self.act(x)


 class GlobalAvgPooling(nn.Cell):
    """
    Average the input over axes 2 and 3.

    Args:
        keep_dims (bool): forwarded to ReduceMean; whether the reduced axes are
            kept. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self, keep_dims=False):
        super().__init__()
        self.mean = P.ReduceMean(keep_dims=keep_dims)

    def construct(self, x):
        return self.mean(x, (2, 3))


 class SE(nn.Cell):
    """
    SE wrapper: rescales the input by a gate computed from its pooled features.

    The gate is pool -> 1x1 conv -> relu -> 1x1 conv -> hsigmoid, and the
    result is multiplied with the input.

    Args:
        num_out (int): Input and output channel of the block.
        ratio (int): reduction ratio; the middle channel is num_out // ratio
            rounded up by _make_divisible.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> SE(4)
    """

    def __init__(self, num_out, ratio=4):
        super().__init__()
        squeezed = _make_divisible(num_out // ratio)
        self.pool = GlobalAvgPooling(keep_dims=True)
        self.conv1 = nn.Conv2d(in_channels=num_out,
                               out_channels=squeezed,
                               kernel_size=1,
                               has_bias=True,
                               pad_mode='pad')
        self.act1 = Activation('relu')
        self.conv2 = nn.Conv2d(in_channels=squeezed,
                               out_channels=num_out,
                               kernel_size=1,
                               has_bias=True,
                               pad_mode='pad')
        self.act2 = Activation('hsigmoid')
        self.mul = P.Mul()

    def construct(self, x):
        gate = self.act1(self.conv1(self.pool(x)))
        gate = self.act2(self.conv2(gate))
        return self.mul(x, gate)


 class Unit(nn.Cell):
    """
    Conv2d (no bias) followed by BatchNorm2d and an optional activation.

    Args:
        num_in (int): Input channel.
        num_out (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size.
        padding (int): Padding number.
        num_groups (int): Group number of the convolution.
        use_act (bool): Used activation or not.
        act_type (string): Activation type.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> Unit(3, 3)
    """

    def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, num_groups=1,
                 use_act=True, act_type='relu'):
        super().__init__()
        self.conv = nn.Conv2d(in_channels=num_in, out_channels=num_out,
                              kernel_size=kernel_size, stride=stride, padding=padding,
                              group=num_groups, has_bias=False, pad_mode='pad')
        self.bn = nn.BatchNorm2d(num_out)
        self.use_act = use_act
        # the activation cell is only created when it is going to be applied
        if use_act:
            self.act = Activation(act_type)
        else:
            self.act = None

    def construct(self, x):
        out = self.bn(self.conv(x))
        if self.use_act:
            out = self.act(out)
        return out


 class ResUnit(nn.Cell):
    """
    Bottleneck block: optional 1x1 expand, depthwise conv, optional SE, 1x1 project.

    The input is added back to the result only when num_in equals num_out and
    stride is 1.

    Args:
        num_in (int): Input channel.
        num_mid (int): Middle channel.
        num_out (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size.
        act_type (str): Activation type.
        use_se (bool): Use SE wrapper or not.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResUnit(16, 3, 1, 1)
    """
    def __init__(self, num_in, num_mid, num_out, kernel_size, stride=1, act_type='relu', use_se=False):
        super().__init__()
        self.use_se = use_se
        # NOTE(review): the expand conv is skipped when num_out equals num_mid;
        # comparing num_in with num_mid may have been intended -- confirm.
        self.first_conv = (num_out != num_mid)
        # despite its name, this flag enables the plain residual add
        self.use_short_cut_conv = (num_in == num_out and stride == 1)

        self.expand = None
        if self.first_conv:
            self.expand = Unit(num_in, num_mid, kernel_size=1,
                               stride=1, padding=0, act_type=act_type)
        # depthwise: one group per channel
        self.conv1 = Unit(num_mid, num_mid,
                          kernel_size=kernel_size,
                          stride=stride,
                          padding=self._get_pad(kernel_size),
                          act_type=act_type,
                          num_groups=num_mid)
        if use_se:
            self.se = SE(num_mid)
        # projection without activation
        self.conv2 = Unit(num_mid, num_out,
                          kernel_size=1, stride=1, padding=0,
                          act_type=act_type, use_act=False)
        self.add = P.TensorAdd() if self.use_short_cut_conv else None

    def construct(self, x):
        out = x
        if self.first_conv:
            out = self.expand(x)
        out = self.conv1(out)
        if self.use_se:
            out = self.se(out)
        out = self.conv2(out)
        if self.use_short_cut_conv:
            out = self.add(x, out)
        return out

    def _get_pad(self, kernel_size):
        """set the padding number"""
        # supported kernel sizes and the padding used for each of them
        for size, pad in ((1, 0), (3, 1), (5, 2), (7, 3)):
            if kernel_size == size:
                return pad
        raise NotImplementedError


 class MobileNetV3(nn.Cell):
    """
    MobileNetV3 architecture.

    Layout: 3x3 stride-2 conv + BN + hswish stem, one ResUnit per row of
    ``model_cfgs['cfg']``, a 1x1 conv + BN + hswish, global average pooling,
    a 1x1 conv + hswish, optional dropout, and a 1x1 conv classifier whose
    output is squeezed on axes 2 and 3.

    Args:
        model_cfgs (dict): network description with the keys 'cfg' (list of
            [kernel size, expansion channel, output channel, use SE,
            activation name, stride] rows), 'cls_ch_squeeze' and 'cls_ch_expand'.
        num_classes (int): Output number classes.
        multiplier (float): Channels multiplier; scaled channels are rounded up
            by _make_divisible. Default is 1.
        final_drop (float): Value passed to nn.Dropout; the dropout layer is
            only added when it is greater than 0.
        round_nearest (int): Accepted but not used by this implementation.
            Default is 8.
    Returns:
        Tensor, output tensor.

    Examples:
        >>> MobileNetV3(num_classes=1000)
    """

    def __init__(self, model_cfgs, num_classes=1000, multiplier=1., final_drop=0., round_nearest=8):
        super(MobileNetV3, self).__init__()
        self.cfgs = model_cfgs['cfg']
        self.features = []
        first_conv_in_channel = 3
        first_conv_out_channel = _make_divisible(multiplier * 16)
        # Channel count produced by the previous layer. It has to be the scaled
        # stem width: with the unscaled 16 the first ResUnit is told a wrong
        # num_in whenever multiplier != 1 and silently drops its residual add.
        self.inplanes = first_conv_out_channel

        self.features.append(nn.Conv2d(in_channels=first_conv_in_channel,
                                       out_channels=first_conv_out_channel,
                                       kernel_size=3, padding=1, stride=2,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(first_conv_out_channel))
        self.features.append(Activation('hswish'))
        for layer_cfg in self.cfgs:
            self.features.append(self._make_layer(kernel_size=layer_cfg[0],
                                                  exp_ch=_make_divisible(multiplier * layer_cfg[1]),
                                                  out_channel=_make_divisible(multiplier * layer_cfg[2]),
                                                  use_se=layer_cfg[3],
                                                  act_func=layer_cfg[4],
                                                  stride=layer_cfg[5]))
        output_channel = _make_divisible(multiplier * model_cfgs["cls_ch_squeeze"])
        self.features.append(nn.Conv2d(in_channels=_make_divisible(multiplier * self.cfgs[-1][2]),
                                       out_channels=output_channel,
                                       kernel_size=1, padding=0, stride=1,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(output_channel))
        self.features.append(Activation('hswish'))
        self.features.append(GlobalAvgPooling(keep_dims=True))
        # cls_ch_expand is used as given; it is not scaled by multiplier
        self.features.append(nn.Conv2d(in_channels=output_channel,
                                       out_channels=model_cfgs['cls_ch_expand'],
                                       kernel_size=1, padding=0, stride=1,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(Activation('hswish'))
        if final_drop > 0:
            self.features.append((nn.Dropout(final_drop)))

        # wrap the collected layers into a single nn.SequentialCell
        self.features = nn.SequentialCell(self.features)
        self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'],
                                out_channels=num_classes,
                                kernel_size=1, has_bias=True, pad_mode='pad')
        self.squeeze = P.Squeeze(axis=(2, 3))

        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.output(x)
        x = self.squeeze(x)
        return x

    def _make_layer(self, kernel_size, exp_ch, out_channel, use_se, act_func, stride=1):
        """
        Create one ResUnit fed by the current channel count and advance that count.

        Args:
            kernel_size (int): kernel size of the depthwise convolution.
            exp_ch (int): middle (expansion) channel.
            out_channel (int): output channel; becomes the next layer's input.
            use_se (bool): Use SE wrapper or not.
            act_func (str): Activation type.
            stride (int): Stride size.

        Returns:
            ResUnit, the new layer.
        """
        mid_planes = exp_ch
        out_planes = out_channel
        layer = ResUnit(self.inplanes, mid_planes, out_planes,
                        kernel_size, stride=stride, act_type=act_func, use_se=use_se)
        self.inplanes = out_planes
        return layer

    def _initialize_weights(self):
        """
        Initialize weights.

        Conv2d weights are drawn from a normal distribution with standard
        deviation sqrt(2 / (kernel_h * kernel_w * out_channels)), Dense weights
        from a normal distribution with standard deviation 0.01; biases and
        BatchNorm2d beta are set to zero and BatchNorm2d gamma to one.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        for _, m in self.cells_and_names():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                                    m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(
                        Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_parameter_data(
                    Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
                m.beta.set_parameter_data(
                    Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_parameter_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(
                        Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))


 def mobilenet_v3(model_name, **kwargs):
    """
    Constructs a MobileNet V3 model.

    Args:
        model_name (str): "large" or "small"; any other value raises KeyError.
        **kwargs: forwarded unchanged to MobileNetV3.

    Returns:
        MobileNetV3, the network built from the selected configuration.
    """
    # row layout: k, exp, c,  se,     nl,  s,
    large_rows = [
        [3, 16, 16, False, 'relu', 1],
        [3, 64, 24, False, 'relu', 2],
        [3, 72, 24, False, 'relu', 1],
        [5, 72, 40, True, 'relu', 2],
        [5, 120, 40, True, 'relu', 1],
        [5, 120, 40, True, 'relu', 1],
        [3, 240, 80, False, 'hswish', 2],
        [3, 200, 80, False, 'hswish', 1],
        [3, 184, 80, False, 'hswish', 1],
        [3, 184, 80, False, 'hswish', 1],
        [3, 480, 112, True, 'hswish', 1],
        [3, 672, 112, True, 'hswish', 1],
        [5, 672, 160, True, 'hswish', 2],
        [5, 960, 160, True, 'hswish', 1],
        [5, 960, 160, True, 'hswish', 1],
    ]
    small_rows = [
        [3, 16, 16, True, 'relu', 2],
        [3, 72, 24, False, 'relu', 2],
        [3, 88, 24, False, 'relu', 1],
        [5, 96, 40, True, 'hswish', 2],
        [5, 240, 40, True, 'hswish', 1],
        [5, 240, 40, True, 'hswish', 1],
        [5, 120, 48, True, 'hswish', 1],
        [5, 144, 48, True, 'hswish', 1],
        [5, 288, 96, True, 'hswish', 2],
        [5, 576, 96, True, 'hswish', 1],
        [5, 576, 96, True, 'hswish', 1],
    ]
    variants = {
        "large": {"cfg": large_rows, "cls_ch_squeeze": 960, "cls_ch_expand": 1280},
        "small": {"cfg": small_rows, "cls_ch_squeeze": 576, "cls_ch_expand": 1280},
    }
    selected = variants[model_name]
    return MobileNetV3(selected, **kwargs)


 # Factories with model_name pre-bound; remaining keyword arguments are
 # passed through mobilenet_v3 to MobileNetV3.
 mobilenet_v3_large = partial(mobilenet_v3, model_name="large")
 mobilenet_v3_small = partial(mobilenet_v3, model_name="small")
--- a/mindspore/model_zoo/mobilenetv3/train.py
+++ b/mindspore/model_zoo/mobilenetv3/train.py
@@ -0,0 +1,267 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """train_imagenet."""
 import os
 import time
 import argparse
 import random
 import numpy as np
 from mindspore import context
 from mindspore import Tensor
 from mindspore import nn
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.loss.loss import _Loss
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.common import dtype as mstype
 from mindspore.train.model import Model, ParallelMode
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 import mindspore.dataset.engine as de
 from mindspore.communication.management import init
 from src.dataset import create_dataset
 from src.lr_generator import get_lr
 from src.config import config_gpu, config_ascend
 from src.mobilenetV3 import mobilenet_v3_large

 # Fix the seeds of Python's, NumPy's and the dataset engine's random generators.
 random.seed(1)
 np.random.seed(1)
 de.config.set_seed(1)

 # Command-line options; all three default to None when omitted.
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
 parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
 parser.add_argument('--platform', type=str, default=None, help='run platform')
 args_opt = parser.parse_args()

 # Select the execution backend once, at import time. Any platform other than
 # "Ascend" or "GPU" (including an omitted --platform) is rejected here.
 if args_opt.platform == "Ascend":
    # Device and rank layout come from the environment; int() raises TypeError
    # if one of these variables is not set.
    device_id = int(os.getenv('DEVICE_ID'))
    rank_id = int(os.getenv('RANK_ID'))
    rank_size = int(os.getenv('RANK_SIZE'))
    # More than one rank means data-parallel training is set up later on.
    run_distribute = rank_size > 1
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id, save_graphs=False)
 elif args_opt.platform == "GPU":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="GPU", save_graphs=False)
 else:
    raise ValueError("Unsupport platform.")


 class CrossEntropyWithLabelSmooth(_Loss):
    """
    Softmax cross entropy evaluated against label-smoothed one-hot targets.

    The labelled class receives the weight ``1 - smooth_factor`` and every
    other class receives ``smooth_factor / (num_classes - 1)``.

    Args:
        smooth_factor (float): smoothing amount. Default: 0.
        num_classes (int): number of classes used to spread the smoothing
            weight. Default: 1000.

    Returns:
        The cross-entropy output reduced with ``ReduceMean`` over axis 0.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        # Weights written into the one-hot target: one for the labelled
        # class, one shared by each of the remaining classes.
        target_weight = 1.0 - smooth_factor
        other_weight = 1.0 * smooth_factor / (num_classes - 1)

        self.onehot = P.OneHot()
        self.on_value = Tensor(target_weight, mstype.float32)
        self.off_value = Tensor(other_weight, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        # Labels are cast to int32 before one-hot encoding; the one-hot depth
        # is read from the second dimension of the logits.
        int_label = self.cast(label, mstype.int32)
        depth = F.shape(logit)[1]
        smoothed_target = self.onehot(int_label, depth, self.on_value, self.off_value)
        sample_loss = self.ce(logit, smoothed_target)
        return self.mean(sample_loss, 0)


 class Monitor(Callback):
    """
    Callback that prints loss and timing for every step and every epoch.

    Args:
        lr_init (numpy array): learning rate per global step, looked up with
            ``cur_step_num - 1``. Default: None, in which case (and for any
            step beyond the end of the array) ``nan`` is printed as the lr.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # The default None must not crash on len(); treat it as an empty schedule.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        # Create the state here so step_end/epoch_end never hit a missing
        # attribute, even if the matching *_begin hook was not called first.
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        """Reset the loss history and start the epoch timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print epoch duration, average step duration and average loss."""
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))

    def step_begin(self, run_context):
        """Start the step timer."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """Record the step loss and print progress, loss, time and lr."""
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        # net_outputs may be a tuple/list whose first entry is the loss tensor.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        # Only index the schedule inside its bounds; otherwise report nan
        # instead of raising from inside a logging callback.
        lr_index = cb_params.cur_step_num - 1
        if 0 <= lr_index < self.lr_init_len:
            cur_lr = self.lr_init[lr_index]
        else:
            cur_lr = float('nan')

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num -
            1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, cur_lr))


 if __name__ == '__main__':
    # Script entry point: build the network, loss, dataset, optimizer and
    # callbacks for the selected platform, then run training.
    if args_opt.platform == "GPU":
        # train on gpu
        print("train args: ", args_opt, "\ncfg: ", config_gpu)

        # define net
        net = mobilenet_v3_large(num_classes=config_gpu.num_classes)
        # define loss: label-smoothed loss only when a positive factor is configured
        if config_gpu.label_smooth > 0:
            loss = CrossEntropyWithLabelSmooth(
                smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
        # define dataset
        epoch_size = config_gpu.epoch_size
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 config=config_gpu,
                                 platform=args_opt.platform,
                                 repeat_num=epoch_size,
                                 batch_size=config_gpu.batch_size)
        step_size = dataset.get_dataset_size()
        # resume from a checkpoint when one is given
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        # define optimizer
        loss_scale = FixedLossScaleManager(
            config_gpu.loss_scale, drop_overflow_update=False)
        lr = Tensor(get_lr(global_step=0,
                           lr_init=0,
                           lr_end=0,
                           lr_max=config_gpu.lr,
                           warmup_epochs=config_gpu.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_gpu.momentum,
                       config_gpu.weight_decay, config_gpu.loss_scale)
        # define model
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)

        cb = [Monitor(lr_init=lr.asnumpy())]
        if config_gpu.save_checkpoint:
            # save_checkpoint_epochs is converted to a step interval
            config_ck = CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config_gpu.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(
                prefix="mobilenet", directory=config_gpu.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
        # begin training
        model.train(epoch_size, dataset, callbacks=cb)
    elif args_opt.platform == "Ascend":
        # train on ascend
        print("train args: ", args_opt, "\ncfg: ", config_ascend,
              "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

        # data-parallel setup, only when more than one rank takes part
        if run_distribute:
            context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                              parameter_broadcast=True, mirror_mean=True)
            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
            init()

        epoch_size = config_ascend.epoch_size
        net = mobilenet_v3_large(num_classes=config_ascend.num_classes)
        # run the network in float16, but switch the Dense cells back to float32
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)
        if config_ascend.label_smooth > 0:
            # use the Ascend config here as well; no plain ``config`` name is imported
            loss = CrossEntropyWithLabelSmooth(
                smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 config=config_ascend,
                                 platform=args_opt.platform,
                                 repeat_num=epoch_size,
                                 batch_size=config_ascend.batch_size)
        step_size = dataset.get_dataset_size()
        # resume from a checkpoint when one is given
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)

        loss_scale = FixedLossScaleManager(
            config_ascend.loss_scale, drop_overflow_update=False)
        lr = Tensor(get_lr(global_step=0,
                           lr_init=0,
                           lr_end=0,
                           lr_max=config_ascend.lr,
                           warmup_epochs=config_ascend.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_ascend.momentum,
                       config_ascend.weight_decay, config_ascend.loss_scale)

        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)

        # only rank 0 logs progress and writes checkpoints
        cb = None
        if rank_id == 0:
            cb = [Monitor(lr_init=lr.asnumpy())]
            if config_ascend.save_checkpoint:
                # save_checkpoint_epochs is converted to a step interval
                config_ck = CheckpointConfig(save_checkpoint_steps=config_ascend.save_checkpoint_epochs * step_size,
                                             keep_checkpoint_max=config_ascend.keep_checkpoint_max)
                ckpt_cb = ModelCheckpoint(
                    prefix="mobilenet", directory=config_ascend.save_checkpoint_path, config=config_ck)
                cb += [ckpt_cb]
        model.train(epoch_size, dataset, callbacks=cb)
    else:
        # Defensive fallback; the module-level platform check already raises
        # ValueError for any other platform before this point is reached.
        raise Exception