mobilenetV2 change for gpu

5 years ago · 26b1242bc4
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -960,7 +960,7 @@ class ActQuant(_QuantActivation):
        Tensor, with the same type and shape as the `x`.

    Examples:
        >>> act_quant = nn.ActQuant(nn.ReLU)
        >>> act_quant = nn.ActQuant(nn.ReLU())
        >>> input_x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
        >>> result = act_quant(input_x)
    """
@@ -1009,7 +1009,7 @@ class LeakyReLUQuant(_QuantActivation):
        quant_delay (int): Quantization delay parameters according by global step. Default: 0.

    Inputs:
        - **x** (Tensor) - The input of HSwishQuant.
        - **x** (Tensor) - The input of LeakyReLUQuant.

    Outputs:
        Tensor, with the same type and shape as the `x`.
--- a/mindspore/train/quant/quant.py
+++ b/mindspore/train/quant/quant.py
@@ -306,7 +306,7 @@ class ExportToQuantInferNetwork:
        std_dev (int, float): Input data variance. Default: 127.5.

    Returns:
        Cell, GEIR backend Infer network.
        Cell, Infer network.
    """
    __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"]

--- a/model_zoo/official/cv/mobilenetv2/scripts/run_train.sh
+++ b/model_zoo/official/cv/mobilenetv2/scripts/run_train.sh
@@ -91,6 +91,6 @@ if [ $1 = "Ascend" ] ; then
 elif [ $1 = "GPU" ] ; then
    run_gpu "$@"
 else
    echo "not support platform"
    echo "Unsupported platform."
 fi;

--- a/model_zoo/official/cv/mobilenetv2/src/mobilenetV2_fusion.py
+++ b/model_zoo/official/cv/mobilenetv2/src/mobilenetV2_fusion.py
@@ -0,0 +1,239 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 # """MobileNetV2 Quant model define"""

 import numpy as np
 import mindspore.nn as nn
 from mindspore.ops import operations as P
 from mindspore import Tensor

 __all__ = ['mobilenetV2']


 def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10 %.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


 class GlobalAvgPooling(nn.Cell):
    """
    Global average pooling layer.

    Averages the input over axes 2 and 3 with `keep_dims=False`, so both axes
    are removed from the output.

    Args:

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        # presumably x is laid out as (N, C, H, W) -- confirm against callers
        pooled = self.mean(x, (2, 3))
        return pooled


 class ConvBNReLU(nn.Cell):
    """
    Convolution (regular or depthwise) fused with BatchNorm and ReLU.

    The whole block is a single `nn.Conv2dBnAct` cell stored as `self.conv`.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): Channel group. Use 1 for a regular convolution and the input
            channel count for a depthwise convolution. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # Explicit symmetric padding of (kernel_size - 1) // 2 on each side.
        conv_options = dict(stride=stride,
                            pad_mode='pad',
                            padding=(kernel_size - 1) // 2,
                            group=groups,
                            has_bn=True,
                            activation='relu')
        self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size, **conv_options)

    def construct(self, x):
        out = self.conv(x)
        return out


 class InvertedResidual(nn.Cell):
    """
    Mobilenetv2 inverted residual block definition.

    The block is an optional 1x1 expansion (skipped when `expand_ratio` is 1),
    a 3x3 depthwise ConvBNReLU, and a 1x1 Conv2dBnAct projection created without
    an activation argument. The input is added back to the output only when
    `stride` is 1 and `inp` equals `oup`.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride of the depthwise convolution, must be 1 or 2.
        expand_ratio (int): Expansion ratio applied to the input channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        expanded = int(round(inp * expand_ratio))
        self.use_res_connect = stride == 1 and inp == oup

        # Cells are created in network order: expansion, depthwise, projection.
        stages = [ConvBNReLU(inp, expanded, kernel_size=1)] if expand_ratio != 1 else []
        stages.append(ConvBNReLU(expanded, expanded, stride=stride, groups=expanded))
        stages.append(nn.Conv2dBnAct(expanded, oup, kernel_size=1, stride=1,
                                     pad_mode='pad', padding=0, group=1, has_bn=True))
        self.conv = nn.SequentialCell(stages)
        self.add = P.TensorAdd()

    def construct(self, x):
        y = self.conv(x)
        if self.use_res_connect:
            y = self.add(y, x)
        return y


 class mobilenetV2(nn.Cell):
    """
    mobilenetV2 fusion architecture.

    Args:
        num_classes (int): Number of classes. Default: 1000.
        width_mult (float): Channels multiplier; scaled channel counts are rounded
            with `round_nearest`. Default: 1.
        has_dropout (bool): Whether `nn.Dropout(0.2)` is placed before the classifier.
            Default: False.
        inverted_residual_setting (list): Inverted residual settings, one `[t, c, n, s]`
            row per stage. Default: None, which selects the built-in table.
        round_nearest (int): Channel counts are rounded to a multiple of this value.
            Default: 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> mobilenetV2(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(mobilenetV2, self).__init__()
        if inverted_residual_setting is None:
            # Each row is (t, c, n, s): expand ratio, output channel,
            # number of repeated blocks, stride of the first block in the stage.
            inverted_residual_setting = [
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        self.cfgs = inverted_residual_setting

        # channel counts of the first and the last feature layer
        in_channel = _make_divisible(32 * width_mult, round_nearest)
        self.out_channels = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # first layer
        layers = [ConvBNReLU(3, in_channel, stride=2)]
        # inverted residual stages
        for t, c, n, s in self.cfgs:
            out_channel = _make_divisible(c * width_mult, round_nearest)
            for idx in range(n):
                # only the first block of a stage uses the configured stride
                block_stride = s if idx == 0 else 1
                layers.append(InvertedResidual(in_channel, out_channel, block_stride, expand_ratio=t))
                in_channel = out_channel
        # last feature layer
        layers.append(ConvBNReLU(in_channel, self.out_channels, kernel_size=1))
        self.features = nn.SequentialCell(layers)

        # mobilenet head: pooling, optional dropout, classifier
        head = [GlobalAvgPooling()]
        if has_dropout:
            head.append(nn.Dropout(0.2))
        head.append(nn.DenseBnAct(self.out_channels, num_classes, has_bias=True, has_bn=False))
        self.head = nn.SequentialCell(head)

        # init weights
        self._initialize_weights()

    def construct(self, x):
        feature_map = self.features(x)
        logits = self.head(feature_map)
        return logits

    def _initialize_weights(self):
        """
        Initialize weights.

        Convolution weights are drawn from a normal distribution with standard
        deviation sqrt(2 / (kh * kw * out_channels)), dense weights from a normal
        distribution with standard deviation 0.01, biases are zeroed, and
        BatchNorm2d gamma/beta are set to ones/zeros.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        for _, cell in self.cells_and_names():
            if isinstance(cell, (nn.Conv2d, nn.Conv2dBnAct)):
                # A fused cell keeps its convolution in `.conv`.
                conv = cell if isinstance(cell, nn.Conv2d) else cell.conv
                fan_out = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
                values = np.random.normal(0, np.sqrt(2. / fan_out), conv.weight.data.shape)
                conv.weight.set_parameter_data(Tensor(values.astype("float32")))
                if conv.bias is not None:
                    conv.bias.set_parameter_data(Tensor(np.zeros(conv.bias.data.shape, dtype="float32")))
            elif isinstance(cell, nn.BatchNorm2d):
                cell.gamma.set_parameter_data(Tensor(np.ones(cell.gamma.data.shape, dtype="float32")))
                cell.beta.set_parameter_data(Tensor(np.zeros(cell.beta.data.shape, dtype="float32")))
            elif isinstance(cell, (nn.Dense, nn.DenseBnAct)):
                # A fused cell keeps its dense layer in `.dense`.
                dense = cell if isinstance(cell, nn.Dense) else cell.dense
                values = np.random.normal(0, 0.01, dense.weight.data.shape)
                dense.weight.set_parameter_data(Tensor(values.astype("float32")))
                if dense.bias is not None:
                    dense.bias.set_parameter_data(Tensor(np.zeros(dense.bias.data.shape, dtype="float32")))
--- a/model_zoo/official/cv/mobilenetv2/train.py
+++ b/model_zoo/official/cv/mobilenetv2/train.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """train_imagenet."""
 """Train mobilenetV2 on ImageNet."""

 import os
 import time
 import argparse
@@ -165,15 +166,14 @@ if __name__ == '__main__':
        print("train args: ", args_opt)
        print("cfg: ", config_gpu)

        # define net
        # define network
        net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU")
        # define loss
        if config_gpu.label_smooth > 0:
            loss = CrossEntropyWithLabelSmooth(
                smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
            loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth,
                                               num_classes=config_gpu.num_classes)
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
            loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
        # define dataset
        epoch_size = config_gpu.epoch_size
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
@@ -187,7 +187,8 @@ if __name__ == '__main__':
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        # define optimizer

        # get learning rate
        loss_scale = FixedLossScaleManager(
            config_gpu.loss_scale, drop_overflow_update=False)
        lr = Tensor(get_lr(global_step=0,
@@ -197,12 +198,14 @@ if __name__ == '__main__':
                           warmup_epochs=config_gpu.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))

        # define optimization
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_gpu.momentum,
                       config_gpu.weight_decay, config_gpu.loss_scale)
        # define model
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)
        model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

        print("============== Starting Training ==============")
        cb = [Monitor(lr_init=lr.asnumpy())]
        ckpt_save_dir = config_gpu.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
        if config_gpu.save_checkpoint:
@@ -212,6 +215,7 @@ if __name__ == '__main__':
            cb += [ckpt_cb]
        # begin train
        model.train(epoch_size, dataset, callbacks=cb)
        print("============== End Training ==============")
    elif args_opt.platform == "Ascend":
        # train on ascend
        print("train args: ", args_opt, "\ncfg: ", config_ascend,
--- a/model_zoo/official/cv/mobilenetv2_quant/Readme.md
+++ b/model_zoo/official/cv/mobilenetv2_quant/Readme.md
@@ -64,12 +64,14 @@ Dataset use: ImageNet

 Train a MindSpore fusion MobileNetV2 model for ImageNet, like:

 - sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]
 - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]
 - GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]

 You can just run this command instead.

 ``` bash
 >>> sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
 >>> Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
 >>> GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```

 Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below.
--- a/model_zoo/official/cv/mobilenetv2_quant/scripts/run_train.sh
+++ b/model_zoo/official/cv/mobilenetv2_quant/scripts/run_train.sh
@@ -46,16 +46,50 @@ run_ascend()
            --device_target=$1 &> train.log &  # dataset train folder
 }

 # Launch distributed GPU training in the background via mpirun.
 # Arguments: $1 device target, $2 DEVICE_NUM, $3 visible device list, $4 DATASET_PATH.
 run_gpu()
 {
    # DEVICE_NUM is invalid when it is below 1 OR above 8; the two bounds can
    # never both be violated at once, so they must be joined with ||.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
        exit 1
    fi

    if [ ! -d "$4" ]
    then
        echo "error: DATASET_PATH=$4 is not a directory"
        exit 1
    fi

    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    # Start from a fresh ../train working directory.
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit

    export CUDA_VISIBLE_DEVICES="$3"
    # Runs in the background; stdout and stderr go to ../train.log
    # (relative to the ../train directory entered above).
    mpirun -n $2 --allow-run-as-root \
    python ${BASEPATH}/../train.py \
        --dataset_path=$4 \
        --device_target=$1 \
        &> ../train.log &
 }

 # Between 4 and 6 positional arguments are accepted; otherwise print usage and fail.
 if [ $# -gt 6 ] || [ $# -lt 4 ]
 then
    echo "Usage:\n \
          Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
          GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
          "
    exit 1
 fi

 # Dispatch on the device target; "$1" is quoted so an empty argument does not break the test.
 if [ "$1" = "Ascend" ] ; then
    run_ascend "$@"
 elif [ "$1" = "GPU" ] ; then
    run_gpu "$@"
 else
    echo "Unsupported device target."
    # Report the failure through the exit status, like the usage check above.
    exit 1
 fi;
--- a/model_zoo/official/cv/mobilenetv2_quant/scripts/run_train_quant.sh
+++ b/model_zoo/official/cv/mobilenetv2_quant/scripts/run_train_quant.sh
@@ -47,16 +47,51 @@ run_ascend()
            --device_target=$1 &> train.log &  # dataset train folder
 }

 # Launch distributed quantization-aware GPU training in the background via mpirun.
 # Arguments: $1 device target, $2 DEVICE_NUM, $3 visible device list, $4 DATASET_PATH.
 run_gpu()
 {
    # DEVICE_NUM is invalid when it is below 1 OR above 8; the two bounds can
    # never both be violated at once, so they must be joined with ||.
    if [ "$2" -lt 1 ] || [ "$2" -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
        exit 1
    fi

    if [ ! -d "$4" ]
    then
        echo "error: DATASET_PATH=$4 is not a directory"
        exit 1
    fi

    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    # Start from a fresh ../train working directory.
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit

    export CUDA_VISIBLE_DEVICES="$3"
    # Runs in the background; stdout and stderr go to ../train.log
    # (relative to the ../train directory entered above).
    mpirun -n $2 --allow-run-as-root \
    python ${BASEPATH}/../train.py \
        --dataset_path=$4 \
        --device_target=$1 \
        --quantization_aware=True \
        &> ../train.log &
 }

 # Between 4 and 6 positional arguments are accepted; otherwise print usage and fail.
 # The usage text names this script (run_train_quant.sh), not run_train.sh.
 if [ $# -gt 6 ] || [ $# -lt 4 ]
 then
    echo "Usage:\n \
          Ascend: sh run_train_quant.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
          GPU: sh run_train_quant.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
          "
    exit 1
 fi

 # Dispatch on the device target; "$1" is quoted so an empty argument does not break the test.
 if [ "$1" = "Ascend" ] ; then
    run_ascend "$@"
 elif [ "$1" = "GPU" ] ; then
    run_gpu "$@"
 else
    echo "Unsupported device target."
    # Report the failure through the exit status, like the usage check above.
    exit 1
 fi;
--- a/model_zoo/official/cv/mobilenetv2_quant/src/config.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/src/config.py
@@ -33,7 +33,7 @@ config_ascend = ed({
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
    "quantization_aware": False,
 })
@@ -54,7 +54,45 @@ config_ascend_quant = ed({
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
    "quantization_aware": True,
 })

 # GPU training configuration without quantization-aware training.
 config_gpu = ed({
    "num_classes": 1000,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 150,
    "epoch_size": 200,
    # train.py reads `start_epoch` to offset the learning-rate schedule;
    # 0 means the schedule starts from the first step.
    "start_epoch": 0,
    "warmup_epochs": 4,
    "lr": 0.8,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
    # train.py reads this flag from every config, so it must be present here too
    # (config_ascend sets it to False as well).
    "quantization_aware": False,
 })

 # GPU training configuration selected by train.py when quantization-aware training is requested.
 config_gpu_quant = ed({
    "num_classes": 1000,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 134,
    "epoch_size": 60,
    # train.py offsets the learning-rate schedule by this many epochs
    # (global_step = start_epoch * step_size, total_epochs = epoch_size + start_epoch).
    "start_epoch": 200,
    "warmup_epochs": 1,
    "lr": 0.3,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
    # when True, train.py converts the network with quant.convert_quant_network
    "quantization_aware": True,
 })
--- a/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py
@@ -222,6 +222,12 @@ class mobilenetV2(nn.Cell):
                m.weight.set_parameter_data(w)
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.Conv2dBnAct):
                n = m.conv.kernel_size[0] * m.conv.kernel_size[1] * m.conv.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n), m.conv.weight.data.shape).astype("float32"))
                m.conv.weight.set_parameter_data(w)
                if m.conv.bias is not None:
                    m.conv.bias.set_parameter_data(Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
@@ -229,3 +235,8 @@ class mobilenetV2(nn.Cell):
                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.DenseBnAct):
                m.dense.weight.set_parameter_data(
                    Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                if m.dense.bias is not None:
                    m.dense.bias.set_parameter_data(Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
--- a/model_zoo/official/cv/mobilenetv2_quant/train.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/train.py
@@ -23,16 +23,17 @@ from mindspore import context
 from mindspore import Tensor
 from mindspore import nn
 from mindspore.train.model import Model, ParallelMode
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.communication.management import init
 from mindspore.communication.management import init, get_group_size, get_rank
 from mindspore.train.quant import quant
 import mindspore.dataset.engine as de

 from src.dataset import create_dataset
 from src.lr_generator import get_lr
 from src.utils import Monitor, CrossEntropyWithLabelSmooth
 from src.config import config_ascend, config_ascend_quant
 from src.config import config_ascend_quant, config_ascend, config_gpu_quant, config_gpu
 from src.mobilenetV2 import mobilenetV2

 random.seed(1)
@@ -55,11 +56,19 @@ if args_opt.device_target == "Ascend":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id, save_graphs=False)
 elif args_opt.platform == "GPU":
    init("nccl")
    context.set_auto_parallel_context(device_num=get_group_size(),
                                      parallel_mode=ParallelMode.DATA_PARALLEL,
                                      mirror_mean=True)
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="GPU",
                        save_graphs=False)
 else:
    raise ValueError("Unsupported device target.")

 if __name__ == '__main__':
    # train on ascend

 def train_on_ascend():
    config = config_ascend_quant if args_opt.quantization_aware else config_ascend
    print("training args: {}".format(args_opt))
    print("training configure: {}".format(config))
@@ -129,3 +138,72 @@ if __name__ == '__main__':
            callback += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=callback)
    print("============== End Training ==============")


 def train_on_gpu():
    """
    Train mobilenetV2 on GPU, optionally with quantization-aware training.

    Reads `args_opt.quantization_aware`, `args_opt.dataset_path`,
    `args_opt.device_target` and `args_opt.pre_trained`. The configuration is
    `config_gpu_quant` when quantization-aware training is requested, otherwise
    `config_gpu`.

    Returns:
        None.
    """
    import os  # local import: only needed to build the checkpoint directory

    config = config_gpu_quant if args_opt.quantization_aware else config_gpu
    print("training args: {}".format(args_opt))
    print("training configure: {}".format(config))

    # A configuration may omit these keys; fall back to plain (non-quantized)
    # training with a learning-rate schedule that starts at step 0.
    quantization_aware = getattr(config, "quantization_aware", False)
    start_epoch = getattr(config, "start_epoch", 0)

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                           num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    # define dataset
    epoch_size = config.epoch_size
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             config=config,
                             device_target=args_opt.device_target,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    # resume
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(network, param_dict)

    # convert fusion network to quantization aware network
    if quantization_aware:
        network = quant.convert_quant_network(network,
                                              bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, True])

    # get learning rate
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    lr = Tensor(get_lr(global_step=start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + start_epoch,
                       steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay, config.loss_scale)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

    print("============== Starting Training ==============")
    callback = [Monitor(lr_init=lr.asnumpy())]
    # One checkpoint directory per rank, placed under save_checkpoint_path. Joining
    # the parts avoids fusing them into a name such as "./checkpointckpt_0/".
    ckpt_save_dir = os.path.join(config.save_checkpoint_path, "ckpt_" + str(get_rank()))
    if config.save_checkpoint:
        config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="mobilenetV2", directory=ckpt_save_dir, config=config_ck)
        callback += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=callback)
    print("============== End Training ==============")


 if __name__ == '__main__':
    # Both branches read `device_target`; the GPU branch previously read
    # `args_opt.platform`, which is not used anywhere else in this file.
    if args_opt.device_target == "Ascend":
        train_on_ascend()
    elif args_opt.device_target == "GPU":
        train_on_gpu()