modify centernet scripts for cpu adaption and usr friendliness

5 years ago · 6ada7318be
--- a/model_zoo/research/cv/centernet/README.md
+++ b/model_zoo/research/cv/centernet/README.md
@@ -119,19 +119,27 @@ After installing MindSpore via the official website, you can start training and

 Note: 1.the first run of training will generate the mindrecord file, which will take a long time.
      2.MINDRECORD_DATASET_PATH is the mindrecord dataset directory.
      3.LOAD_CHECKPOINT_PATH is the pretrained checkpoint file directory, if no just set ""
      4.RUN_MODE support validation and testing, set to be "val"/"test"

 ```shell
 # create dataset in mindrecord format
 bash scripts/convert_dataset_to_mindrecord.sh

 # standalone training
 bash scripts/run_standalone_train_ascend.sh [DEVICE_ID] [EPOCH_SIZE]
 # standalone training on Ascend
 bash scripts/run_standalone_train_ascend.sh [DEVICE_ID] [MINDRECORD_DATASET_PATH] [LOAD_CHECKPOINT_PATH]

 # distributed training
 bash scripts/run_distributed_train_ascend.sh [MINDRECORD_DATASET_PATH] [RANK_TABLE_FILE]
 # standalone training on CPU
 bash scripts/run_standalone_train_cpu.sh [MINDRECORD_DATASET_PATH] [LOAD_CHECKPOINT_PATH]

 # eval
 bash scripts/run_standalone_eval_ascend.sh [DEVICE_ID]
 # distributed training on Ascend
 bash scripts/run_distributed_train_ascend.sh [MINDRECORD_DATASET_PATH] [LOAD_CHECKPOINT_PATH] [RANK_TABLE_FILE]

 # eval on Ascend
 bash scripts/run_standalone_eval_ascend.sh [DEVICE_ID] [RUN_MODE] [DATA_DIR] [LOAD_CHECKPOINT_PATH]

 # eval on CPU
 bash scripts/run_standalone_eval_cpu.sh [RUN_MODE] [DATA_DIR] [LOAD_CHECKPOINT_PATH]
 ```

 # [Script Description](#contents)
@@ -153,9 +161,11 @@ bash scripts/run_standalone_eval_ascend.sh [DEVICE_ID]
        │   │    ├──get_distribute_pretrain_cmd.py     // script for distributed pretraining
        │   │    ├──README.md
        │   ├──convert_dataset_to_mindrecord.sh        // shell script for converting coco type dataset to mindrecord
        │   ├──run_standalone_train_ascend.sh          // shell script for standalone pretrain on ascend
        │   ├──run_distributed_train_ascend.sh         // shell script for distributed pretrain on ascend
        │   ├──run_standalone_train_ascend.sh          // shell script for standalone training on ascend
        │   ├──run_distributed_train_ascend.sh         // shell script for distributed training on ascend
        │   ├──run_standalone_eval_ascend.sh           // shell script for standalone evaluation on ascend
        │   ├──run_standalone_train_cpu.sh             // shell script for standalone training on cpu
        │   ├──run_standalone_eval_cpu.sh              // shell script for standalone evaluation on cpu
        └── src
            ├──__init__.py
            ├──centernet_pose.py         // centernet networks, training entry
@@ -259,7 +269,6 @@ config for training.

 ```text
 config for evaluation.
    flip_test                       whether to use flip test: True | False, default is False
    soft_nms                        nms after decode: True | False, default is True
    keep_res                        keep original or fix resolution: True | False, default is False
    multi_scales                    use multi-scales of image: List, default is [1.0]
@@ -350,12 +359,12 @@ bash scripts/convert_dataset_to_mindrecord.sh

 The command above will run in the background, after converting mindrecord files will be located in path specified by yourself.

 ### Training
 ### Standalone Training

 #### Running on Ascend

 ```bash
 bash scripts/run_standalone_pretrain_ascend.sh 0 1
 bash scripts/run_standalone_train_ascend.sh device_id /path/mindrecord_dataset /path/load_ckpt
 ```

 The command above will run in the background, you can view training logs in training_log.txt. After training finished, you will get some checkpoint files under the script folder by default. The loss values will be displayed as follows:
@@ -368,12 +377,31 @@ epoch: 349.0, current epoch percent: 1.00, step: 87500, outputs are (Tensor(shap
 ...
 ```

 #### Running on CPU

 ```bash
 bash scripts/run_standalone_train_cpu.sh /path/mindrecord_dataset /path/load_ckpt
 ```

 The command above will run in the background, you can view training logs in training_log.txt. After training finished, you will get some checkpoint files under the script folder by default. The loss values will be displayed as follows (rusume from pretrained checkpoint and batch_size was set to be 8):

 ```text
 # grep "epoch" training_log.txt
 ...
 epoch: 0.0, current epoch percent: 0.00, step: 1, time of per steps: 66.693 s, outputs are 3.645
 epoch: 0.0, current epoch percent: 0.00, step: 2, time of per steps: 46.594 s, outputs are 4.862
 epoch: 0.0, current epoch percent: 0.00, step: 3, time of per steps: 44.718 s, outputs are 3.927
 epoch: 0.0, current epoch percent: 0.00, step: 4, time of per steps: 45.113 s, outputs are 3.910
 epoch: 0.0, current epoch percent: 0.00, step: 5, time of per steps: 45.213 s, outputs are 3.749
 ...
 ```

 ### Distributed Training

 #### Running on Ascend

 ```bash
 bash scripts/run_distributed_pretrain_ascend.sh /path/coco2017 /path/mindrecord /path/hccl.json
 bash scripts/run_distributed_pretrain_ascend.sh /path/mindrecord_dataset /path/load_ckpt /path/hccl.json
 ```

 The command above will run in the background, you can view training logs in LOG*/training_log.txt and LOG*/ms_log/. After training finished, you will get some checkpoint files under the LOG*/ckpt_0 folder by default. The loss value will be displayed as follows:
@@ -394,7 +422,11 @@ epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[

 ```bash
 # Evaluation base on validation dataset will be done automatically, while for test or test-dev dataset, the accuracy should be upload to the CodaLab official website(https://competitions.codalab.org).
 bash scripts/run_standalone_eval_ascend.sh [DEVICE_ID]
 # On Ascend
 bash scripts/run_standalone_eval_ascend.sh device_id val(or test) /path/coco_dataset /path/load_ckpt

 # On CPU
 bash scripts/run_standalone_eval_cpu.sh val(or test) /path/coco_dataset /path/load_ckpt
 ```

 you can see the MAP result below as below:
@@ -439,7 +471,7 @@ python export.py [DEVICE_ID]

 ## [Performance](#contents)

 ### Training Performance
 ### Training Performance On Ascend

 CenterNet on 11.8K images(The annotation and data format must be the same as coco)

@@ -460,7 +492,7 @@ CenterNet on 11.8K images(The annotation and data format must be the same as coc
 | Checkpoint                 | 242M (.ckpt file)                                              |
 | Scripts                    | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/centernet> |

 ### Inference Performance
 ### Inference Performance On Ascend

 CenterNet on validation(5K images) and test-dev(40K images)

--- a/model_zoo/research/cv/centernet/eval.py
+++ b/model_zoo/research/cv/centernet/eval.py
@@ -36,6 +36,8 @@ from src.config import dataset_config, net_config, eval_config
 _current_dir = os.path.dirname(os.path.realpath(__file__))

 parser = argparse.ArgumentParser(description='CenterNet evaluation')
 parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'CPU'],
                    help='device where the code will be implemented. (Default: Ascend)')
 parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
 parser.add_argument("--load_checkpoint_path", type=str, default="", help="Load checkpoint file path")
 parser.add_argument("--data_dir", type=str, default="", help="Dataset directory, "
@@ -52,15 +54,20 @@ def predict():
    '''
    Predict function
    '''
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
    if args_opt.device_target == "Ascend":
        context.set_context(device_id=args_opt.device_id)
        enable_nms_fp16 = True
    else:
        enable_nms_fp16 = False

    logger.info("Begin creating {} dataset".format(args_opt.run_mode))
    coco = COCOHP(dataset_config, run_mode=args_opt.run_mode, net_opt=net_config,
                  enable_visual_image=(args_opt.visual_image == "true"), save_path=args_opt.save_result_dir,)
    coco.init(args_opt.data_dir, keep_res=eval_config.keep_res, flip_test=eval_config.flip_test)
    coco.init(args_opt.data_dir, keep_res=eval_config.keep_res)
    dataset = coco.create_eval_dataset()

    net_for_eval = CenterNetMultiPoseEval(net_config, eval_config.flip_test, eval_config.K)
    net_for_eval = CenterNetMultiPoseEval(net_config, eval_config.K, enable_nms_fp16)
    net_for_eval.set_train(False)

    param_dict = load_checkpoint(args_opt.load_checkpoint_path)
@@ -103,9 +110,7 @@ def predict():
        print("Image {}/{} id: {} cost time {} ms".format(index, total_nums, image_id, (end - start) * 1000.))

        # post-process
        soft_nms = eval_config.soft_nms or len(eval_config.multi_scales) > 0
        detections = merge_outputs(detections, soft_nms)

        detections = merge_outputs(detections, eval_config.soft_nms)
        # get prediction result
        pred_json = convert_eval_format(detections, image_id)
        gt_image_info = coco.coco.loadImgs([image_id])
--- a/model_zoo/research/cv/centernet/export.py
+++ b/model_zoo/research/cv/centernet/export.py
@@ -31,7 +31,7 @@ args = parser.parse_args()
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args.device_id)

 if __name__ == '__main__':
    net = CenterNetMultiPoseEval(net_config, eval_config.flip_test, eval_config.K)
    net = CenterNetMultiPoseEval(net_config, eval_config.K)
    net.set_train(False)

    param_dict = load_checkpoint(export_config.ckpt_file)
--- a/model_zoo/research/cv/centernet/scripts/ascend_distributed_launcher/get_distribute_train_cmd.py
+++ b/model_zoo/research/cv/centernet/scripts/ascend_distributed_launcher/get_distribute_train_cmd.py
@@ -39,8 +39,7 @@ def parse_args():
    parser.add_argument("--hyper_parameter_config_dir", type=str, default="",
                        help="Hyper Parameter config path, it is better to use absolute path")
    parser.add_argument("--mindrecord_dir", type=str, default="", help="Mindrecord dataset directory")
    parser.add_argument("--mindrecord_prefix", type=str, default="coco_hp.train.mind",
                        help="Prefix of MindRecord dataset filename.")
    parser.add_argument("--load_checkpoint_path", type=str, default="", help="Load checkpoint file path")
    parser.add_argument("--hccl_config_dir", type=str, default="",
                        help="Hccl config path, it is better to use absolute path")
    parser.add_argument("--cmd_file", type=str, default="distributed_cmd.sh",
@@ -72,7 +71,7 @@ def distribute_train():

    run_script = args.run_script_dir
    mindrecord_dir = args.mindrecord_dir
    mindrecord_prefix = args.mindrecord_prefix
    load_checkpoint_path = args.load_checkpoint_path
    cf = configparser.ConfigParser()
    cf.read(args.hyper_parameter_config_dir)
    cfg = dict(cf.items("config"))
@@ -151,7 +150,7 @@ def distribute_train():
                             " 'device_num' or 'mindrecord_dir'! ")
        run_cmd += opt
        run_cmd += " --mindrecord_dir=" + mindrecord_dir
        run_cmd += " --mindrecord_prefix=" + mindrecord_prefix
        run_cmd += " --load_checkpoint_path=" + load_checkpoint_path
        run_cmd += ' --device_id=' + str(device_id) + ' --device_num=' \
               + str(rank_size) + ' >./training_log.txt 2>&1 &'

--- a/model_zoo/research/cv/centernet/scripts/ascend_distributed_launcher/hyper_parameter_config.ini
+++ b/model_zoo/research/cv/centernet/scripts/ascend_distributed_launcher/hyper_parameter_config.ini
@@ -5,7 +5,6 @@ enable_save_ckpt=true
 do_shuffle=true
 enable_data_sink=true
 data_sink_steps=50
 load_checkpoint_path=""
 save_checkpoint_path=./
 save_checkpoint_steps=3000
 save_checkpoint_num=1
--- a/model_zoo/research/cv/centernet/scripts/run_distributed_train_ascend.sh
+++ b/model_zoo/research/cv/centernet/scripts/run_distributed_train_ascend.sh
@@ -14,21 +14,26 @@
 # limitations under the License.
 # ============================================================================

 echo "=============================================================================================================="
 echo "Please run the scipt as: "
 echo "bash run_distributed_train_ascend.sh DATA_DIR MINDRECORD_DIR RANK_TABLE_FILE"
 echo "for example: bash run_distributed_train_ascend.sh /path/dataset /path/mindrecord /path/hccl.json"
 echo "It is better to use absolute path."
 echo "================================================================================================================"
 echo "Please run the script as: "
 echo "bash run_distributed_train_ascend.sh MINDRECORD_DIR LOAD_CHECKPOINT_PATH RANK_TABLE_FILE"
 echo "for example: bash run_distributed_train_ascend.sh /path/mindrecord_dataset /path/load_ckpt /path/hccl.json"
 echo "if no ckpt, just run: bash run_distributed_train_ascend.sh /path/mindrecord_dataset \"\" /path/hccl.json"
 echo "It is better to use the absolute path."
 echo "For hyper parameter, please note that you should customize the scripts:
          '{CUR_DIR}/scripts/ascend_distributed_launcher/hyper_parameter_config.ini' "
 echo "=============================================================================================================="
 echo "================================================================================================================"
 CUR_DIR=`pwd`
 MINDRECORD_DIR=$1
 LOAD_CHECKPOINT_PATH=$2
 HCCL_RANK_FILE=$3

 python ${CUR_DIR}/scripts/ascend_distributed_launcher/get_distribute_train_cmd.py \
    --run_script_dir=${CUR_DIR}/train.py \
    --hyper_parameter_config_dir=${CUR_DIR}/scripts/ascend_distributed_launcher/hyper_parameter_config.ini \
    --mindrecord_dir=$1 \
    --hccl_config_dir=$2 \
    --mindrecord_dir=$MINDRECORD_DIR \
    --load_checkpoint_path=$LOAD_CHECKPOINT_PATH \
    --hccl_config_dir=$HCCL_RANK_FILE \
    --hccl_time_out=1200 \
    --cmd_file=distributed_cmd.sh

--- a/model_zoo/research/cv/centernet/scripts/run_standalone_eval_ascend.sh
+++ b/model_zoo/research/cv/centernet/scripts/run_standalone_eval_ascend.sh
@@ -16,11 +16,14 @@

 echo "=============================================================================================================="
 echo "Please run the scipt as: "
 echo "bash run_standalone_eval_ascend.sh DEVICE_ID"
 echo "for example: bash run_standalone_eval_ascend.sh 0"
 echo "bash run_standalone_eval_ascend.sh DEVICE_ID RUN_MODE DATA_DIR LOAD_CHECKPOINT_PATH"
 echo "for example of validation: bash run_standalone_eval_ascend.sh 0 val /path/coco_dataset /path/load_ckpt"
 echo "for example of test: bash run_standalone_eval_ascend.sh 0 test /path/coco_dataset /path/load_ckpt"
 echo "=============================================================================================================="

 DEVICE_ID=$1
 RUN_MODE=$2
 DATA_DIR=$3
 LOAD_CHECKPOINT_PATH=$4
 mkdir -p ms_log 
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
 CUR_DIR=`pwd`
@@ -42,10 +45,11 @@ else
 fi

 python ${PROJECT_DIR}/../eval.py  \
    --device_target=Ascend \
    --device_id=$DEVICE_ID \
    --load_checkpoint_path="" \
    --data_dir="" \
    --load_checkpoint_path=$LOAD_CHECKPOINT_PATH \
    --data_dir=$DATA_DIR \
    --run_mode=$RUN_MODE \
    --visual_image=true \
    --enable_eval=true \
    --save_result_dir="" \
    --run_mode=val > eval_log.txt 2>&1 &
    --save_result_dir=./ > eval_log.txt 2>&1 &
--- a/model_zoo/research/cv/centernet/scripts/run_standalone_eval_cpu.sh
+++ b/model_zoo/research/cv/centernet/scripts/run_standalone_eval_cpu.sh
@@ -0,0 +1,53 @@
 #!/bin/bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 echo "=============================================================================================================="
 echo "Please run the scipt as: "
 echo "bash run_standalone_eval_cpu.sh RUN_MODE DATA_DIR LOAD_CHECKPOINT_PATH"
 echo "for example of validation: bash run_standalone_eval_cpu.sh val /path/coco_dataset /path/load_ckpt"
 echo "for example of test: bash run_standalone_eval_cpu.sh test /path/coco_dataset /path/load_ckpt"
 echo "=============================================================================================================="
 RUN_MODE=$1
 DATA_DIR=$2
 LOAD_CHECKPOINT_PATH=$3
 mkdir -p ms_log 
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
 CUR_DIR=`pwd`
 export GLOG_log_dir=${CUR_DIR}/ms_log
 export GLOG_logtostderr=0

 # install nms module from third party
 if python -c "import nms" > /dev/null 2>&1
 then
    echo "NMS module already exits, no need reinstall."
 else
    echo "NMS module was not found, install it now..."
    git clone https://github.com/xingyizhou/CenterNet.git
    cd CenterNet/src/lib/external/
    make
    python setup.py install
    cd -
    rm -rf CenterNet
 fi

 python ${PROJECT_DIR}/../eval.py  \
    --device_target=CPU \
    --load_checkpoint_path=$LOAD_CHECKPOINT_PATH \
    --data_dir=$DATA_DIR \
    --run_mode=$RUN_MODE \
    --visual_image=true \
    --enable_eval=true \
    --save_result_dir=./ > eval_log.txt 2>&1 &
--- a/model_zoo/research/cv/centernet/scripts/run_standalone_train_ascend.sh
+++ b/model_zoo/research/cv/centernet/scripts/run_standalone_train_ascend.sh
@@ -16,12 +16,14 @@

 echo "=============================================================================================================="
 echo "Please run the scipt as: "
 echo "bash run_standalone_pretrain_ascend.sh DEVICE_ID EPOCH_SIZE"
 echo "for example: bash run_standalone_pretrain_ascend.sh 0 350"
 echo "bash run_standalone_train_ascend.sh DEVICE_ID MINDRECORD_DIR LOAD_CHECKPOINT_PATH"
 echo "for example: bash run_standalone_train_ascend.sh 0 /path/mindrecord_dataset /path/load_ckpt"
 echo "if no ckpt, just run: bash run_standalone_train_ascend.sh 0 /path/mindrecord_dataset \"\" "
 echo "=============================================================================================================="

 DEVICE_ID=$1
 EPOCH_SIZE=$2
 MINDRECORD_DIR=$2
 LOAD_CHECKPOINT_PATH=$3

 mkdir -p ms_log 
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
@@ -33,16 +35,16 @@ python ${PROJECT_DIR}/../train.py  \
    --distribute=false \
    --need_profiler=false \
    --profiler_path=./profiler \
    --epoch_size=$EPOCH_SIZE \
    --device_id=$DEVICE_ID \
    --enable_save_ckpt=true \
    --do_shuffle=true \
    --enable_data_sink=true \
    --data_sink_steps=50 \
    --load_checkpoint_path="" \
    --epoch_size=350 \
    --load_checkpoint_path=$LOAD_CHECKPOINT_PATH \
    --save_checkpoint_steps=10000 \
    --save_checkpoint_num=1 \
    --mindrecord_dir="" \
    --mindrecord_dir=$MINDRECORD_DIR \
    --mindrecord_prefix="coco_hp.train.mind" \
    --visual_image=false \
    --save_result_dir="" > training_log.txt 2>&1 &
--- a/model_zoo/research/cv/centernet/scripts/run_standalone_train_cpu.sh
+++ b/model_zoo/research/cv/centernet/scripts/run_standalone_train_cpu.sh
@@ -0,0 +1,44 @@
 #!/bin/bash
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 echo "=============================================================================================================="
 echo "Please run the scipt as: "
 echo "bash run_standalone_train_cpu.sh MINDRECORD_DIR LOAD_CHECKPOINT_PATH"
 echo "for example: bash run_standalone_train_cpu.sh /path/mindrecord_dataset /path/load_ckpt"
 echo "if no ckpt, just run: bash run_standalone_train_cpu.sh /path/mindrecord_dataset \"\" "
 echo "=============================================================================================================="

 MINDRECORD_DIR=$1
 LOAD_CHECKPOINT_PATH=$2

 mkdir -p ms_log
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
 CUR_DIR=`pwd`
 export GLOG_log_dir=${CUR_DIR}/ms_log
 export GLOG_logtostderr=0

 python ${PROJECT_DIR}/../train.py  \
    --device_target=CPU \
    --enable_save_ckpt=true \
    --do_shuffle=true \
    --epoch_size=1 \
    --load_checkpoint_path=$LOAD_CHECKPOINT_PATH \
    --save_checkpoint_steps=1000 \
    --save_checkpoint_num=1 \
    --mindrecord_dir=$MINDRECORD_DIR \
    --mindrecord_prefix="coco_hp.train.mind" \
    --visual_image=false \
    --save_result_dir="" > training_log.txt 2>&1 &
--- a/model_zoo/research/cv/centernet/src/init.py
+++ b/model_zoo/research/cv/centernet/src/init.py
@@ -15,7 +15,7 @@
 """CenterNet Init."""

 from .centernet_pose import GatherMultiPoseFeatureCell, CenterNetMultiPoseLossCell, \
    CenterNetWithLossScaleCell, CenterNetMultiPoseEval
    CenterNetWithLossScaleCell, CenterNetMultiPoseEval, CenterNetWithoutLossScaleCell
 from .dataset import COCOHP
 from .visual import visual_allimages, visual_image
 from .decode import MultiPoseDecode
@@ -23,6 +23,7 @@ from .post_process import convert_eval_format, to_float, resize_detection, post_

 __all__ = [
    "GatherMultiPoseFeatureCell", "CenterNetMultiPoseLossCell", "CenterNetWithLossScaleCell", \
    "CenterNetMultiPoseEval", "COCOHP", "visual_allimages", "visual_image", "MultiPoseDecode", \
    "convert_eval_format", "to_float", "resize_detection", "post_process", "merge_outputs"
    "CenterNetMultiPoseEval", "CenterNetWithoutLossScaleCell", "COCOHP", "visual_allimages", \
    "visual_image", "MultiPoseDecode", "convert_eval_format", "to_float", "resize_detection", \
    "post_process", "merge_outputs"
 ]
--- a/model_zoo/research/cv/centernet/src/centernet_pose.py
+++ b/model_zoo/research/cv/centernet/src/centernet_pose.py
@@ -197,6 +197,46 @@ class CenterNetMultiPoseLossCell(nn.Cell):
        return total_loss


 class CenterNetWithoutLossScaleCell(nn.Cell):
    """
    Encapsulation class of centernet training.

    Append an optimizer to the training network after that the construct
    function can be called to create the backward graph.

    Args:
        network (Cell): The training network. Note that loss function should have been added.
        optimizer (Optimizer): Optimizer for updating the weights.

    Returns:
        Tuple of Tensors, the loss, overflow flag and scaling sens of the network.
    """
    def __init__(self, network, optimizer):
        super(CenterNetWithoutLossScaleCell, self).__init__(auto_prefix=False)
        self.image = ImagePreProcess()
        self.network = network
        self.network.set_grad()
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = ops.GradOperation(get_by_list=True, sens_param=False)

    @ops.add_flags(has_effect=True)
    def construct(self, image, hm, reg_mask, ind, wh, kps, kps_mask, reg,
                  hm_hp, hp_offset, hp_ind, hp_mask):
        """Defines the computation performed."""
        image = self.image(image)
        weights = self.weights
        loss = self.network(image, hm, reg_mask, ind, wh, kps, kps_mask, reg,
                            hm_hp, hp_offset, hp_ind, hp_mask)

        grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, kps,
                                                 kps_mask, reg, hm_hp, hp_offset,
                                                 hp_ind, hp_mask)
        succ = self.optimizer(grads)
        ret = loss
        return ops.depend(ret, succ)


 class CenterNetWithLossScaleCell(nn.Cell):
    """
    Encapsulation class of centernet training.
@@ -279,17 +319,16 @@ class CenterNetMultiPoseEval(nn.Cell):

    Args:
        net_config: The config info of CenterNet network.
        flip_test(bool): Flip data augmentation or not. Default: False.
        K(number): Max number of output objects. Default: 100.
        enable_nms_fp16(bool): Use float16 data for max_pool, adaption for CPU. Default: True.

    Returns:
        Tensor, detection of images(bboxes, score, keypoints and category id of each objects)
    """
    def __init__(self, net_config, flip_test=False, K=100):
    def __init__(self, net_config, K=100, enable_nms_fp16=True):
        super(CenterNetMultiPoseEval, self).__init__()
        self.network = GatherMultiPoseFeatureCell(net_config)
        self.decode = MultiPoseDecode(net_config, flip_test, K)
        self.flip_test = flip_test
        self.decode = MultiPoseDecode(net_config, K, enable_nms_fp16)
        self.shape = ops.Shape()
        self.reshape = ops.Reshape()

--- a/model_zoo/research/cv/centernet/src/config.py
+++ b/model_zoo/research/cv/centernet/src/config.py
@@ -104,8 +104,7 @@ train_config = edict({


 eval_config = edict({
    'flip_test': False,
    'soft_nms': False,
    'soft_nms': True,
    'keep_res': True,
    'multi_scales': [1.0],
    'pad': 31,
--- a/model_zoo/research/cv/centernet/src/dataset.py
+++ b/model_zoo/research/cv/centernet/src/dataset.py
@@ -17,7 +17,6 @@ Data operations, will be used in train.py
 """

 import os
 import copy
 import math
 import argparse
 import cv2
@@ -66,7 +65,7 @@ class COCOHP(ds.Dataset):
            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)

    def init(self, data_dir, keep_res=False, flip_test=False):
    def init(self, data_dir, keep_res=False):
        """initailize additional info"""
        logger.info('Initializing coco 2017 {} data.'.format(self.run_mode))
        if not os.path.isdir(data_dir):
@@ -94,7 +93,6 @@ class COCOHP(ds.Dataset):
            self.images = image_ids
        self.num_samples = len(self.images)
        self.keep_res = keep_res
        self.flip_test = flip_test
        if self.run_mode != "train":
            self.pad = 31
        logger.info('Loaded {} {} samples'.format(self.run_mode, self.num_samples))
@@ -167,7 +165,7 @@ class COCOHP(ds.Dataset):
        ret = (img, image_id)
        return ret

    def pre_process_for_test(self, image, img_id, scale, meta=None):
    def pre_process_for_test(self, image, img_id, scale):
        """image pre-process for evaluation"""
        b, h, w, ch = image.shape
        assert b == 1, "only single image was supported here"
@@ -191,17 +189,8 @@ class COCOHP(ds.Dataset):
                                   flags=cv2.INTER_LINEAR)
        inp_img = (inp_image.astype(np.float32) / 255. - self.data_opt.mean) / self.data_opt.std

        h, w, ch = inp_img.shape
        images = copy.deepcopy(inp_img)
        if self.flip_test:
            flip_image = inp_img[:, ::-1, :]
            inp_img = inp_img.reshape((1, h, w, ch))
            flip_image = flip_image.reshape((1, h, w, ch))
            # (2, h, w, c)
            images = np.concatenate((inp_img, flip_image), axis=0)
        else:
            images = images.reshape((1, h, w, ch))
        images = images.transpose(0, 3, 1, 2)
        eval_image = inp_img.reshape((1,) + inp_img.shape)
        eval_image = eval_image.transpose(0, 3, 1, 2)

        meta = {'c': c, 's': s,
                'out_height': inp_height // self.net_opt.down_ratio,
@@ -244,7 +233,7 @@ class COCOHP(ds.Dataset):
                image_name = "gt_" + self.run_mode + "_image_" + str(img_id) + "_scale_" + str(scale) + ".png"
                cv2.imwrite("{}/{}".format(self.save_path, image_name), inp_image)

        return images, meta
        return eval_image, meta

    def preprocess_fn(self, img, num_objects, keypoints, bboxes, category_id):
        """image pre-process and augmentation"""
--- a/model_zoo/research/cv/centernet/src/decode.py
+++ b/model_zoo/research/cv/centernet/src/decode.py
@@ -30,25 +30,32 @@ class NMS(nn.Cell):

    Args:
        kernel(int): Maxpooling kernel size. Default: 3.
        enable_nms_fp16(bool): Use float16 data for max_pool, adaption for CPU. Default: True.

    Returns:
        Tensor, heatmap after non-maximum suppression.
    """
    def __init__(self, kernel=3):
    def __init__(self, kernel=3, enable_nms_fp16=True):
        super(NMS, self).__init__()
        self.pad = (kernel - 1) // 2
        self.cast = ops.Cast()
        self.dtype = ops.DType()
        self.equal = ops.Equal()
        self.max_pool = nn.MaxPool2d(kernel, stride=1, pad_mode="same")
        self.enable_fp16 = enable_nms_fp16

    def construct(self, heat):
        """Non-maximum suppression"""
        dtype = self.dtype(heat)
        heat = self.cast(heat, mstype.float16)
        heat_max = self.max_pool(heat)
        keep = self.equal(heat, heat_max)
        keep = self.cast(keep, dtype)
        heat = self.cast(heat, dtype)
        if self.enable_fp16:
            heat = self.cast(heat, mstype.float16)
            heat_max = self.max_pool(heat)
            keep = self.equal(heat, heat_max)
            keep = self.cast(keep, dtype)
            heat = self.cast(heat, dtype)
        else:
            heat_max = self.max_pool(heat)
            keep = self.equal(heat, heat_max)
        heat = heat * keep
        return heat

@@ -127,18 +134,24 @@ class GatherFeatureByInd(nn.Cell):
    """
    Gather features by index

    Args: None
    Args:
        enable_cpu_gather (bool): Use cpu operator GatherD to gather feature or not, adaption for CPU. Default: True.

    Returns:
        Tensor
    """
    def __init__(self):
    def __init__(self, enable_cpu_gatherd=True):
        super(GatherFeatureByInd, self).__init__()
        self.tile = ops.Tile()
        self.shape = ops.Shape()
        self.concat = ops.Concat(axis=1)
        self.reshape = ops.Reshape()
        self.gather_nd = ops.GatherNd()
        self.enable_cpu_gatherd = enable_cpu_gatherd
        if self.enable_cpu_gatherd:
            self.gather_nd = ops.GatherD()
            self.expand_dims = ops.ExpandDims()
        else:
            self.gather_nd = ops.GatherNd()

    def construct(self, feat, ind):
        """gather by index"""
@@ -147,18 +160,24 @@ class GatherFeatureByInd(nn.Cell):
        b, J, K = self.shape(ind)
        feat = self.reshape(feat, (b, J, K, -1))
        _, _, _, N = self.shape(feat)
        ind = self.reshape(ind, (-1, 1))
        ind_b = nn.Range(0, b * J, 1)()
        ind_b = self.reshape(ind_b, (-1, 1))
        ind_b = self.tile(ind_b, (1, K))
        ind_b = self.reshape(ind_b, (-1, 1))
        index = self.concat((ind_b, ind))
        # (b, N, 2)
        index = self.reshape(index, (-1, K, 2))
        # (b, N, c)
        feat = self.reshape(feat, (-1, K, N))
        feat = self.gather_nd(feat, index)
        feat = self.reshape(feat, (b, J, K, -1))
        if self.enable_cpu_gatherd:
            # (b, J, K, N)
            index = self.expand_dims(ind, -1)
            index = self.tile(index, (1, 1, 1, N))
            feat = self.gather_nd(feat, 2, index)
        else:
            ind = self.reshape(ind, (-1, 1))
            ind_b = nn.Range(0, b * J, 1)()
            ind_b = self.reshape(ind_b, (-1, 1))
            ind_b = self.tile(ind_b, (1, K))
            ind_b = self.reshape(ind_b, (-1, 1))
            index = self.concat((ind_b, ind))
            # (b*J, K, 2)
            index = self.reshape(index, (-1, K, 2))
            # (b*J, K)
            feat = self.reshape(feat, (-1, K, N))
            feat = self.gather_nd(feat, index)
            feat = self.reshape(feat, (b, J, K, -1))
        return feat


@@ -285,17 +304,16 @@ class MultiPoseDecode(nn.Cell):

    Args:
        net_config(edict): config info for CenterNet network.
        flip_test(bool): flip test of not. Default: False.
        K(int): maximum objects number. Default: 100.
        enable_nms_fp16(bool): Use float16 data for max_pool, adaption for CPU. Default: True.

    Returns:
        Tensor, multi-objects detections.
    """
    def __init__(self, net_config, flip_test=False, K=100):
    def __init__(self, net_config, K=100, enable_nms_fp16=True):
        super(MultiPoseDecode, self).__init__()
        self.K = K
        self.flip_test = flip_test
        self.nms = NMS()
        self.nms = NMS(enable_nms_fp16=enable_nms_fp16)
        self.shape = ops.Shape()
        self.gather_topk = GatherTopK()
        self.gather_topk_channel = GatherTopKChannel()
@@ -336,8 +354,6 @@ class MultiPoseDecode(nn.Cell):
    def construct(self, feature):
        """gather detections"""
        heat = feature[0]
        if self.flip_test:
            heat = self.flip_tensor(heat)
        K = self.K
        b, _, _, _ = self.shape(heat)
        heat = self.nms(heat)
@@ -346,8 +362,6 @@ class MultiPoseDecode(nn.Cell):
        xs = self.reshape(xs, (b, K, 1))

        kps = feature[1]
        if self.flip_test:
            kps = self.flip_lr_off(kps)
        num_joints = self.shape(kps)[1] / 2
        # (b, K, num_joints*2)
        kps = self.trans_gather_feature(kps, inds)
@@ -365,15 +379,11 @@ class MultiPoseDecode(nn.Cell):
        kps = self.reshape(kps, (b, K, num_joints * 2))

        wh = feature[2]
        if self.flip_test:
            wh = self.flip_tensor(wh)
        wh = self.trans_gather_feature(wh, inds)
        ws, hs = self.half(wh)

        if self.reg_offset:
            reg = feature[self.reg_ind]
            if self.flip_test:
                reg, _ = self.half_first(reg)
            reg = self.trans_gather_feature(reg, inds)
            reg = self.reshape(reg, (b, K, 2))
            reg_w, reg_h = self.half(reg)
@@ -387,16 +397,12 @@ class MultiPoseDecode(nn.Cell):

        if self.hm_hp:
            hm_hp = feature[self.hm_hp_ind]
            if self.flip_test:
                hm_hp = self.flip_lr(hm_hp)
            hm_hp = self.nms(hm_hp)
            # (b, num_joints, K)
            hm_score, hm_inds, hm_ys, hm_xs = self.gather_topk_channel(hm_hp, K=K)

            if self.reg_hp_offset:
                hp_offset = feature[self.reg_hp_ind]
                if self.flip_test:
                    hp_offset, _ = self.half_first(hp_offset)
                hp_offset = self.trans_gather_feature(hp_offset, self.reshape(hm_inds, (b, -1)))
                hp_offset = self.reshape(hp_offset, (b, num_joints, K, 2))
                hp_ws, hp_hs = self.half(hp_offset)
--- a/model_zoo/research/cv/centernet/src/utils.py
+++ b/model_zoo/research/cv/centernet/src/utils.py
@@ -17,6 +17,7 @@ Functional Cells to be used.
 """

 import math
 import time
 import numpy as np
 import mindspore.nn as nn
 import mindspore.ops as ops
@@ -119,33 +120,46 @@ class GatherFeature(nn.Cell):
    """
    Gather feature at specified position

    Args: None
    Args:
        enable_cpu_gather (bool): Use cpu operator GatherD to gather feature or not, adaption for CPU. Default: True.

    Returns:
        Tensor, feature at spectified position
    """
    def __init__(self):
    def __init__(self, enable_cpu_gather=True):
        super(GatherFeature, self).__init__()
        self.tile = ops.Tile()
        self.shape = ops.Shape()
        self.concat = ops.Concat(axis=1)
        self.reshape = ops.Reshape()
        self.gather_nd = ops.GatherNd()
        self.enable_cpu_gather = enable_cpu_gather
        if self.enable_cpu_gather:
            self.gather_nd = ops.GatherD()
            self.expand_dims = ops.ExpandDims()
        else:
            self.gather_nd = ops.GatherND()

    def construct(self, feat, ind):
        """gather by specified index"""
        # (b, N)->(b*N, 1)
        b, N = self.shape(ind)
        ind = self.reshape(ind, (-1, 1))
        ind_b = nn.Range(0, b, 1)()
        ind_b = self.reshape(ind_b, (-1, 1))
        ind_b = self.tile(ind_b, (1, N))
        ind_b = self.reshape(ind_b, (-1, 1))
        index = self.concat((ind_b, ind))
        # (b, N, 2)
        index = self.reshape(index, (b, N, -1))
        # (b, N, c)
        feat = self.gather_nd(feat, index)
        if self.enable_cpu_gather:
            _, _, c = self.shape(feat)
            # (b, N, c)
            index = self.expand_dims(ind, -1)
            index = self.tile(index, (1, 1, c))
            feat = self.gather_nd(feat, 1, index)
        else:
            # (b, N)->(b*N, 1)
            b, N = self.shape(ind)
            ind = self.reshape(ind, (-1, 1))
            ind_b = nn.Range(0, b, 1)()
            ind_b = self.reshape(ind_b, (-1, 1))
            ind_b = self.tile(ind_b, (1, N))
            ind_b = self.reshape(ind_b, (-1, 1))
            index = self.concat((ind_b, ind))
            # (b, N, 2)
            index = self.reshape(index, (b, N, -1))
            # (b, N, c)
            feat = self.gather_nd(feat, index)
        return feat


@@ -477,11 +491,19 @@ class LossCallBack(Callback):

    Args:
        dataset_size (int): Dataset size. Default: -1.
        enable_static_time (bool): enable static time cost, adaption for CPU. Default: False.
    """

    def __init__(self, dataset_size=-1):
    def __init__(self, dataset_size=-1, enable_static_time=False):
        super(LossCallBack, self).__init__()
        self._dataset_size = dataset_size
        self._enable_static_time = enable_static_time

    def step_begin(self, run_context):
        """
        Get begining time of each step
        """
        self._begin_time = time.time()

    def step_end(self, run_context):
        """
@@ -493,11 +515,19 @@ class LossCallBack(Callback):
            if percent == 0:
                percent = 1
                epoch_num -= 1
            print("epoch: {}, current epoch percent: {}, step: {}, outputs are {}"
                  .format(int(epoch_num), "%.3f" % percent, cb_params.cur_step_num, str(cb_params.net_outputs)))
            if self._enable_static_time:
                cur_time = time.time()
                time_per_step = cur_time - self._begin_time
                print("epoch: {}, current epoch percent: {}, step: {}, time per step: {} s, outputs are {}"
                      .format(int(epoch_num), "%.3f" % percent, cb_params.cur_step_num, "%.3f" % time_per_step,
                              str(cb_params.net_outputs)), flush=True)
            else:
                print("epoch: {}, current epoch percent: {}, step: {}, outputs are {}"
                      .format(int(epoch_num), "%.3f" % percent, cb_params.cur_step_num,
                              str(cb_params.net_outputs)), flush=True)
        else:
            print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num,
                                                               str(cb_params.net_outputs)))
                                                               str(cb_params.net_outputs)), flush=True)


 class CenterNetPolynomialDecayLR(LearningRateSchedule):
--- a/model_zoo/research/cv/centernet/train.py
+++ b/model_zoo/research/cv/centernet/train.py
@@ -31,12 +31,15 @@ from mindspore.common import set_seed
 from mindspore.profiler import Profiler
 from src.dataset import COCOHP
 from src import CenterNetMultiPoseLossCell, CenterNetWithLossScaleCell
 from src import CenterNetWithoutLossScaleCell
 from src.utils import LossCallBack, CenterNetPolynomialDecayLR, CenterNetMultiEpochsDecayLR
 from src.config import dataset_config, net_config, train_config

 _current_dir = os.path.dirname(os.path.realpath(__file__))

 parser = argparse.ArgumentParser(description='CenterNet training')
 parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'CPU'],
                    help='device where the code will be implemented. (Default: Ascend)')
 parser.add_argument("--distribute", type=str, default="false", choices=["true", "false"],
                    help="Run distribute, default is false.")
 parser.add_argument("--need_profiler", type=str, default="false", choices=["true", "false"],
@@ -125,26 +128,32 @@ def _get_optimizer(network, dataset_size):

 def train():
    """training CenterNet"""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    context.set_context(enable_auto_mixed_precision=False)
    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
    context.set_context(reserve_class_name_in_scope=False)
    context.set_context(save_graphs=False)

    ckpt_save_dir = args_opt.save_checkpoint_path
    if args_opt.distribute == "true":
        D.init()
        device_num = args_opt.device_num
        rank = args_opt.device_id % device_num
        ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(get_rank()) + '/'

        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                          device_num=device_num)
        _set_parallel_all_reduce_split()
    else:
        rank = 0
        device_num = 1
    rank = 0
    device_num = 1
    num_workers = 8
    if args_opt.device_target == "Ascend":
        context.set_context(enable_auto_mixed_precision=False)
        context.set_context(device_id=args_opt.device_id)
        if args_opt.distribute == "true":
            D.init()
            device_num = args_opt.device_num
            rank = args_opt.device_id % device_num
            ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(get_rank()) + '/'

            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                              device_num=device_num)
            _set_parallel_all_reduce_split()
    else:
        args_opt.distribute = "false"
        args_opt.need_profiler = "false"
        args_opt.enable_data_sink = "false"

    # Start create dataset!
    # mindrecord files will be generated at args_opt.mindrecord_dir such as centernet.mindrecord0, 1, ... file_num.
    logger.info("Begin creating dataset for CenterNet")
@@ -167,7 +176,8 @@ def train():

    optimizer = _get_optimizer(net_with_loss, dataset_size)

    callback = [TimeMonitor(args_opt.data_sink_steps), LossCallBack(dataset_size)]
    enable_static_time = args_opt.device_target == "CPU"
    callback = [TimeMonitor(args_opt.data_sink_steps), LossCallBack(dataset_size, enable_static_time)]
    if args_opt.enable_save_ckpt == "true" and args_opt.device_id % min(8, device_num) == 0:
        config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps,
                                     keep_checkpoint_max=args_opt.save_checkpoint_num)
@@ -178,12 +188,13 @@ def train():
    if args_opt.load_checkpoint_path:
        param_dict = load_checkpoint(args_opt.load_checkpoint_path)
        load_param_into_net(net_with_loss, param_dict)

    net_with_grads = CenterNetWithLossScaleCell(net_with_loss, optimizer=optimizer,
                                                sens=train_config.loss_scale_value)
    if args_opt.device_target == "Ascend":
        net_with_grads = CenterNetWithLossScaleCell(net_with_loss, optimizer=optimizer,
                                                    sens=train_config.loss_scale_value)
    else:
        net_with_grads = CenterNetWithoutLossScaleCell(net_with_loss, optimizer=optimizer)

    model = Model(net_with_grads)

    model.train(new_repeat_count, dataset, callbacks=callback, dataset_sink_mode=(args_opt.enable_data_sink == "true"),
                sink_size=args_opt.data_sink_steps)