From f1cec60dc85f63bb2745f221888940f7745f38b2 Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 15:35:11 +0800
Subject: [PATCH 1/3] upload resnet101 scripts

---
 example/resnet101_imagenet/README.md          | 139 +++++++++++++
 example/resnet101_imagenet/config.py          |  42 ++++
 example/resnet101_imagenet/crossentropy.py    |  36 ++++
 example/resnet101_imagenet/dataset.py         |  89 +++++++++
 example/resnet101_imagenet/eval.py            |  84 ++++++++
 example/resnet101_imagenet/lr_generator.py    | 113 +++++++++++
 .../run_distribute_train.sh                   |  54 ++++++
 example/resnet101_imagenet/run_infer.sh       |  52 +++++
 .../run_standalone_train.sh                   |  46 +++++
 example/resnet101_imagenet/train.py           | 113 +++++++++++
 example/resnet101_imagenet/var_init.py        | 183 ++++++++++++++++++
 mindspore/model_zoo/resnet.py                 |  21 ++
 12 files changed, 972 insertions(+)
 create mode 100644 example/resnet101_imagenet/README.md
 create mode 100755 example/resnet101_imagenet/config.py
 create mode 100755 example/resnet101_imagenet/crossentropy.py
 create mode 100755 example/resnet101_imagenet/dataset.py
 create mode 100755 example/resnet101_imagenet/eval.py
 create mode 100755 example/resnet101_imagenet/lr_generator.py
 create mode 100755 example/resnet101_imagenet/run_distribute_train.sh
 create mode 100755 example/resnet101_imagenet/run_infer.sh
 create mode 100755 example/resnet101_imagenet/run_standalone_train.sh
 create mode 100755 example/resnet101_imagenet/train.py
 create mode 100755 example/resnet101_imagenet/var_init.py

diff --git a/example/resnet101_imagenet/README.md b/example/resnet101_imagenet/README.md
new file mode 100644
index 0000000000..bc653675f2
--- /dev/null
+++ b/example/resnet101_imagenet/README.md
@@ -0,0 +1,139 @@
+# ResNet101 Example
+ 
+## Description
+ 
+This is an example of training ResNet101 with ImageNet dataset in MindSpore.
+
+## Requirements
+
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+
+- Download the dataset [ImageNet](http://image-net.org/download).
+ 
+> Unzip the ImageNet dataset to any path you want. The folder should contain the training and evaluation datasets as follows:
+ 
+```
+.
+└─dataset
+    ├─ilsvrc
+    │
+    └─validation_preprocess
+```
+
+## Example structure
+ 
+```shell
+.
+├── crossentropy.py                 # CrossEntropy loss function
+├── var_init.py                     # weight initialization
+├── config.py                       # parameter configuration
+├── dataset.py                      # data preprocessing
+├── eval.py                         # eval net
+├── lr_generator.py                 # generate learning rate
+├── run_distribute_train.sh         # launch distributed training(8p)
+├── run_infer.sh                    # launch evaluating
+├── run_standalone_train.sh         # launch standalone training(1p)
+└── train.py                        # train net
+```
+ 
+## Parameter configuration
+ 
+Parameters for both training and evaluating can be set in config.py.
+ 
+```
+"class_num": 1001,                # dataset class number
+"batch_size": 32,                 # batch size of input tensor
+"loss_scale": 1024,               # loss scale
+"momentum": 0.9,                  # momentum optimizer
+"weight_decay": 1e-4,             # weight decay
+"epoch_size": 120,                # number of training epochs
+"buffer_size": 1000,              # shuffle buffer size used in data preprocessing
+"image_height": 224,              # image height
+"image_width": 224,               # image width
+"save_checkpoint": True,          # whether to save checkpoints
+"save_checkpoint_steps": 500,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
+"keep_checkpoint_max": 40,        # only keep the last keep_checkpoint_max checkpoints
+"save_checkpoint_path": "./",     # path to save checkpoints, relative to the execution path
+"lr_init": 0.01,                  # initial learning rate
+"lr_end": 0.00001,                # final learning rate
+"lr_max": 0.1,                    # maximum learning rate
+"warmup_epochs": 0,               # number of warmup epochs
+"lr_decay_mode": "cosine",        # decay mode for generating learning rate
+"label_smooth": 1,                # whether to apply label smoothing (0 disables it)
+"label_smooth_factor": 0.1,       # label smoothing factor
+"lr": 0.1                         # base learning rate
+```
+
+## Running the example
+
+### Train
+ 
+#### Usage
+
+```
+# distributed training
+sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
+ 
+# standalone training
+sh run_standalone_train.sh [DATASET_PATH]
+```
+ 
+#### Launch
+ 
+```bash
+# distributed training example(8p)
+sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc
+ 
+# standalone training example(1p)
+sh run_standalone_train.sh dataset/ilsvrc
+```
+ 
+> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
+
+#### Result
+ 
+Training results are stored in the example path, in folders whose names begin with "train" or "train_parallel". There you can find the checkpoint files together with results like the following in the log.
+
+ 
+```
+# distribute training result(8p)
+epoch: 1 step: 5004, loss is 4.805483
+epoch: 2 step: 5004, loss is 3.2121816
+epoch: 3 step: 5004, loss is 3.429647
+epoch: 4 step: 5004, loss is 3.3667371
+epoch: 5 step: 5004, loss is 3.1718972
+...
+epoch: 67 step: 5004, loss is 2.2768745
+epoch: 68 step: 5004, loss is 1.7223864
+epoch: 69 step: 5004, loss is 2.0665488
+epoch: 70 step: 5004, loss is 1.8717369
+...
+```
+
+### Infer
+ 
+#### Usage
+ 
+```
+# infer
+sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH]
+```
+ 
+#### Launch
+ 
+```bash
+# infer with checkpoint
+sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt
+
+```
+ 
+> The checkpoint is produced during the training process.
+ 
+
+#### Result
+ 
+Inference results are stored in the example path, in a folder named "infer". There you can find results like the following in the log.
+ 
+```
+result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt
+```
diff --git a/example/resnet101_imagenet/config.py b/example/resnet101_imagenet/config.py
new file mode 100755
index 0000000000..0ad37c8678
--- /dev/null
+++ b/example/resnet101_imagenet/config.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+network config setting, will be used in train.py and eval.py
+"""
+from easydict import EasyDict as ed
+
+config = ed({
+    "class_num": 1001,  # number of classes, passed to resnet101 and the loss
+    "batch_size": 32,  # batch size handed to create_dataset
+    "loss_scale": 1024,  # fixed loss scale, also passed to the Momentum optimizer
+    "momentum": 0.9,  # momentum of the optimizer
+    "weight_decay": 1e-4,  # weight decay of the optimizer
+    "epoch_size": 120,  # number of training epochs
+    "buffer_size": 1000,  # shuffle buffer size used by the dataset pipeline
+    "image_height": 224,  # image height
+    "image_width": 224,  # image width
+    "save_checkpoint": True,  # whether to save checkpoints during training
+    "save_checkpoint_steps": 500,  # step interval between two checkpoints
+    "keep_checkpoint_max": 40,  # maximum number of checkpoints to keep
+    "save_checkpoint_path": "./",  # directory where checkpoints are written
+    "lr_init": 0.01,  # initial learning rate (non-cosine schedules)
+    "lr_end": 0.00001,  # final learning rate (non-cosine schedules)
+    "lr_max": 0.1,  # maximum learning rate (non-cosine schedules)
+    "warmup_epochs": 0,  # number of warmup epochs
+    "lr_decay_mode": "cosine",  # 'cosine' selects the warmup + cosine schedule in train.py
+    "label_smooth": 1,  # falsy value forces label_smooth_factor to 0.0
+    "label_smooth_factor": 0.1,  # smoothing factor given to CrossEntropy
+    "lr": 0.1  # base learning rate of the cosine schedule
+})
diff --git a/example/resnet101_imagenet/crossentropy.py b/example/resnet101_imagenet/crossentropy.py
new file mode 100755
index 0000000000..e636b8529e
--- /dev/null
+++ b/example/resnet101_imagenet/crossentropy.py
@@ -0,0 +1,36 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import operations as P
+from mindspore.ops import functional as F
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+import mindspore.nn as nn
+ 
+"""define loss function for network"""
+class CrossEntropy(_Loss):
+    """Softmax cross entropy against label-smoothed one-hot targets, averaged over axis 0."""
+    def __init__(self, smooth_factor=0., num_classes=1001):
+        super(CrossEntropy, self).__init__()
+        self.onehot = P.OneHot()
+        # smoothed targets: the labelled class gets 1 - factor, the others share factor evenly
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits()
+        self.mean = P.ReduceMean(False)
+
+    def construct(self, logit, label):
+        targets = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        return self.mean(self.ce(logit, targets), 0)
diff --git a/example/resnet101_imagenet/dataset.py b/example/resnet101_imagenet/dataset.py
new file mode 100755
index 0000000000..920e1c093c
--- /dev/null
+++ b/example/resnet101_imagenet/dataset.py
@@ -0,0 +1,89 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+create train or eval dataset.
+"""
+import os
+import mindspore.common.dtype as mstype
+import mindspore.dataset.engine as de
+import mindspore.dataset.transforms.vision.c_transforms as C
+import mindspore.dataset.transforms.c_transforms as C2
+from config import config
+
+def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
+    """
+    create a train or evaluate dataset, sharded according to the RANK_SIZE / RANK_ID environment variables
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        repeat_num(int): the repeat times of dataset. Default: 1
+        batch_size(int): the batch size of dataset. Default: 32
+
+    Returns:
+        dataset
+    """
+    device_num = int(os.getenv("RANK_SIZE"))
+    rank_id = int(os.getenv("RANK_ID"))
+
+    if device_num == 1:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+    else:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                     num_shards=device_num, shard_id=rank_id)
+    resize_height = 224
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    # define map operations
+    decode_op = C.Decode()
+
+    random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100)
+    # flip with a fixed probability: it must not depend on the rank (rank 0 would never flip)
+    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)
+    resize_op_256 = C.Resize((256, 256))
+    center_crop = C.CenterCrop(224)
+    rescale_op = C.Rescale(rescale, shift)
+    normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
+    changeswap_op = C.HWC2CHW()
+
+    if do_train:
+        # training: random crop + random flip augmentation
+        trans = [decode_op,
+                 random_resize_crop_op,
+                 horizontal_flip_op,
+                 rescale_op,
+                 normalize_op,
+                 changeswap_op]
+    else:
+        # evaluation: deterministic resize + center crop
+        trans = [decode_op,
+                 resize_op_256,
+                 center_crop,
+                 rescale_op,
+                 normalize_op,
+                 changeswap_op]
+
+    type_cast_op = C2.TypeCast(mstype.int32)
+
+    ds = ds.map(input_columns="image", operations=trans)
+    ds = ds.map(input_columns="label", operations=type_cast_op)
+
+    # apply shuffle operations
+    ds = ds.shuffle(buffer_size=config.buffer_size)
+    # apply batch operations
+    ds = ds.batch(batch_size, drop_remainder=True)
+    # apply dataset repeat operation
+    ds = ds.repeat(repeat_num)
+    return ds
diff --git a/example/resnet101_imagenet/eval.py b/example/resnet101_imagenet/eval.py
new file mode 100755
index 0000000000..00fe825e91
--- /dev/null
+++ b/example/resnet101_imagenet/eval.py
@@ -0,0 +1,84 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+eval.
+"""
+import os
+import argparse
+import random
+import numpy as np
+from dataset import create_dataset
+from config import config
+from mindspore import context
+from mindspore.model_zoo.resnet import resnet101
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.train.model import Model, ParallelMode
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+import mindspore.dataset.engine as de
+from mindspore.communication.management import init
+from crossentropy import CrossEntropy
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+def str2bool(value):
+    """argparse type for flags: bool('False') is True, so parse the text explicitly."""
+    if value.lower() not in ('true', 'false', '1', '0'):
+        raise argparse.ArgumentTypeError("expected True or False, got %r" % value)
+    return value.lower() in ('true', '1')
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=str2bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+parser.add_argument('--do_train', type=str2bool, default=False, help='Do train or not.')
+parser.add_argument('--do_eval', type=str2bool, default=True, help='Do eval or not.')
+parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
+context.set_context(enable_task_sink=True, enable_loop_sink=True, enable_mem_reuse=True)
+
+if __name__ == '__main__':
+    if args_opt.do_eval:
+        context.set_context(enable_hccl=False)
+    else:
+        if args_opt.run_distribute:
+            context.set_context(enable_hccl=True)
+            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              mirror_mean=True, parameter_broadcast=True)
+            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+            init()
+        else:
+            context.set_context(enable_hccl=False)
+
+    net = resnet101(class_num=config.class_num)
+    if not config.label_smooth:
+        config.label_smooth_factor = 0.0
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+
+    if args_opt.do_eval:
+        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
+        if args_opt.checkpoint_path:
+            param_dict = load_checkpoint(args_opt.checkpoint_path)
+            load_param_into_net(net, param_dict)
+        net.set_train(False)
+
+        model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
+        res = model.eval(dataset)
+        print("result:", res, "ckpt=", args_opt.checkpoint_path)
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
new file mode 100755
index 0000000000..b2271a1382
--- /dev/null
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -0,0 +1,113 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""learning rate generator"""
+import numpy as np
+import math
+ 
+
+def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
+    """Return the warmup learning rate at current_step, interpolated linearly from init_lr to base_lr."""
+    slope = (float(base_lr) - float(init_lr)) / float(warmup_steps)
+    return float(init_lr) + slope * current_step
+
+def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
+    """
+    Build the per-step learning rate schedule: linear warmup from 0 up to lr,
+    followed by a linear ramp-down scaled by a cosine factor.
+
+    Args:
+       lr(float): base learning rate
+       steps_per_epoch(int): number of steps in one epoch
+       warmup_epochs(int): number of warmup epochs
+       max_epoch(int): total number of training epochs
+    Returns:
+       np.array, float32 learning rate for each training step
+    """
+    n_total = int(max_epoch * steps_per_epoch)
+    n_warmup = int(warmup_epochs * steps_per_epoch)
+    n_decay = n_total - n_warmup
+
+    def _rate_at(step):
+        # warmup phase: ramp linearly, reaching lr on the last warmup step
+        if step < n_warmup:
+            return linear_warmup_lr(step + 1, n_warmup, lr, 0)
+        # decay phase: linear ramp-down times a cosine factor, plus a small offset
+        linear_part = (n_total - step) / n_decay
+        cosine_part = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / n_decay))
+        return lr * (linear_part * cosine_part + 0.00001)
+
+    schedule = [_rate_at(step) for step in range(n_total)]
+    # one entry per training step, cast to float32
+    return np.array(schedule).astype(np.float32)
+
+def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
+    """
+    generate the learning rate array for the whole training run and
+    return the part of it that starts at global_step
+
+    Args:
+       global_step(int): index of the first step to return; earlier entries are dropped
+       lr_init(float): init learning rate
+       lr_end(float): end learning rate
+       lr_max(float): max learning rate
+       warmup_epochs(int): number of warmup epochs
+       total_epochs(int): total epoch of training
+       steps_per_epoch(int): steps of one epoch
+       lr_decay_mode(string): 'steps' or 'poly'; any other value selects
+           linear warmup followed by linear decay from lr_max to lr_end
+
+    Returns:
+       np.array, float32 learning rate array
+    """
+    total_steps = steps_per_epoch * total_epochs
+    warmup_steps = steps_per_epoch * warmup_epochs
+
+    def _steps_rate(i):
+        # piecewise constant: divided by 10 at 30%, 60% and 80% of training
+        if i < 0.3 * total_steps:
+            return lr_max
+        if i < 0.6 * total_steps:
+            return lr_max * 0.1
+        if i < 0.8 * total_steps:
+            return lr_max * 0.01
+        return lr_max * 0.001
+
+    def _poly_rate(i):
+        # linear warmup from lr_init, then quadratic decay of lr_max, clamped at zero
+        if i < warmup_steps:
+            slope = (float(lr_max) - float(lr_init)) / float(warmup_steps)
+            return float(lr_init) + slope * float(i)
+        remaining = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
+        rate = float(lr_max) * remaining * remaining
+        return 0.0 if rate < 0.0 else rate
+
+    def _linear_rate(i):
+        # linear warmup from lr_init to lr_max, then linear decay towards lr_end
+        if i < warmup_steps:
+            return lr_init + (lr_max - lr_init) * i / warmup_steps
+        return lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps)
+
+    if lr_decay_mode == 'steps':
+        rate_at = _steps_rate
+    elif lr_decay_mode == 'poly':
+        rate_at = _poly_rate
+    else:
+        rate_at = _linear_rate
+
+    schedule = [rate_at(i) for i in range(total_steps)]
+    schedule = np.array(schedule).astype(np.float32)
+    # drop the entries that precede global_step
+    return schedule[global_step:]
+
diff --git a/example/resnet101_imagenet/run_distribute_train.sh b/example/resnet101_imagenet/run_distribute_train.sh
new file mode 100755
index 0000000000..5165f58cab
--- /dev/null
+++ b/example/resnet101_imagenet/run_distribute_train.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]; then
+    echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]"
+    exit 1
+fi
+
+# Resolve both arguments to absolute paths: each rank is launched from inside ./train_parallel$i
+HCCL_CONFIG_PATH=$(realpath "$1")
+DATASET_PATH=$(realpath "$2")
+
+if [ ! -f "$HCCL_CONFIG_PATH" ]; then
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    exit 1
+fi
+
+if [ ! -d "$DATASET_PATH" ]; then
+    echo "error: DATASET_PATH=$2 is not a directory"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export MINDSPORE_HCCL_CONFIG_PATH="$HCCL_CONFIG_PATH"
+export RANK_TABLE_FILE="$HCCL_CONFIG_PATH"
+
+for((i=0; i<${DEVICE_NUM}; i++)); do
+    export DEVICE_ID=$i
+    export RANK_ID=$i
+    rm -rf ./train_parallel$i
+    mkdir ./train_parallel$i
+    cp *.py ./train_parallel$i
+    cp *.sh ./train_parallel$i
+    cd ./train_parallel$i || exit
+    echo "start training for rank $RANK_ID, device $DEVICE_ID"
+    env > env.log
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$DATASET_PATH" &> log &
+    cd ..
+done
diff --git a/example/resnet101_imagenet/run_infer.sh b/example/resnet101_imagenet/run_infer.sh
new file mode 100755
index 0000000000..5df659275e
--- /dev/null
+++ b/example/resnet101_imagenet/run_infer.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]; then
+    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+# Resolve both arguments to absolute paths: eval.py is launched from inside ./infer
+DATASET_PATH=$(realpath "$1")
+CHECKPOINT_PATH=$(realpath "$2")
+
+if [ ! -d "$DATASET_PATH" ]; then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+if [ ! -f "$CHECKPOINT_PATH" ]; then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+if [ -d "infer" ]; then
+    rm -rf ./infer
+fi
+mkdir ./infer
+cp *.py ./infer
+cp *.sh ./infer
+cd ./infer || exit
+env > env.log
+echo "start infering for device $DEVICE_ID"
+python eval.py --do_eval=True --dataset_path="$DATASET_PATH" --checkpoint_path="$CHECKPOINT_PATH" &> log &
+cd ..
diff --git a/example/resnet101_imagenet/run_standalone_train.sh b/example/resnet101_imagenet/run_standalone_train.sh
new file mode 100755
index 0000000000..9ba5742515
--- /dev/null
+++ b/example/resnet101_imagenet/run_standalone_train.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 1 ]; then
+    echo "Usage: sh run_standalone_train.sh [DATASET_PATH]"
+    exit 1
+fi
+
+# Resolve the argument to an absolute path: train.py is launched from inside ./train
+DATASET_PATH=$(realpath "$1")
+
+if [ ! -d "$DATASET_PATH" ]; then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_ID=0
+export RANK_SIZE=1
+
+if [ -d "train" ]; then
+    rm -rf ./train
+fi
+mkdir ./train
+cp *.py ./train
+cp *.sh ./train
+cd ./train || exit
+echo "start training for device $DEVICE_ID"
+env > env.log
+python train.py --do_train=True --dataset_path="$DATASET_PATH" &> log &
+cd ..
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
new file mode 100755
index 0000000000..2df6c3bad4
--- /dev/null
+++ b/example/resnet101_imagenet/train.py
@@ -0,0 +1,113 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""train_imagenet."""
+import os
+import math
+import argparse
+import random
+import numpy as np
+from dataset import create_dataset
+from lr_generator import get_lr, warmup_cosine_annealing_lr
+from config import config
+from mindspore import context, Tensor
+from mindspore.model_zoo.resnet import resnet101
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.nn.optim.momentum import Momentum
+from mindspore.train.model import Model, ParallelMode
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
+from mindspore.train.loss_scale_manager import FixedLossScaleManager
+import mindspore.dataset.engine as de
+from mindspore.communication.management import init
+import mindspore.nn as nn
+from crossentropy import CrossEntropy
+from var_init import default_recurisive_init, KaimingNormal
+from mindspore.common import initializer as weight_init
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+def str2bool(value):
+    """argparse type for flags: bool('False') is True, so parse the text explicitly."""
+    if value.lower() not in ('true', 'false', '1', '0'):
+        raise argparse.ArgumentTypeError("expected True or False, got %r" % value)
+    return value.lower() in ('true', '1')
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=str2bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+parser.add_argument('--do_train', type=str2bool, default=True, help='Do train or not.')
+parser.add_argument('--do_eval', type=str2bool, default=False, help='Do eval or not.')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
+context.set_context(enable_task_sink=True, enable_loop_sink=True, enable_mem_reuse=True)
+
+if __name__ == '__main__':
+    if args_opt.do_eval:
+        context.set_context(enable_hccl=False)
+    else:
+        if args_opt.run_distribute:
+            context.set_context(enable_hccl=True)
+            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              mirror_mean=True, parameter_broadcast=True)
+            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+            init()
+        else:
+            context.set_context(enable_hccl=False)
+
+    epoch_size = config.epoch_size
+    net = resnet101(class_num=config.class_num)
+
+    # weight init: default init for every cell, then Kaiming-normal for the conv weights
+    default_recurisive_init(net)
+    for _, cell in net.cells_and_names():
+        if isinstance(cell, nn.Conv2d):
+            cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
+                                                                mode='fan_out', nonlinearity='relu'),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+
+    if not config.label_smooth:
+        config.label_smooth_factor = 0.0
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+
+    if args_opt.do_train:
+        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
+                                 repeat_num=epoch_size, batch_size=config.batch_size)
+        step_size = dataset.get_dataset_size()
+        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
+
+        # learning rate strategy
+        if config.lr_decay_mode == 'cosine':
+            lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
+        else:
+            lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
+                               warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
+                               lr_decay_mode='poly'))
+        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
+                       config.weight_decay, config.loss_scale)
+        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
+                      loss_scale_manager=loss_scale, metrics={'acc'})
+
+        cb = [TimeMonitor(data_size=step_size), LossMonitor()]
+        if config.save_checkpoint:
+            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
+                                         keep_checkpoint_max=config.keep_checkpoint_max)
+            cb.append(ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck))
+        model.train(epoch_size, dataset, callbacks=cb)
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
new file mode 100755
index 0000000000..af4cd64b3b
--- /dev/null
+++ b/example/resnet101_imagenet/var_init.py
@@ -0,0 +1,183 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""weight initial"""
+import math
+import numpy as np
+from mindspore.common import initializer as init
+import mindspore.nn as nn
+from mindspore import Tensor
+
+ 
+def calculate_gain(nonlinearity, param=None):
+    r"""Return the recommended gain value for the given nonlinearity function.
+    The values are as follows:
+ 
+    ================= ====================================================
+    nonlinearity      gain
+    ================= ====================================================
+    Linear / Identity :math:`1`
+    Conv{1,2,3}D      :math:`1`
+    Sigmoid           :math:`1`
+    Tanh              :math:`\frac{5}{3}`
+    ReLU              :math:`\sqrt{2}`
+    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
+    ================= ====================================================
+ 
+    Args:
+        nonlinearity: the non-linear function (`nn.functional` name)
+        param: optional parameter for the non-linear function
+ 
+    """
+    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
+    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
+        return 1
+    elif nonlinearity == 'tanh':
+        return 5.0 / 3
+    elif nonlinearity == 'relu':
+        return math.sqrt(2.0)
+    elif nonlinearity == 'leaky_relu':
+        if param is None:
+            negative_slope = 0.01
+        elif (not isinstance(param, bool) and isinstance(param, int)) or isinstance(param, float):
+            # True/False are instances of int, hence check above
+            negative_slope = param
+        else:
+            raise ValueError("negative_slope {} not a valid number".format(param))
+        return math.sqrt(2.0 / (1 + negative_slope ** 2))
+    else:
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) 
+
+def _calculate_correct_fan(array, mode):
+    mode = mode.lower()
+    valid_modes = ['fan_in', 'fan_out']
+    if mode not in valid_modes:
+        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
+ 
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
+    return fan_in if mode == 'fan_in' else fan_out 
+ 
+
+def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Delving deep into rectifiers: Surpassing human-level
+    performance on ImageNet classification` - He, K. et al. (2015), using a
+    uniform distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
+ 
+    .. math::
+        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
+ 
+    Also known as He initialization.
+ 
+    Args:
+        array: an n-dimensional `tensor`
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function (`nn.functional` name),
+            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
+    """
+ 
+    fan = _calculate_correct_fan(array, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
+    return np.random.uniform(-bound, bound, array.shape)
+ 
+
+def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Delving deep into rectifiers: Surpassing human-level
+    performance on ImageNet classification` - He, K. et al. (2015), using a
+    normal distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{N}(0, \text{std}^2)` where
+ 
+    .. math::
+        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
+
+    Also known as He initialization.
+ 
+    Args:
+        array: an n-dimensional `tensor`
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function (`nn.functional` name),
+            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
+    """
+    fan = _calculate_correct_fan(array, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    return np.random.normal(0, std, array.shape)
+ 
+def _calculate_fan_in_and_fan_out(array):
+    dimensions = len(array.shape)
+    if dimensions < 2:
+        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
+ 
+    num_input_fmaps = array.shape[1]
+    num_output_fmaps = array.shape[0]
+    receptive_field_size = 1
+    if dimensions > 2:
+        receptive_field_size = array[0][0].size
+    fan_in = num_input_fmaps * receptive_field_size
+    fan_out = num_output_fmaps * receptive_field_size
+ 
+    return fan_in, fan_out
+ 
+class KaimingUniform(init.Initializer):
+    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+        super(KaimingUniform, self).__init__()
+        self.a = a
+        self.mode = mode
+        self.nonlinearity = nonlinearity
+ 
+    def _initialize(self, arr):
+        tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
+        init._assignment(arr, tmp) 
+
+class KaimingNormal(init.Initializer):
+    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+        super(KaimingNormal, self).__init__()
+        self.a = a
+        self.mode = mode
+        self.nonlinearity = nonlinearity
+ 
+    def _initialize(self, arr):
+        tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
+        init._assignment(arr, tmp)
+
+def default_recurisive_init(custom_cell):
+    for name, cell in custom_cell.cells_and_names():
+        if isinstance(cell, nn.Conv2d):
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            if cell.bias is not None:
+                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
+                bound = 1 / math.sqrt(fan_in)
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
+        elif isinstance(cell, nn.Dense):
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            if cell.bias is not None:
+                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
+                bound = 1 / math.sqrt(fan_in)
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
+        elif isinstance(cell, nn.BatchNorm2d) or isinstance(cell, nn.BatchNorm1d):
+            pass
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index 9d010eede1..a243ff5a2a 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -260,3 +260,24 @@ def resnet50(class_num=10):
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
                   class_num)
+
+def resnet101(class_num=1001):
+    """
+    Get ResNet101 neural network.
+
+    Args:
+        class_num (int): Class number. Default: 1001.
+
+    Returns:
+        Cell, cell instance of ResNet101 neural network.
+
+    Examples:
+        >>> net = resnet101(1001)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 4, 23, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
+    

From 99bbb3a3b2a0eac1c224256eb2782149733b10ef Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 17:25:12 +0800
Subject: [PATCH 2/3] modify scripts for pylint

---
 example/resnet101_imagenet/crossentropy.py |  6 +--
 example/resnet101_imagenet/dataset.py      |  2 +-
 example/resnet101_imagenet/lr_generator.py |  5 +--
 example/resnet101_imagenet/train.py        | 20 ++++------
 example/resnet101_imagenet/var_init.py     | 43 +++++++++++-----------
 mindspore/model_zoo/resnet.py              |  3 +-
 6 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/example/resnet101_imagenet/crossentropy.py b/example/resnet101_imagenet/crossentropy.py
index e636b8529e..1145a41804 100755
--- a/example/resnet101_imagenet/crossentropy.py
+++ b/example/resnet101_imagenet/crossentropy.py
@@ -12,15 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""define loss function for network"""
 from mindspore.nn.loss.loss import _Loss
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
 import mindspore.nn as nn
- 
-"""define loss function for network"""
+
 class CrossEntropy(_Loss):
+    """the redefined loss function with SoftmaxCrossEntropyWithLogits"""
     def __init__(self, smooth_factor=0., num_classes=1001):
         super(CrossEntropy, self).__init__()
         self.onehot = P.OneHot()
@@ -28,7 +29,6 @@ class CrossEntropy(_Loss):
         self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
         self.ce = nn.SoftmaxCrossEntropyWithLogits()
         self.mean = P.ReduceMean(False)
- 
     def construct(self, logit, label):
         one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
         loss = self.ce(logit, one_hot_label)
diff --git a/example/resnet101_imagenet/dataset.py b/example/resnet101_imagenet/dataset.py
index 920e1c093c..27d93dc086 100755
--- a/example/resnet101_imagenet/dataset.py
+++ b/example/resnet101_imagenet/dataset.py
@@ -57,7 +57,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
     normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
     changeswap_op = C.HWC2CHW()
 
-    trans=[]
+    trans = []
     if do_train:
         trans = [decode_op,
                  random_resize_crop_op,
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
index b2271a1382..67ff1fef25 100755
--- a/example/resnet101_imagenet/lr_generator.py
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -13,9 +13,8 @@
 # limitations under the License.
 # ============================================================================
 """learning rate generator"""
-import numpy as np
 import math
- 
+import numpy as np
 
 def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
     lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
@@ -50,7 +49,7 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
             decayed = linear_decay * cosine_decay + 0.00001
             lr = base_lr * decayed
         lr_each_step.append(lr)
-    return np.array(lr_each_step).astype(np.float32)  
+    return np.array(lr_each_step).astype(np.float32)
 
 def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
     """
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
index 2df6c3bad4..37f49ec3d7 100755
--- a/example/resnet101_imagenet/train.py
+++ b/example/resnet101_imagenet/train.py
@@ -14,11 +14,12 @@
 # ============================================================================
 """train_imagenet."""
 import os
+import math
 import argparse
 import random
 import numpy as np
 from dataset import create_dataset
-from lr_generator import get_lr
+from lr_generator import get_lr, warmup_cosine_annealing_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
@@ -33,7 +34,7 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 from crossentropy import CrossEntropy
 from var_init import default_recurisive_init, KaimingNormal
-from mindspore.common import initializer as weight_init
+import mindspore.common.initializer as weight_init
 
 random.seed(1)
 np.random.seed(1)
@@ -69,23 +70,20 @@ if __name__ == '__main__':
 
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
-    
     # weight init
     default_recurisive_init(net)
     for name, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                mode='fan_out', nonlinearity='relu'),
+                                                                              mode='fan_out', nonlinearity='relu'),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
-            
     if not config.label_smooth:
         config.label_smooth_factor = 0.0
-    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) 
-
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     if args_opt.do_train:
         dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                                 repeat_num=epoch_size, batch_size=config.batch_size)
+                repeat_num=epoch_size, batch_size=config.batch_size)
         step_size = dataset.get_dataset_size()
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
 
@@ -96,12 +94,10 @@ if __name__ == '__main__':
             lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
                                warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
                                lr_decay_mode='poly'))
-            
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                        config.weight_decay, config.loss_scale)
-
-        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False, loss_scale_manager=loss_scale, metrics={'acc'}) 
-
+        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
+                      loss_scale_manager=loss_scale, metrics={'acc'})
         time_cb = TimeMonitor(data_size=step_size)
         loss_cb = LossMonitor()
         cb = [time_cb, loss_cb]
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
index af4cd64b3b..061ec94fbf 100755
--- a/example/resnet101_imagenet/var_init.py
+++ b/example/resnet101_imagenet/var_init.py
@@ -18,12 +18,10 @@ import numpy as np
 from mindspore.common import initializer as init
 import mindspore.nn as nn
 from mindspore import Tensor
-
  
 def calculate_gain(nonlinearity, param=None):
     r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
- 
+    The values are as follows: 
     ================= ====================================================
     nonlinearity      gain
     ================= ====================================================
@@ -34,11 +32,9 @@ def calculate_gain(nonlinearity, param=None):
     ReLU              :math:`\sqrt{2}`
     Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
     ================= ====================================================
- 
     Args:
         nonlinearity: the non-linear function (`nn.functional` name)
         param: optional parameter for the non-linear function
- 
     """
     linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
     if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
@@ -57,17 +53,15 @@ def calculate_gain(nonlinearity, param=None):
             raise ValueError("negative_slope {} not a valid number".format(param))
         return math.sqrt(2.0 / (1 + negative_slope ** 2))
     else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) 
-
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+    
 def _calculate_correct_fan(array, mode):
     mode = mode.lower()
     valid_modes = ['fan_in', 'fan_out']
     if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
- 
+        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) 
     fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out 
- 
+    return fan_in if mode == 'fan_in' else fan_out
 
 def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     r"""Fills the input `Tensor` with values according to the method
@@ -75,12 +69,10 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     performance on ImageNet classification` - He, K. et al. (2015), using a
     uniform distribution. The resulting tensor will have values sampled from
     :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
- 
     .. math::
         \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
- 
     Also known as He initialization.
- 
+
     Args:
         array: an n-dimensional `tensor`
         a: the negative slope of the rectifier used after this layer (only
@@ -91,8 +83,7 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
             backwards pass.
         nonlinearity: the non-linear function (`nn.functional` name),
             recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
- 
+    """ 
     fan = _calculate_correct_fan(array, mode)
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
@@ -129,6 +120,7 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     return np.random.normal(0, std, array.shape)
  
 def _calculate_fan_in_and_fan_out(array):
+    """calculate the fan_in and fan_out for input array"""
     dimensions = len(array.shape)
     if dimensions < 2:
         raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
@@ -166,18 +158,27 @@ class KaimingNormal(init.Initializer):
         init._assignment(arr, tmp)
 
 def default_recurisive_init(custom_cell):
+    """weight init for conv2d and dense"""
     for name, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
+                    cell.weight.default_input.shape(), 
+                    cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
+                    cell.bias.default_input.shape()), 
+                    cell.bias.default_input.dtype())
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
+                    cell.weight.default_input.shape(), 
+                    cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.BatchNorm2d) or isinstance(cell, nn.BatchNorm1d):
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
+                    cell.bias.default_input.shape()), 
+                    cell.bias.default_input.dtype())
+        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index a243ff5a2a..d67f26814c 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -279,5 +279,4 @@ def resnet101(class_num=1001):
                   [64, 256, 512, 1024],
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
-                  class_num)
-    
+                  class_num)
\ No newline at end of file

From 3cb692bea10e477deffdbe631bc88fc30f93622e Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 17:57:12 +0800
Subject: [PATCH 3/3] modify resnet101 scripts for pylint

---
 example/resnet101_imagenet/README.md       |  3 -
 example/resnet101_imagenet/config.py       |  3 -
 example/resnet101_imagenet/lr_generator.py | 60 -----------------
 example/resnet101_imagenet/train.py        | 17 ++---
 example/resnet101_imagenet/var_init.py     | 76 ++++++++++++----------
 mindspore/model_zoo/resnet.py              |  2 +-
 6 files changed, 49 insertions(+), 112 deletions(-)

diff --git a/example/resnet101_imagenet/README.md b/example/resnet101_imagenet/README.md
index bc653675f2..d5729b70db 100644
--- a/example/resnet101_imagenet/README.md
+++ b/example/resnet101_imagenet/README.md
@@ -54,9 +54,6 @@ Parameters for both training and evaluating can be set in config.py.
 "save_checkpoint_steps": 500,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
 "keep_checkpoint_max": 40,        # only keep the last keep_checkpoint_max checkpoint
 "save_checkpoint_path": "./",     # path to save checkpoint relative to the executed path
-"lr_init": 0.01,                  # initial learning rate
-"lr_end": 0.00001,                # final learning rate
-"lr_max": 0.1,                    # maximum learning rate
 "warmup_epochs": 0,               # number of warmup epoch
 "lr_decay_mode": "cosine"         # decay mode for generating learning rate
 "label_smooth": 1,                # label_smooth
diff --git a/example/resnet101_imagenet/config.py b/example/resnet101_imagenet/config.py
index 0ad37c8678..ca58f24da3 100755
--- a/example/resnet101_imagenet/config.py
+++ b/example/resnet101_imagenet/config.py
@@ -31,9 +31,6 @@ config = ed({
     "save_checkpoint_steps": 500,
     "keep_checkpoint_max": 40,
     "save_checkpoint_path": "./",
-    "lr_init": 0.01,
-    "lr_end": 0.00001,
-    "lr_max": 0.1,
     "warmup_epochs": 0,
     "lr_decay_mode": "cosine",
     "label_smooth": 1,
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
index 67ff1fef25..88cb85cc5b 100755
--- a/example/resnet101_imagenet/lr_generator.py
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -50,63 +50,3 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
             lr = base_lr * decayed
         lr_each_step.append(lr)
     return np.array(lr_each_step).astype(np.float32)
-
-def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
-    """
-    generate learning rate array
-
-    Args:
-       global_step(int): total steps of the training
-       lr_init(float): init learning rate
-       lr_end(float): end learning rate
-       lr_max(float): max learning rate
-       warmup_epochs(int): number of warmup epochs
-       total_epochs(int): total epoch of training
-       steps_per_epoch(int): steps of one epoch
-       lr_decay_mode(string): learning rate decay mode, including steps, poly or default
-
-    Returns:
-       np.array, learning rate array
-    """
-    lr_each_step = []
-    total_steps = steps_per_epoch * total_epochs
-    warmup_steps = steps_per_epoch * warmup_epochs
-    if lr_decay_mode == 'steps':
-        decay_epoch_index = [0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps]
-        for i in range(total_steps):
-            if i < decay_epoch_index[0]:
-                lr = lr_max
-            elif i < decay_epoch_index[1]:
-                lr = lr_max * 0.1
-            elif i < decay_epoch_index[2]:
-                lr = lr_max * 0.01
-            else:
-                lr = lr_max * 0.001
-            lr_each_step.append(lr)
-    elif lr_decay_mode == 'poly':
-        if warmup_steps != 0:
-            inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps)
-        else:
-            inc_each_step = 0
-        for i in range(total_steps):
-            if i < warmup_steps:
-                lr = float(lr_init) + inc_each_step * float(i)
-            else:
-                base = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
-                lr = float(lr_max) * base * base
-                if lr < 0.0:
-                    lr = 0.0
-            lr_each_step.append(lr)
-    else:
-        for i in range(total_steps):
-            if i < warmup_steps:
-                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
-            else:
-                lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps)
-            lr_each_step.append(lr)
-
-    current_step = global_step
-    lr_each_step = np.array(lr_each_step).astype(np.float32)
-    learning_rate = lr_each_step[current_step:]
-
-    return learning_rate
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
index 37f49ec3d7..0f20637595 100755
--- a/example/resnet101_imagenet/train.py
+++ b/example/resnet101_imagenet/train.py
@@ -19,7 +19,7 @@ import argparse
 import random
 import numpy as np
 from dataset import create_dataset
-from lr_generator import get_lr, warmup_cosine_annealing_lr
+from lr_generator import warmup_cosine_annealing_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
@@ -32,9 +32,9 @@ from mindspore.train.loss_scale_manager import FixedLossScaleManager
 import mindspore.dataset.engine as de
 from mindspore.communication.management import init
 import mindspore.nn as nn
+import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
 from var_init import default_recurisive_init, KaimingNormal
-import mindspore.common.initializer as weight_init
 
 random.seed(1)
 np.random.seed(1)
@@ -72,7 +72,7 @@ if __name__ == '__main__':
     net = resnet101(class_num=config.class_num)
     # weight init
     default_recurisive_init(net)
-    for name, cell in net.cells_and_names():
+    for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
                                                                               mode='fan_out', nonlinearity='relu'),
@@ -83,17 +83,12 @@ if __name__ == '__main__':
     loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     if args_opt.do_train:
         dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                repeat_num=epoch_size, batch_size=config.batch_size)
+                                 repeat_num=epoch_size, batch_size=config.batch_size)
         step_size = dataset.get_dataset_size()
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
 
-        # learning rate strategy
-        if config.lr_decay_mode == 'cosine':
-            lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
-        else:
-            lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
-                               warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
-                               lr_decay_mode='poly'))
+        # learning rate strategy with cosine
+        lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                        config.weight_decay, config.loss_scale)
         model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
index 061ec94fbf..34d8664a49 100755
--- a/example/resnet101_imagenet/var_init.py
+++ b/example/resnet101_imagenet/var_init.py
@@ -18,10 +18,10 @@ import numpy as np
 from mindspore.common import initializer as init
 import mindspore.nn as nn
 from mindspore import Tensor
- 
+
 def calculate_gain(nonlinearity, param=None):
     r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows: 
+    The values are as follows:
     ================= ====================================================
     nonlinearity      gain
     ================= ====================================================
@@ -37,12 +37,13 @@ def calculate_gain(nonlinearity, param=None):
         param: optional parameter for the non-linear function
     """
     linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
+    gain = 0
     if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        return 1
+        gain = 1
     elif nonlinearity == 'tanh':
-        return 5.0 / 3
+        gain = 5.0 / 3
     elif nonlinearity == 'relu':
-        return math.sqrt(2.0)
+        gain = math.sqrt(2.0)
     elif nonlinearity == 'leaky_relu':
         if param is None:
             negative_slope = 0.01
@@ -51,15 +52,16 @@ def calculate_gain(nonlinearity, param=None):
             negative_slope = param
         else:
             raise ValueError("negative_slope {} not a valid number".format(param))
-        return math.sqrt(2.0 / (1 + negative_slope ** 2))
+        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
     else:
         raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    
+    return gain
+
 def _calculate_correct_fan(array, mode):
     mode = mode.lower()
     valid_modes = ['fan_in', 'fan_out']
     if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) 
+        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
     fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
     return fan_in if mode == 'fan_in' else fan_out
 
@@ -83,13 +85,12 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
             backwards pass.
         nonlinearity: the non-linear function (`nn.functional` name),
             recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """ 
+    """
     fan = _calculate_correct_fan(array, mode)
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
     bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
     return np.random.uniform(-bound, bound, array.shape)
- 
 
 def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     r"""Fills the input `Tensor` with values according to the method
@@ -97,12 +98,10 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     performance on ImageNet classification` - He, K. et al. (2015), using a
     normal distribution. The resulting tensor will have values sampled from
     :math:`\mathcal{N}(0, \text{std}^2)` where
- 
     .. math::
         \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-
     Also known as He initialization.
- 
+
     Args:
         array: an n-dimensional `tensor`
         a: the negative slope of the rectifier used after this layer (only
@@ -118,13 +117,12 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
     return np.random.normal(0, std, array.shape)
- 
+
 def _calculate_fan_in_and_fan_out(array):
     """calculate the fan_in and fan_out for input array"""
     dimensions = len(array.shape)
     if dimensions < 2:
         raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
- 
     num_input_fmaps = array.shape[1]
     num_output_fmaps = array.shape[0]
     receptive_field_size = 1
@@ -132,19 +130,30 @@ def _calculate_fan_in_and_fan_out(array):
         receptive_field_size = array[0][0].size
     fan_in = num_input_fmaps * receptive_field_size
     fan_out = num_output_fmaps * receptive_field_size
- 
     return fan_in, fan_out
- 
+
+def assignment(arr, num):
+    """Write num into arr by slice assignment (0-d arr is reshaped to one element first) and return arr."""
+    if arr.shape == ():
+        arr = arr.reshape((1))
+        arr[:] = num
+        arr = arr.reshape(())
+    else:
+        if isinstance(num, np.ndarray):
+            arr[:] = num[:]
+        else:
+            arr[:] = num
+    return arr
+
 class KaimingUniform(init.Initializer):
     def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
         super(KaimingUniform, self).__init__()
         self.a = a
         self.mode = mode
         self.nonlinearity = nonlinearity
- 
     def _initialize(self, arr):
         tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        init._assignment(arr, tmp) 
+        assignment(arr, tmp)
 
 class KaimingNormal(init.Initializer):
     def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
@@ -152,33 +161,32 @@ class KaimingNormal(init.Initializer):
         self.a = a
         self.mode = mode
         self.nonlinearity = nonlinearity
- 
     def _initialize(self, arr):
         tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        init._assignment(arr, tmp)
+        assignment(arr, tmp)
 
 def default_recurisive_init(custom_cell):
     """weight init for conv2d and dense"""
-    for name, cell in custom_cell.cells_and_names():
+    for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
-                    cell.weight.default_input.shape(), 
-                    cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                         cell.weight.default_input.shape(),
+                                                         cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
-                    cell.bias.default_input.shape()), 
-                    cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
+                                                                   cell.bias.default_input.shape()),
+                                                 cell.bias.default_input.dtype())
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
-                    cell.weight.default_input.shape(), 
-                    cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                         cell.weight.default_input.shape(),
+                                                         cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
-                    cell.bias.default_input.shape()), 
-                    cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
+                                                                   cell.bias.default_input.shape()),
+                                                 cell.bias.default_input.dtype())
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index d67f26814c..3055026718 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -279,4 +279,4 @@ def resnet101(class_num=1001):
                   [64, 256, 512, 1024],
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
-                  class_num)
\ No newline at end of file
+                  class_num)