From e4517964cb33b148135e405071e316f146d6752e Mon Sep 17 00:00:00 2001
From: meixiaowei
Date: Wed, 29 Apr 2020 20:33:00 +0800
Subject: [PATCH 1/2] support relative and full paths

---
 .../run_distribute_train.sh                 | 26 ++++++++++++++----
 .../run_standalone_train.sh                 | 16 +++++++++---
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh
index 5165f58cab..ecdcd66859 100755
--- a/example/resnet101_imagenet2012/run_distribute_train.sh
+++ b/example/resnet101_imagenet2012/run_distribute_train.sh
@@ -20,23 +20,35 @@
 then
     exit 1
 fi
 
-if [ ! -f $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+PATH2=$(get_real_path $2)
+echo $PATH1
+echo $PATH2
+
+if [ ! -f $PATH1 ]
 then
-    echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
     exit 1
 fi
 
-if [ ! -d $2 ]
+if [ ! -d $PATH2 ]
 then
-    echo "error: DATASET_PATH=$2 is not a directory"
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
     exit 1
 fi
 
 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
-export MINDSPORE_HCCL_CONFIG_PATH=$1
-export RANK_TABLE_FILE=$1
+export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
+export RANK_TABLE_FILE=$PATH1
 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -49,6 +61,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
     cd ..
 done
diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh
index 9ba5742515..dde018b8eb 100755
--- a/example/resnet101_imagenet2012/run_standalone_train.sh
+++ b/example/resnet101_imagenet2012/run_standalone_train.sh
@@ -20,9 +20,19 @@
 then
     exit 1
 fi
 
-if [ ! -d $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+echo $PATH1
+
+if [ ! -d $PATH1 ]
 then
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
     exit 1
 fi
@@ -42,5 +52,5 @@ cp *.sh ./train
 cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
-python train.py --do_train=True --dataset_path=$1 &> log &
+python train.py --do_train=True --dataset_path=$PATH1 &> log &
 cd ..
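
Note on the get_real_path helper added above: it passes absolute arguments
through unchanged and resolves relative ones against the caller's working
directory; `realpath -m` also normalizes the result and does not require the
path to exist yet. A rough Python equivalent, for illustration only (this
function is not part of the patch):

import os

def get_real_path(path):
    # Absolute paths pass through unchanged.
    if path.startswith("/"):
        return path
    # Relative paths are resolved against the current working directory,
    # approximating `realpath -m`, which tolerates missing components.
    return os.path.realpath(os.path.join(os.getcwd(), path))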
From 73bd2e9afb7918f346375be3e923565ed3a2f7bf Mon Sep 17 00:00:00 2001
From: meixiaowei
Date: Wed, 29 Apr 2020 22:06:57 +0800
Subject: [PATCH 2/2] modify weight init

---
 example/resnet101_imagenet2012/train.py    |   9 +-
 example/resnet101_imagenet2012/var_init.py | 192 ---------------------
 2 files changed, 5 insertions(+), 196 deletions(-)
 delete mode 100755 example/resnet101_imagenet2012/var_init.py

diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index ca74262890..3d0a23f93a 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -34,7 +34,6 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
-from var_init import default_recurisive_init, KaimingNormal
 
 random.seed(1)
 np.random.seed(1)
@@ -68,8 +67,10 @@ if __name__ == '__main__':
-    default_recurisive_init(net)
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                mode='fan_out', nonlinearity='relu'),
+            cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+        if isinstance(cell, nn.Dense):
+            cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
     if not config.label_smooth:
diff --git a/example/resnet101_imagenet2012/var_init.py b/example/resnet101_imagenet2012/var_init.py
deleted file mode 100755
index 34d8664a49..0000000000
--- a/example/resnet101_imagenet2012/var_init.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""weight initial"""
-import math
-import numpy as np
-from mindspore.common import initializer as init
-import mindspore.nn as nn
-from mindspore import Tensor
-
-def calculate_gain(nonlinearity, param=None):
-    r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
-    ================= ====================================================
-    nonlinearity      gain
-    ================= ====================================================
-    Linear / Identity :math:`1`
-    Conv{1,2,3}D      :math:`1`
-    Sigmoid           :math:`1`
-    Tanh              :math:`\frac{5}{3}`
-    ReLU              :math:`\sqrt{2}`
-    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
-    ================= ====================================================
-    Args:
-        nonlinearity: the non-linear function (`nn.functional` name)
-        param: optional parameter for the non-linear function
-    """
-    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
-    gain = 0
-    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        gain = 1
-    elif nonlinearity == 'tanh':
-        gain = 5.0 / 3
-    elif nonlinearity == 'relu':
-        gain = math.sqrt(2.0)
-    elif nonlinearity == 'leaky_relu':
-        if param is None:
-            negative_slope = 0.01
-        elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
-            # True/False are instances of int, hence check above
-            negative_slope = param
-        else:
-            raise ValueError("negative_slope {} not a valid number".format(param))
-        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
-    else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    return gain
-
-def _calculate_correct_fan(array, mode):
-    mode = mode.lower()
-    valid_modes = ['fan_in', 'fan_out']
-    if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
-    fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out
-
-def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    uniform distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
-    .. math::
-        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
-    return np.random.uniform(-bound, bound, array.shape)
-
-def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    normal distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{N}(0, \text{std}^2)` where
-    .. math::
-        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    return np.random.normal(0, std, array.shape)
-
-def _calculate_fan_in_and_fan_out(array):
-    """calculate the fan_in and fan_out for input array"""
-    dimensions = len(array.shape)
-    if dimensions < 2:
-        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
-    num_input_fmaps = array.shape[1]
-    num_output_fmaps = array.shape[0]
-    receptive_field_size = 1
-    if dimensions > 2:
-        receptive_field_size = array[0][0].size
-    fan_in = num_input_fmaps * receptive_field_size
-    fan_out = num_output_fmaps * receptive_field_size
-    return fan_in, fan_out
-
-def assignment(arr, num):
-    """Assign the value of num to arr"""
-    if arr.shape == ():
-        arr = arr.reshape((1))
-        arr[:] = num
-        arr = arr.reshape(())
-    else:
-        if isinstance(num, np.ndarray):
-            arr[:] = num[:]
-        else:
-            arr[:] = num
-    return arr
-
-class KaimingUniform(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingUniform, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-class KaimingNormal(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingNormal, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-def default_recurisive_init(custom_cell):
-    """weight init for conv2d and dense"""
-    for _, cell in custom_cell.cells_and_names():
-        if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
-            pass
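
Note on the new weight init: the patch drops the local Kaiming helpers in
favor of initializers that ship with MindSpore (mindspore.common.initializer).
A minimal standalone sketch of the same pattern; the layer shapes below are
hypothetical and chosen only for illustration:

import mindspore.common.initializer as weight_init
from mindspore.common import dtype as mstype

# Conv2d weights: Xavier uniform, as in the patched train.py.
conv_weight = weight_init.initializer(weight_init.XavierUniform(),
                                      [64, 3, 7, 7], mstype.float32)
# Dense weights: truncated normal, as in the patched train.py.
dense_weight = weight_init.initializer(weight_init.TruncatedNormal(),
                                       [1001, 2048], mstype.float32)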