From e4517964cb33b148135e405071e316f146d6752e Mon Sep 17 00:00:00 2001
From: meixiaowei
Date: Wed, 29 Apr 2020 20:33:00 +0800
Subject: [PATCH 1/2] support relative and full paths

---
 .../run_distribute_train.sh                 | 26 ++++++++++++++----
 .../run_standalone_train.sh                 | 16 +++++++++---
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh
index 5165f58cab..ecdcd66859 100755
--- a/example/resnet101_imagenet2012/run_distribute_train.sh
+++ b/example/resnet101_imagenet2012/run_distribute_train.sh
@@ -20,23 +20,35 @@
 then
     exit 1
 fi
 
-if [ ! -f $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+PATH2=$(get_real_path $2)
+echo $PATH1
+echo $PATH2
+
+if [ ! -f $PATH1 ]
 then
-    echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
     exit 1
 fi
 
-if [ ! -d $2 ]
+if [ ! -d $PATH2 ]
 then
-    echo "error: DATASET_PATH=$2 is not a directory"
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
     exit 1
 fi
 
 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
-export MINDSPORE_HCCL_CONFIG_PATH=$1
-export RANK_TABLE_FILE=$1
+export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
+export RANK_TABLE_FILE=$PATH1
 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -49,6 +61,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
     cd ..
 done
diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh
index 9ba5742515..dde018b8eb 100755
--- a/example/resnet101_imagenet2012/run_standalone_train.sh
+++ b/example/resnet101_imagenet2012/run_standalone_train.sh
@@ -20,9 +20,19 @@
 then
     exit 1
 fi
 
-if [ ! -d $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+echo $PATH1
+
+if [ ! -d $PATH1 ]
 then
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
     exit 1
 fi
@@ -42,5 +52,5 @@ cp *.sh ./train
 cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
-python train.py --do_train=True --dataset_path=$1 &> log &
+python train.py --do_train=True --dataset_path=$PATH1 &> log &
 cd ..
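
Note on the get_real_path helper added above: it passes absolute arguments
through unchanged and resolves relative ones against the caller's working
directory; `realpath -m` also normalizes the result and does not require the
path to exist yet. A rough Python equivalent, for illustration only (this
function is not part of the patch):

import os

def get_real_path(path):
    # Absolute paths pass through unchanged.
    if path.startswith("/"):
        return path
    # Relative paths are resolved against the current working directory,
    # approximating `realpath -m`, which tolerates missing components.
    return os.path.realpath(os.path.join(os.getcwd(), path))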
From 73bd2e9afb7918f346375be3e923565ed3a2f7bf Mon Sep 17 00:00:00 2001
From: meixiaowei
Date: Wed, 29 Apr 2020 22:06:57 +0800
Subject: [PATCH 2/2] modify weight init

---
 example/resnet101_imagenet2012/train.py    |   9 +-
 example/resnet101_imagenet2012/var_init.py | 192 ---------------------
 2 files changed, 5 insertions(+), 196 deletions(-)
 delete mode 100755 example/resnet101_imagenet2012/var_init.py

diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index ca74262890..3d0a23f93a 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -34,7 +34,6 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
-from var_init import default_recurisive_init, KaimingNormal
 
 random.seed(1)
 np.random.seed(1)
@@ -68,8 +67,10 @@ if __name__ == '__main__':
-    default_recurisive_init(net)
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                mode='fan_out', nonlinearity='relu'),
+            cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+        if isinstance(cell, nn.Dense):
+            cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
     if not config.label_smooth:
diff --git a/example/resnet101_imagenet2012/var_init.py b/example/resnet101_imagenet2012/var_init.py
deleted file mode 100755
index 34d8664a49..0000000000
--- a/example/resnet101_imagenet2012/var_init.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""weight initial"""
-import math
-import numpy as np
-from mindspore.common import initializer as init
-import mindspore.nn as nn
-from mindspore import Tensor
-
-def calculate_gain(nonlinearity, param=None):
-    r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
-    ================= ====================================================
-    nonlinearity      gain
-    ================= ====================================================
-    Linear / Identity :math:`1`
-    Conv{1,2,3}D      :math:`1`
-    Sigmoid           :math:`1`
-    Tanh              :math:`\frac{5}{3}`
-    ReLU              :math:`\sqrt{2}`
-    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
-    ================= ====================================================
-    Args:
-        nonlinearity: the non-linear function (`nn.functional` name)
-        param: optional parameter for the non-linear function
-    """
-    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
-    gain = 0
-    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        gain = 1
-    elif nonlinearity == 'tanh':
-        gain = 5.0 / 3
-    elif nonlinearity == 'relu':
-        gain = math.sqrt(2.0)
-    elif nonlinearity == 'leaky_relu':
-        if param is None:
-            negative_slope = 0.01
-        elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
-            # True/False are instances of int, hence check above
-            negative_slope = param
-        else:
-            raise ValueError("negative_slope {} not a valid number".format(param))
-        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
-    else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    return gain
-
-def _calculate_correct_fan(array, mode):
-    mode = mode.lower()
-    valid_modes = ['fan_in', 'fan_out']
-    if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
-    fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out
-
-def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    uniform distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
-    .. math::
-        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
-    return np.random.uniform(-bound, bound, array.shape)
-
-def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    normal distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{N}(0, \text{std}^2)` where
-    .. math::
-        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-            used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    return np.random.normal(0, std, array.shape)
-
-def _calculate_fan_in_and_fan_out(array):
-    """calculate the fan_in and fan_out for input array"""
-    dimensions = len(array.shape)
-    if dimensions < 2:
-        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
-    num_input_fmaps = array.shape[1]
-    num_output_fmaps = array.shape[0]
-    receptive_field_size = 1
-    if dimensions > 2:
-        receptive_field_size = array[0][0].size
-    fan_in = num_input_fmaps * receptive_field_size
-    fan_out = num_output_fmaps * receptive_field_size
-    return fan_in, fan_out
-
-def assignment(arr, num):
-    """Assign the value of num to arr"""
-    if arr.shape == ():
-        arr = arr.reshape((1))
-        arr[:] = num
-        arr = arr.reshape(())
-    else:
-        if isinstance(num, np.ndarray):
-            arr[:] = num[:]
-        else:
-            arr[:] = num
-    return arr
-
-class KaimingUniform(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingUniform, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-class KaimingNormal(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingNormal, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-def default_recurisive_init(custom_cell):
-    """weight init for conv2d and dense"""
-    for _, cell in custom_cell.cells_and_names():
-        if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
-            pass
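
Note on the new weight init: the patch drops the local Kaiming helpers in
favor of initializers that ship with MindSpore (mindspore.common.initializer).
A minimal standalone sketch of the same pattern; the layer shapes below are
hypothetical and chosen only for illustration:

import mindspore.common.initializer as weight_init
from mindspore.common import dtype as mstype

# Conv2d weights: Xavier uniform, as in the patched train.py.
conv_weight = weight_init.initializer(weight_init.XavierUniform(),
                                      [64, 3, 7, 7], mstype.float32)
# Dense weights: truncated normal, as in the patched train.py.
dense_weight = weight_init.initializer(weight_init.TruncatedNormal(),
                                       [1001, 2048], mstype.float32)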