From: @dinglinhe123
Reviewed-by: @liangchenghui, @wuxuejian
Signed-off-by: @liangchenghui
pull/15365/MERGE
@@ -51,12 +51,12 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  auto prop_kind = dnnl::prop_kind::forward_training;
  auto normalization_flags = dnnl::normalization_flags::use_scale_shift;
- // fused batch normalization forward description
+ // fused Batch Normalization forward description
  dnnl::batch_normalization_forward::desc desc =
  dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
  auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
- // fused batch normalization backward description
+ // fused Batch Normalization backward description
  dnnl::batch_normalization_backward::desc backward_desc =
  dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
  auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
@@ -33,7 +33,7 @@ enum OpMergeMode {
  OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list
  OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
  OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
- OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+ OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
  OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
};
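For context, these merge modes describe how the ONNX exporter collapses an op pair into a single exported node. The sketch below is a hypothetical illustration of that idea for the `Conv2D + BiasAdd` case; the `Node` class and `assign_merge_modes` helper are illustrative stand-ins, not MindSpore APIs.

```python
# Hypothetical sketch of assigning merge modes before export; not MindSpore code.
from dataclasses import dataclass, field
from typing import List

OP_MERGE_UNDEFINED = 0
OP_MERGE_IGNORE = 1
OP_MERGE_CONV = 2

@dataclass
class Node:
    name: str
    op_type: str
    inputs: List[str] = field(default_factory=list)

def assign_merge_modes(nodes):
    """Mark `Conv2D -> BiasAdd` pairs so the pair exports as a single ONNX Conv."""
    modes = {n.name: OP_MERGE_UNDEFINED for n in nodes}
    for node in nodes:
        if node.op_type != "BiasAdd":
            continue
        producers = [p for p in nodes if p.name in node.inputs and p.op_type == "Conv2D"]
        if producers:
            modes[node.name] = OP_MERGE_CONV            # BiasAdd node becomes the ONNX Conv
            modes[producers[0].name] = OP_MERGE_IGNORE  # its Conv2D input is folded into it
    return modes

conv = Node("conv1", "Conv2D", inputs=["x", "w"])
bias = Node("bias1", "BiasAdd", inputs=["conv1", "b"])
print(assign_merge_modes([conv, bias]))  # {'conv1': 1, 'bias1': 2}
```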
@@ -339,7 +339,7 @@ class QuantizationAwareTraining(Quantizer):
  quant_config=self.quant_config,
  quant_dtype=self.weight_dtype,
  fake=True)
- # change original network BatchNormal OP parameters to quant network
+ # change original network Batch Normalization OP parameters to quant network
  conv_inner.gamma = subcell.batchnorm.gamma
  conv_inner.beta = subcell.batchnorm.beta
  conv_inner.moving_mean = subcell.batchnorm.moving_mean
@@ -363,7 +363,7 @@ class QuantizationAwareTraining(Quantizer):
  bias_init=conv_inner.bias_init,
  quant_config=self.quant_config,
  quant_dtype=self.weight_dtype)
- # change original network BatchNormal OP parameters to quant network
+ # change original network Batch Normalization OP parameters to quant network
  conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
  conv_inner.batchnorm.beta = subcell.batchnorm.beta
  conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
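Both hunks copy the already-trained Batch Normalization parameters from the float subcell into its quantization-aware replacement. A minimal sketch of that hand-off, using plain stand-in classes rather than the real `mindspore.nn` cells:

```python
# Hedged sketch of the parameter hand-off shown above; classes are stand-ins.
import numpy as np

class FloatBatchNorm:
    def __init__(self, num_features):
        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)
        self.moving_mean = np.zeros(num_features)
        self.moving_variance = np.ones(num_features)

class QuantConvBn:
    """Quantization-aware Conv+BN cell that reuses the original BN parameters."""
    def __init__(self, num_features):
        self.batchnorm = FloatBatchNorm(num_features)

def transfer_bn_params(quant_cell, float_bn):
    # carry the original network's Batch Normalization parameters into the quant network
    quant_cell.batchnorm.gamma = float_bn.gamma
    quant_cell.batchnorm.beta = float_bn.beta
    quant_cell.batchnorm.moving_mean = float_bn.moving_mean
    quant_cell.batchnorm.moving_variance = float_bn.moving_variance

quant = QuantConvBn(8)
trained_bn = FloatBatchNorm(8)
transfer_bn_params(quant, trained_bn)
```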
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ============================================================================
- """less batch normalization"""
+ """less Batch Normalization"""
  import numpy as np
  from mindspore import nn
  from mindspore.ops import operations as P
@@ -233,7 +233,7 @@ def _shape_check_bn(in_shape, in_dims):
  @constexpr
  def _shape_infer(x_shape, num_feature):
- """global batch normalization shape and axes infer"""
+ """global Batch Normalization shape and axes infer"""
  if len(x_shape) == 4:
  axes = (0, 2, 3)
  re_shape = (1, num_feature, 1, 1)
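`_shape_infer` picks the reduction axes and the broadcast shape for the per-channel parameters. A quick numpy illustration of the convention it encodes for 4D (NCHW) inputs, with made-up sizes:

```python
# Statistics are reduced over (0, 2, 3); parameters broadcast with shape (1, C, 1, 1).
import numpy as np

x = np.random.randn(2, 3, 4, 4).astype(np.float32)   # N, C, H, W
axes, re_shape = (0, 2, 3), (1, 3, 1, 1)

mean = x.mean(axis=axes).reshape(re_shape)
var = x.var(axis=axes).reshape(re_shape)
x_hat = (x - mean) / np.sqrt(var + 1e-5)

print(mean.shape, x_hat.shape)   # (1, 3, 1, 1) (2, 3, 4, 4)
```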
@@ -245,7 +245,7 @@ def _shape_infer(x_shape, num_feature):
  class BatchNorm1d(_BatchNorm):
  r"""
- Batch normalization layer over a 2D input.
+ Batch Normalization layer over a 2D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to
@@ -334,7 +334,7 @@ class BatchNorm1d(_BatchNorm):
  class BatchNorm2d(_BatchNorm):
  r"""
- Batch normalization layer over a 4D input.
+ Batch Normalization layer over a 4D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with
@@ -441,7 +441,7 @@ def _check_3d_shape(input_shape):
  class BatchNorm3d(Cell):
  r"""
- Batch normalization layer over a 5D input.
+ Batch Normalization layer over a 5D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with
@@ -540,7 +540,7 @@ class GlobalBatchNorm(_BatchNorm):
  r"""
  Global normalization layer over a N-dimension input.
- Global Normalization is cross device synchronized batch normalization. The implementation of Batch Normalization
+ Global Normalization is cross device synchronized Batch Normalization. The implementation of Batch Normalization
  only normalizes the data within each device. Global normalization will normalize the input within the group.
  It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
  Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
@@ -653,10 +653,10 @@ class GlobalBatchNorm(_BatchNorm):
  class SyncBatchNorm(_BatchNorm):
  r"""
- Sync Batch normalization layer over a N-dimension input.
+ Sync Batch Normalization layer over a N-dimension input.
- Sync Batch Normalization is cross device synchronized batch normalization. The implementation of Batch
- Normalization only normalizes the data within each device. Sync Batch normalization will normalize the input
+ Sync Batch Normalization is cross device synchronized Batch Normalization. The implementation of Batch
+ Normalization only normalizes the data within each device. Sync Batch Normalization will normalize the input
  within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
  Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
  feature using a mini-batch of data and the learned parameters which can be described in the following formula.
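The docstrings distinguish per-device statistics from group-wide statistics. The numpy simulation below illustrates what "synchronized" means: local sums are all-reduced across the group and every device normalizes with the same global mean/variance. The list of arrays stands in for devices; this is a sketch, not the SyncBatchNorm kernel.

```python
# Illustrative simulation of synchronized Batch Normalization statistics.
import numpy as np

device_batches = [np.random.randn(4, 3) for _ in range(2)]   # two "devices", C = 3
eps = 1e-5

# local statistics that an unsynchronized BatchNorm would use
local_means = [b.mean(axis=0) for b in device_batches]

# simulated all-reduce: sum counts, sums and squared sums across the group
count = sum(b.shape[0] for b in device_batches)
total = sum(b.sum(axis=0) for b in device_batches)
total_sq = sum((b ** 2).sum(axis=0) for b in device_batches)

global_mean = total / count
global_var = total_sq / count - global_mean ** 2

# every device normalizes with the group statistics, not its local ones
normalized = [(b - global_mean) / np.sqrt(global_var + eps) for b in device_batches]
print(np.allclose(local_means[0], global_mean))  # usually False: local != group stats
```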
@@ -47,7 +47,7 @@ __all__ = [
  class BatchNormFoldCell(Cell):
  """
- Batch normalization folded.
+ Batch Normalization folded.
  Args:
  momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -402,7 +402,8 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
  class Conv2dBnFoldQuantOneConv(Cell):
  r"""
- 2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct.
+ 2D convolution which use the convolution layer statistics once to calculate Batch Normalization
+ operation folded construct.
  This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
  please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
@@ -414,8 +415,8 @@ class Conv2dBnFoldQuantOneConv(Cell):
  stride (int): Specifies stride for all spatial dimensions with the same value.
  pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  padding (int): Implicit paddings on both sides of the input. Default: 0.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
@@ -600,7 +601,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
  class Conv2dBnFoldQuant(Cell):
  r"""
- 2D convolution with BatchNormal operation folded construct.
+ 2D convolution with Batch Normalization operation folded construct.
  This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
  please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
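"Batch Normalization operation folded construct" refers to the standard folding trick: the BN scale and shift are absorbed into the convolution weight and bias so inference needs only a single conv. The arithmetic below is that standard fold; it deliberately ignores the fake-quant simulation and training-time correction factors the real cells add.

```python
# Standard BatchNorm-folding arithmetic (inference-time view), illustration only.
import numpy as np

out_channels, eps = 4, 1e-5
weight = np.random.randn(out_channels, 3, 3, 3)     # (C_out, C_in, kH, kW)
gamma, beta = np.random.randn(out_channels), np.random.randn(out_channels)
moving_mean = np.random.randn(out_channels)
moving_var = np.abs(np.random.randn(out_channels))

sigma = np.sqrt(moving_var + eps)
weight_fold = weight * (gamma / sigma).reshape(-1, 1, 1, 1)
bias_fold = beta - gamma * moving_mean / sigma

# conv(x, weight_fold) + bias_fold  ==  batchnorm(conv(x, weight))  at inference time
print(weight_fold.shape, bias_fold.shape)   # (4, 3, 3, 3) (4,)
```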
@@ -612,8 +613,8 @@ class Conv2dBnFoldQuant(Cell):
  stride (int): Specifies stride for all spatial dimensions with the same value.
  pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  padding (int): Implicit paddings on both sides of the input. Default: 0.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
@@ -635,7 +636,8 @@ class Conv2dBnFoldQuant(Cell):
  generated by compression.quant.create_quant_config method.
  Default: both set to default FakeQuantWithMinMaxObserver.
  quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
- freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.
+ freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
+ Default: 100000.
  Inputs:
  - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
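As a rough illustration of what `freeze_bn` usually means in this kind of cell: before `freeze_bn` global steps the layer uses and updates batch statistics, afterwards it switches to the frozen moving statistics. The function below is a simplified stand-in, not the cell's actual construct().

```python
# Hedged sketch of the usual freeze_bn control flow; simplified stand-in only.
import numpy as np

def bn_forward(x, gamma, beta, moving_mean, moving_var, step, freeze_bn=100000,
               momentum=0.997, eps=1e-5):
    if step < freeze_bn:                      # still training the statistics
        mean, var = x.mean(axis=0), x.var(axis=0)
        moving_mean = momentum * moving_mean + (1 - momentum) * mean
        moving_var = momentum * moving_var + (1 - momentum) * var
    else:                                     # Batch Normalization statistics are frozen
        mean, var = moving_mean, moving_var
    y = gamma * (x - mean) / np.sqrt(var + eps) + beta
    return y, moving_mean, moving_var

x = np.random.randn(16, 8).astype(np.float32)
y, mm, mv = bn_forward(x, np.ones(8), np.zeros(8), np.zeros(8), np.ones(8), step=0)
```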
@@ -811,8 +813,8 @@ class Conv2dBnWithoutFoldQuant(Cell):
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
  has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  Default: 'normal'.
  bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
@@ -65,7 +65,7 @@ class TimeDistributed(Cell):
  There are two cases in the implementation.
  When reshape_with_axis provided, the reshape method will be chosen, which is more efficient;
  otherwise, the method of dividing the inputs along time axis will be used, which is more general.
- For example, reshape_with_axis could not be provided when deal with batch normal.
+ For example, reshape_with_axis could not be provided when deal with Batch Normalization.
  Args:
  layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped.
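The TimeDistributed docstring names two strategies: merge the time axis into the batch axis via reshape (fast path with `reshape_with_axis`), or split along the time axis and apply the wrapped layer step by step (general path). A numpy sketch of the two strategies, with a simple element-wise layer standing in for the wrapped Cell; illustrative only, not the TimeDistributed implementation.

```python
# Two ways of applying a per-timestep layer to (batch, time, features) data.
import numpy as np

def layer(x):                        # stand-in for the wrapped layer
    return x * 2.0

x = np.random.randn(8, 5, 16)        # (batch, time, features)

# strategy 1: reshape - merge time into the batch axis, one layer call
merged = layer(x.reshape(8 * 5, 16)).reshape(8, 5, 16)

# strategy 2: split along the time axis and apply the layer step by step
split = np.stack([layer(x[:, t]) for t in range(x.shape[1])], axis=1)

print(np.allclose(merged, split))    # True for a stateless element-wise layer
```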
@@ -739,7 +739,7 @@ class SyncBatchNorm(PrimitiveWithInfer):
  r"""
  Sync Batch Normalization for input data and updated parameters.
- Sync Batch Normalization is cross device synchronized batch normalization. Batch Normalization is
+ Sync Batch Normalization is cross device synchronized Batch Normalization. Batch Normalization is
  widely used in convolutional neural networks. This operation applies Batch Normalization over input
  to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating
  Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_.
@@ -411,8 +411,8 @@ class FakeQuantPerLayer(PrimitiveWithInfer):
  ema (bool): Uses EMA algorithm update value min and max. Default: False.
  ema_decay (int) : EMA algorithm decay parameter. Default: 0.999.
  quant_delay (int): Quantilization delay parameter. Before delay step in training time not update
- simulate quantization aware funcion. After delay step in training time begin simulate the aware
- quantize funcion. Default: 0.
+ simulate quantization aware function. After delay step in training time begin simulate the aware
+ quantize function. Default: 0.
  symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  training (bool): Training the network or not. Default: True.
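The `quant_delay` wording corrected above gates when simulated quantization starts. Below is a hedged numpy sketch of per-layer fake quantization: values are quantized to the integer grid defined by (min, max) and immediately dequantized, so the float graph sees the rounding error; before `quant_delay` steps the input passes through unchanged. Simplified illustration, not the FakeQuantPerLayer kernel.

```python
# Simplified per-layer fake quantization with a delay gate; illustration only.
import numpy as np

def fake_quant_per_layer(x, min_val, max_val, step, quant_delay=0, num_bits=8,
                         narrow_range=False):
    if step < quant_delay:                      # simulation not started yet
        return x
    qmin = 1 if narrow_range else 0
    qmax = 2 ** num_bits - 1
    scale = (max_val - min_val) / (qmax - qmin)
    zero_point = qmin - round(min_val / scale)
    q = np.clip(np.round(x / scale) + zero_point, qmin, qmax)
    return (q - zero_point) * scale             # dequantize back to float

x = np.linspace(-1.0, 1.0, 5)
print(fake_quant_per_layer(x, -1.0, 1.0, step=100, quant_delay=0))
```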
@@ -687,7 +687,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer):
  class BatchNormFold(PrimitiveWithInfer):
  """
- Batch normalization folded.
+ Batch Normalization folded.
  Args:
  momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -129,7 +129,7 @@ cfg = {
  def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
  """
- Get Vgg16 neural network with batch normalization.
+ Get Vgg16 neural network with Batch Normalization.
  Args:
  num_classes (int): Class numbers. Default: 1000.
@@ -137,7 +137,7 @@ def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
  phase(str): train or test mode.
  Returns:
- Cell, cell instance of Vgg16 neural network with batch normalization.
+ Cell, cell instance of Vgg16 neural network with Batch Normalization.
  Examples:
  >>> vgg16(num_classes=1000, args=args, **kwargs)
@@ -57,7 +57,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
  stride (int): The stride of the sliding window.
  Returns:
- numpy.ndarray, grad of avgerage pooling.
+ numpy.ndarray, grad of average pooling.
  """
  # pylint: disable=unused-argument
  _, _, height, width = dout.shape
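For the average-pooling gradient in the last hunk: each output gradient is spread uniformly over its pooling window. A minimal numpy illustration for the non-overlapping case (stride equal to the window size); this is a sketch, not the `avg_pool_grad` body hidden by the diff.

```python
# Average-pooling gradient for the non-overlapping case; each input position in a
# window receives an equal share dout / (pool_h * pool_w) of that window's gradient.
import numpy as np

def avg_pool_grad_simple(dout, pool_h, pool_w):
    share = dout / (pool_h * pool_w)
    # repeat each gradient value over its pooling window
    return np.repeat(np.repeat(share, pool_h, axis=2), pool_w, axis=3)

dout = np.ones((1, 1, 2, 2))
print(avg_pool_grad_simple(dout, 2, 2))   # 4x4 map filled with 0.25
```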
@@ -70,7 +70,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
  def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  _, c_h_w = x.shape
  # Handle running_mean and running_var are not None
  # if running_mean is None:
@@ -106,7 +106,7 @@ def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
  def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  input_shape = x.shape
  if x.ndim != 2:
  batch_num = x.shape[0]
@@ -120,7 +120,7 @@ def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
  def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  if x.ndim != 2:
  batch_num = x.shape[0]
  x = x.reshape(batch_num, -1)
@@ -141,7 +141,7 @@ def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
  def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  if dy.ndim != 2:
  batch_size = dy.shape[0]
  dy = dy.reshape(batch_size, -1)
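These numpy helpers flatten their inputs to 2D (batch, features) before normalizing, as the `reshape(batch_num, -1)` lines show. For reference, here is a compact 2D forward/backward pair using the textbook Batch Normalization derivation; it is an illustration of the math, not the exact bodies hidden by the diff.

```python
# Textbook 2D Batch Normalization forward and backward, for reference only.
import numpy as np

def batch_norm_2d(x, gamma, beta, eps=1e-5):
    mean = x.mean(axis=0)
    inv_std = 1.0 / np.sqrt(x.var(axis=0) + eps)
    x_hat = (x - mean) * inv_std
    return gamma * x_hat + beta, mean, inv_std

def batch_norm_grad_2d(dy, x, gamma, mean, inv_std):
    n = x.shape[0]
    x_hat = (x - mean) * inv_std
    dbeta = dy.sum(axis=0)
    dgamma = (dy * x_hat).sum(axis=0)
    dx = (gamma * inv_std / n) * (n * dy - dbeta - x_hat * dgamma)
    return dx, dgamma, dbeta

x = np.random.randn(6, 3)
y, mean, inv_std = batch_norm_2d(x, np.ones(3), np.zeros(3))
dx, dgamma, dbeta = batch_norm_grad_2d(np.ones_like(x), x, np.ones(3), mean, inv_std)
```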
@@ -275,7 +275,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
  col = im2col(x, filter_h, filter_w, stride, pad, dilation)
  col_w = np.reshape(weight, (filter_num, -1)).T
  out = np.dot(col, col_w)
- out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2)
+ out = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
  if bias is not None:
  out += bias
  return out
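Shape walk-through for the reshape/transpose touched above: the im2col matmul produces a `(batch * out_h * out_w, filter_num)` matrix, which is reshaped to NHWC and transposed to NCHW. Small stand-in sizes; `im2col` itself is not re-implemented here.

```python
# Shape bookkeeping for the im2col-based conv2d output, with made-up sizes.
import numpy as np

batch_num, out_h, out_w, filter_num = 2, 5, 5, 8
out = np.random.randn(batch_num * out_h * out_w, filter_num)   # result of np.dot(col, col_w)

out_nchw = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
print(out_nchw.shape)   # (2, 8, 5, 5)
```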
@@ -287,7 +287,7 @@ def conv2d_backprop_filter(dout, x, w_size, stride=1, pad=0):
  dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num)
  col = im2col(x, filter_height, filter_width, stride, pad)
  dw = np.dot(col.T, dout)
- dw = dw.transpose(1, 0).reshape(filter_num, channel, filter_height, filter_width)
+ dw = dw.transpose(1, 0).reshape((filter_num, channel, filter_height, filter_width))
  return dw