From: @dinglinhe123
Reviewed-by: @liangchenghui, @wuxuejian
Signed-off-by: @liangchenghui
pull/15365/MERGE
@@ -51,12 +51,12 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  auto prop_kind = dnnl::prop_kind::forward_training;
  auto normalization_flags = dnnl::normalization_flags::use_scale_shift;
- // fused batch normalization forward description
+ // fused Batch Normalization forward description
  dnnl::batch_normalization_forward::desc desc =
  dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
  auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
- // fused batch normalization backward description
+ // fused Batch Normalization backward description
  dnnl::batch_normalization_backward::desc backward_desc =
  dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
  auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
@@ -33,7 +33,7 @@ enum OpMergeMode {
  OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list
  OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
  OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
- OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+ OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
  OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
};
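For context, these merge modes describe how the ONNX exporter collapses an op pair into a single exported node. The sketch below is a hypothetical illustration of that idea for the `Conv2D + BiasAdd` case; the `Node` class and `assign_merge_modes` helper are illustrative stand-ins, not MindSpore APIs.

```python
# Hypothetical sketch of assigning merge modes before export; not MindSpore code.
from dataclasses import dataclass, field
from typing import List

OP_MERGE_UNDEFINED = 0
OP_MERGE_IGNORE = 1
OP_MERGE_CONV = 2

@dataclass
class Node:
    name: str
    op_type: str
    inputs: List[str] = field(default_factory=list)

def assign_merge_modes(nodes):
    """Mark `Conv2D -> BiasAdd` pairs so the pair exports as a single ONNX Conv."""
    modes = {n.name: OP_MERGE_UNDEFINED for n in nodes}
    for node in nodes:
        if node.op_type != "BiasAdd":
            continue
        producers = [p for p in nodes if p.name in node.inputs and p.op_type == "Conv2D"]
        if producers:
            modes[node.name] = OP_MERGE_CONV            # BiasAdd node becomes the ONNX Conv
            modes[producers[0].name] = OP_MERGE_IGNORE  # its Conv2D input is folded into it
    return modes

conv = Node("conv1", "Conv2D", inputs=["x", "w"])
bias = Node("bias1", "BiasAdd", inputs=["conv1", "b"])
print(assign_merge_modes([conv, bias]))  # {'conv1': 1, 'bias1': 2}
```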
@@ -339,7 +339,7 @@ class QuantizationAwareTraining(Quantizer):
  quant_config=self.quant_config,
  quant_dtype=self.weight_dtype,
  fake=True)
- # change original network BatchNormal OP parameters to quant network
+ # change original network Batch Normalization OP parameters to quant network
  conv_inner.gamma = subcell.batchnorm.gamma
  conv_inner.beta = subcell.batchnorm.beta
  conv_inner.moving_mean = subcell.batchnorm.moving_mean
@@ -363,7 +363,7 @@ class QuantizationAwareTraining(Quantizer):
  bias_init=conv_inner.bias_init,
  quant_config=self.quant_config,
  quant_dtype=self.weight_dtype)
- # change original network BatchNormal OP parameters to quant network
+ # change original network Batch Normalization OP parameters to quant network
  conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
  conv_inner.batchnorm.beta = subcell.batchnorm.beta
  conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
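Both hunks copy the already-trained Batch Normalization parameters from the float subcell into its quantization-aware replacement. A minimal sketch of that hand-off, using plain stand-in classes rather than the real `mindspore.nn` cells:

```python
# Hedged sketch of the parameter hand-off shown above; classes are stand-ins.
import numpy as np

class FloatBatchNorm:
    def __init__(self, num_features):
        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)
        self.moving_mean = np.zeros(num_features)
        self.moving_variance = np.ones(num_features)

class QuantConvBn:
    """Quantization-aware Conv+BN cell that reuses the original BN parameters."""
    def __init__(self, num_features):
        self.batchnorm = FloatBatchNorm(num_features)

def transfer_bn_params(quant_cell, float_bn):
    # carry the original network's Batch Normalization parameters into the quant network
    quant_cell.batchnorm.gamma = float_bn.gamma
    quant_cell.batchnorm.beta = float_bn.beta
    quant_cell.batchnorm.moving_mean = float_bn.moving_mean
    quant_cell.batchnorm.moving_variance = float_bn.moving_variance

quant = QuantConvBn(8)
trained_bn = FloatBatchNorm(8)
transfer_bn_params(quant, trained_bn)
```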
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ============================================================================
- """less batch normalization"""
+ """less Batch Normalization"""
  import numpy as np
  from mindspore import nn
  from mindspore.ops import operations as P
@@ -233,7 +233,7 @@ def _shape_check_bn(in_shape, in_dims):
  @constexpr
  def _shape_infer(x_shape, num_feature):
- """global batch normalization shape and axes infer"""
+ """global Batch Normalization shape and axes infer"""
  if len(x_shape) == 4:
  axes = (0, 2, 3)
  re_shape = (1, num_feature, 1, 1)
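`_shape_infer` picks the reduction axes and the broadcast shape for the per-channel parameters. A quick numpy illustration of the convention it encodes for 4D (NCHW) inputs, with made-up sizes:

```python
# Statistics are reduced over (0, 2, 3); parameters broadcast with shape (1, C, 1, 1).
import numpy as np

x = np.random.randn(2, 3, 4, 4).astype(np.float32)   # N, C, H, W
axes, re_shape = (0, 2, 3), (1, 3, 1, 1)

mean = x.mean(axis=axes).reshape(re_shape)
var = x.var(axis=axes).reshape(re_shape)
x_hat = (x - mean) / np.sqrt(var + 1e-5)

print(mean.shape, x_hat.shape)   # (1, 3, 1, 1) (2, 3, 4, 4)
```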
@@ -245,7 +245,7 @@ def _shape_infer(x_shape, num_feature):
  class BatchNorm1d(_BatchNorm):
  r"""
- Batch normalization layer over a 2D input.
+ Batch Normalization layer over a 2D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to
@@ -334,7 +334,7 @@ class BatchNorm1d(_BatchNorm):
  class BatchNorm2d(_BatchNorm):
  r"""
- Batch normalization layer over a 4D input.
+ Batch Normalization layer over a 4D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with
@@ -441,7 +441,7 @@ def _check_3d_shape(input_shape):
  class BatchNorm3d(Cell):
  r"""
- Batch normalization layer over a 5D input.
+ Batch Normalization layer over a 5D input.
  Batch Normalization is widely used in convolutional networks. This layer
  applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with
@@ -540,7 +540,7 @@ class GlobalBatchNorm(_BatchNorm):
  r"""
  Global normalization layer over a N-dimension input.
- Global Normalization is cross device synchronized batch normalization. The implementation of Batch Normalization
+ Global Normalization is cross device synchronized Batch Normalization. The implementation of Batch Normalization
  only normalizes the data within each device. Global normalization will normalize the input within the group.
  It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
  Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
@@ -653,10 +653,10 @@ class GlobalBatchNorm(_BatchNorm):
  class SyncBatchNorm(_BatchNorm):
  r"""
- Sync Batch normalization layer over a N-dimension input.
+ Sync Batch Normalization layer over a N-dimension input.
- Sync Batch Normalization is cross device synchronized batch normalization. The implementation of Batch
- Normalization only normalizes the data within each device. Sync Batch normalization will normalize the input
+ Sync Batch Normalization is cross device synchronized Batch Normalization. The implementation of Batch
+ Normalization only normalizes the data within each device. Sync Batch Normalization will normalize the input
  within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
  Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
  feature using a mini-batch of data and the learned parameters which can be described in the following formula.
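The docstrings distinguish per-device statistics from group-wide statistics. The numpy simulation below illustrates what "synchronized" means: local sums are all-reduced across the group and every device normalizes with the same global mean/variance. The list of arrays stands in for devices; this is a sketch, not the SyncBatchNorm kernel.

```python
# Illustrative simulation of synchronized Batch Normalization statistics.
import numpy as np

device_batches = [np.random.randn(4, 3) for _ in range(2)]   # two "devices", C = 3
eps = 1e-5

# local statistics that an unsynchronized BatchNorm would use
local_means = [b.mean(axis=0) for b in device_batches]

# simulated all-reduce: sum counts, sums and squared sums across the group
count = sum(b.shape[0] for b in device_batches)
total = sum(b.sum(axis=0) for b in device_batches)
total_sq = sum((b ** 2).sum(axis=0) for b in device_batches)

global_mean = total / count
global_var = total_sq / count - global_mean ** 2

# every device normalizes with the group statistics, not its local ones
normalized = [(b - global_mean) / np.sqrt(global_var + eps) for b in device_batches]
print(np.allclose(local_means[0], global_mean))  # usually False: local != group stats
```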
@@ -47,7 +47,7 @@ __all__ = [
  class BatchNormFoldCell(Cell):
  """
- Batch normalization folded.
+ Batch Normalization folded.
  Args:
  momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -402,7 +402,8 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
  class Conv2dBnFoldQuantOneConv(Cell):
  r"""
- 2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct.
+ 2D convolution which use the convolution layer statistics once to calculate Batch Normalization
+ operation folded construct.
  This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
  please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
@@ -414,8 +415,8 @@ class Conv2dBnFoldQuantOneConv(Cell):
  stride (int): Specifies stride for all spatial dimensions with the same value.
  pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  padding (int): Implicit paddings on both sides of the input. Default: 0.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
@@ -600,7 +601,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
  class Conv2dBnFoldQuant(Cell):
  r"""
- 2D convolution with BatchNormal operation folded construct.
+ 2D convolution with Batch Normalization operation folded construct.
  This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
  please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
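"Batch Normalization operation folded construct" refers to the standard folding trick: the BN scale and shift are absorbed into the convolution weight and bias so inference needs only a single conv. The arithmetic below is that standard fold; it deliberately ignores the fake-quant simulation and training-time correction factors the real cells add.

```python
# Standard BatchNorm-folding arithmetic (inference-time view), illustration only.
import numpy as np

out_channels, eps = 4, 1e-5
weight = np.random.randn(out_channels, 3, 3, 3)     # (C_out, C_in, kH, kW)
gamma, beta = np.random.randn(out_channels), np.random.randn(out_channels)
moving_mean = np.random.randn(out_channels)
moving_var = np.abs(np.random.randn(out_channels))

sigma = np.sqrt(moving_var + eps)
weight_fold = weight * (gamma / sigma).reshape(-1, 1, 1, 1)
bias_fold = beta - gamma * moving_mean / sigma

# conv(x, weight_fold) + bias_fold  ==  batchnorm(conv(x, weight))  at inference time
print(weight_fold.shape, bias_fold.shape)   # (4, 3, 3, 3) (4,)
```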
@@ -612,8 +613,8 @@ class Conv2dBnFoldQuant(Cell):
  stride (int): Specifies stride for all spatial dimensions with the same value.
  pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  padding (int): Implicit paddings on both sides of the input. Default: 0.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
@@ -635,7 +636,8 @@ class Conv2dBnFoldQuant(Cell):
  generated by compression.quant.create_quant_config method.
  Default: both set to default FakeQuantWithMinMaxObserver.
  quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
- freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.
+ freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
+ Default: 100000.
  Inputs:
  - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
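As a rough illustration of what `freeze_bn` usually means in this kind of cell: before `freeze_bn` global steps the layer uses and updates batch statistics, afterwards it switches to the frozen moving statistics. The function below is a simplified stand-in, not the cell's actual construct().

```python
# Hedged sketch of the usual freeze_bn control flow; simplified stand-in only.
import numpy as np

def bn_forward(x, gamma, beta, moving_mean, moving_var, step, freeze_bn=100000,
               momentum=0.997, eps=1e-5):
    if step < freeze_bn:                      # still training the statistics
        mean, var = x.mean(axis=0), x.var(axis=0)
        moving_mean = momentum * moving_mean + (1 - momentum) * mean
        moving_var = momentum * moving_var + (1 - momentum) * var
    else:                                     # Batch Normalization statistics are frozen
        mean, var = moving_mean, moving_var
    y = gamma * (x - mean) / np.sqrt(var + eps) + beta
    return y, moving_mean, moving_var

x = np.random.randn(16, 8).astype(np.float32)
y, mm, mv = bn_forward(x, np.ones(8), np.zeros(8), np.zeros(8), np.ones(8), step=0)
```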
@@ -811,8 +813,8 @@ class Conv2dBnWithoutFoldQuant(Cell):
  group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  divisible by the number of groups. Default: 1.
  has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
- eps (float): Parameters for BatchNormal. Default: 1e-5.
- momentum (float): Parameters for BatchNormal op. Default: 0.997.
+ eps (float): Parameters for Batch Normalization. Default: 1e-5.
+ momentum (float): Parameters for Batch Normalization op. Default: 0.997.
  weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  Default: 'normal'.
  bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
@@ -65,7 +65,7 @@ class TimeDistributed(Cell):
  There are two cases in the implementation.
  When reshape_with_axis provided, the reshape method will be chosen, which is more efficient;
  otherwise, the method of dividing the inputs along time axis will be used, which is more general.
- For example, reshape_with_axis could not be provided when deal with batch normal.
+ For example, reshape_with_axis could not be provided when deal with Batch Normalization.
  Args:
  layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped.
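The TimeDistributed docstring names two strategies: merge the time axis into the batch axis via reshape (fast path with `reshape_with_axis`), or split along the time axis and apply the wrapped layer step by step (general path). A numpy sketch of the two strategies, with a simple element-wise layer standing in for the wrapped Cell; illustrative only, not the TimeDistributed implementation.

```python
# Two ways of applying a per-timestep layer to (batch, time, features) data.
import numpy as np

def layer(x):                        # stand-in for the wrapped layer
    return x * 2.0

x = np.random.randn(8, 5, 16)        # (batch, time, features)

# strategy 1: reshape - merge time into the batch axis, one layer call
merged = layer(x.reshape(8 * 5, 16)).reshape(8, 5, 16)

# strategy 2: split along the time axis and apply the layer step by step
split = np.stack([layer(x[:, t]) for t in range(x.shape[1])], axis=1)

print(np.allclose(merged, split))    # True for a stateless element-wise layer
```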
@@ -739,7 +739,7 @@ class SyncBatchNorm(PrimitiveWithInfer):
  r"""
  Sync Batch Normalization for input data and updated parameters.
- Sync Batch Normalization is cross device synchronized batch normalization. Batch Normalization is
+ Sync Batch Normalization is cross device synchronized Batch Normalization. Batch Normalization is
  widely used in convolutional neural networks. This operation applies Batch Normalization over input
  to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating
  Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_.
@@ -411,8 +411,8 @@ class FakeQuantPerLayer(PrimitiveWithInfer):
  ema (bool): Uses EMA algorithm update value min and max. Default: False.
  ema_decay (int) : EMA algorithm decay parameter. Default: 0.999.
  quant_delay (int): Quantilization delay parameter. Before delay step in training time not update
- simulate quantization aware funcion. After delay step in training time begin simulate the aware
- quantize funcion. Default: 0.
+ simulate quantization aware function. After delay step in training time begin simulate the aware
+ quantize function. Default: 0.
  symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  training (bool): Training the network or not. Default: True.
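The `quant_delay` wording corrected above gates when simulated quantization starts. Below is a hedged numpy sketch of per-layer fake quantization: values are quantized to the integer grid defined by (min, max) and immediately dequantized, so the float graph sees the rounding error; before `quant_delay` steps the input passes through unchanged. Simplified illustration, not the FakeQuantPerLayer kernel.

```python
# Simplified per-layer fake quantization with a delay gate; illustration only.
import numpy as np

def fake_quant_per_layer(x, min_val, max_val, step, quant_delay=0, num_bits=8,
                         narrow_range=False):
    if step < quant_delay:                      # simulation not started yet
        return x
    qmin = 1 if narrow_range else 0
    qmax = 2 ** num_bits - 1
    scale = (max_val - min_val) / (qmax - qmin)
    zero_point = qmin - round(min_val / scale)
    q = np.clip(np.round(x / scale) + zero_point, qmin, qmax)
    return (q - zero_point) * scale             # dequantize back to float

x = np.linspace(-1.0, 1.0, 5)
print(fake_quant_per_layer(x, -1.0, 1.0, step=100, quant_delay=0))
```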
@@ -687,7 +687,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer):
  class BatchNormFold(PrimitiveWithInfer):
  """
- Batch normalization folded.
+ Batch Normalization folded.
  Args:
  momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -129,7 +129,7 @@ cfg = {
  def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
  """
- Get Vgg16 neural network with batch normalization.
+ Get Vgg16 neural network with Batch Normalization.
  Args:
  num_classes (int): Class numbers. Default: 1000.
@@ -137,7 +137,7 @@ def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
  phase(str): train or test mode.
  Returns:
- Cell, cell instance of Vgg16 neural network with batch normalization.
+ Cell, cell instance of Vgg16 neural network with Batch Normalization.
  Examples:
  >>> vgg16(num_classes=1000, args=args, **kwargs)
@@ -57,7 +57,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
  stride (int): The stride of the sliding window.
  Returns:
- numpy.ndarray, grad of avgerage pooling.
+ numpy.ndarray, grad of average pooling.
  """
  # pylint: disable=unused-argument
  _, _, height, width = dout.shape
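For the average-pooling gradient in the last hunk: each output gradient is spread uniformly over its pooling window. A minimal numpy illustration for the non-overlapping case (stride equal to the window size); this is a sketch, not the `avg_pool_grad` body hidden by the diff.

```python
# Average-pooling gradient for the non-overlapping case; each input position in a
# window receives an equal share dout / (pool_h * pool_w) of that window's gradient.
import numpy as np

def avg_pool_grad_simple(dout, pool_h, pool_w):
    share = dout / (pool_h * pool_w)
    # repeat each gradient value over its pooling window
    return np.repeat(np.repeat(share, pool_h, axis=2), pool_w, axis=3)

dout = np.ones((1, 1, 2, 2))
print(avg_pool_grad_simple(dout, 2, 2))   # 4x4 map filled with 0.25
```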
@@ -70,7 +70,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
  def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  _, c_h_w = x.shape
  # Handle running_mean and running_var are not None
  # if running_mean is None:
@@ -106,7 +106,7 @@ def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
  def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  input_shape = x.shape
  if x.ndim != 2:
  batch_num = x.shape[0]
@@ -120,7 +120,7 @@ def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
  def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
  eps=1e-05, momentum=0.1, is_training=True):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  if x.ndim != 2:
  batch_num = x.shape[0]
  x = x.reshape(batch_num, -1)
@@ -141,7 +141,7 @@ def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
  def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance):
- """Batch normalization over an array."""
+ """Batch Normalization over an array."""
  if dy.ndim != 2:
  batch_size = dy.shape[0]
  dy = dy.reshape(batch_size, -1)
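These numpy helpers flatten their inputs to 2D (batch, features) before normalizing, as the `reshape(batch_num, -1)` lines show. For reference, here is a compact 2D forward/backward pair using the textbook Batch Normalization derivation; it is an illustration of the math, not the exact bodies hidden by the diff.

```python
# Textbook 2D Batch Normalization forward and backward, for reference only.
import numpy as np

def batch_norm_2d(x, gamma, beta, eps=1e-5):
    mean = x.mean(axis=0)
    inv_std = 1.0 / np.sqrt(x.var(axis=0) + eps)
    x_hat = (x - mean) * inv_std
    return gamma * x_hat + beta, mean, inv_std

def batch_norm_grad_2d(dy, x, gamma, mean, inv_std):
    n = x.shape[0]
    x_hat = (x - mean) * inv_std
    dbeta = dy.sum(axis=0)
    dgamma = (dy * x_hat).sum(axis=0)
    dx = (gamma * inv_std / n) * (n * dy - dbeta - x_hat * dgamma)
    return dx, dgamma, dbeta

x = np.random.randn(6, 3)
y, mean, inv_std = batch_norm_2d(x, np.ones(3), np.zeros(3))
dx, dgamma, dbeta = batch_norm_grad_2d(np.ones_like(x), x, np.ones(3), mean, inv_std)
```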
@@ -275,7 +275,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
  col = im2col(x, filter_h, filter_w, stride, pad, dilation)
  col_w = np.reshape(weight, (filter_num, -1)).T
  out = np.dot(col, col_w)
- out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2)
+ out = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
  if bias is not None:
  out += bias
  return out
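Shape walk-through for the reshape/transpose touched above: the im2col matmul produces a `(batch * out_h * out_w, filter_num)` matrix, which is reshaped to NHWC and transposed to NCHW. Small stand-in sizes; `im2col` itself is not re-implemented here.

```python
# Shape bookkeeping for the im2col-based conv2d output, with made-up sizes.
import numpy as np

batch_num, out_h, out_w, filter_num = 2, 5, 5, 8
out = np.random.randn(batch_num * out_h * out_w, filter_num)   # result of np.dot(col, col_w)

out_nchw = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
print(out_nchw.shape)   # (2, 8, 5, 5)
```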
@@ -287,7 +287,7 @@ def conv2d_backprop_filter(dout, x, w_size, stride=1, pad=0):
  dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num)
  col = im2col(x, filter_height, filter_width, stride, pad)
  dw = np.dot(col.T, dout)
- dw = dw.transpose(1, 0).reshape(filter_num, channel, filter_height, filter_width)
+ dw = dw.transpose(1, 0).reshape((filter_num, channel, filter_height, filter_width))
  return dw