From: @dinglinhe123 Reviewed-by: @liangchenghui,@wuxuejian Signed-off-by: @liangchenghui pull/15365/MERGE
| @@ -51,12 +51,12 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| auto prop_kind = dnnl::prop_kind::forward_training; | auto prop_kind = dnnl::prop_kind::forward_training; | ||||
| auto normalization_flags = dnnl::normalization_flags::use_scale_shift; | auto normalization_flags = dnnl::normalization_flags::use_scale_shift; | ||||
| // fused batch normalization forward description | |||||
| // fused Batch Normalization forward description | |||||
| dnnl::batch_normalization_forward::desc desc = | dnnl::batch_normalization_forward::desc desc = | ||||
| dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags); | dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags); | ||||
| auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | ||||
| // fused batch normalization backward description | |||||
| // fused Batch Normalization backward description | |||||
| dnnl::batch_normalization_backward::desc backward_desc = | dnnl::batch_normalization_backward::desc backward_desc = | ||||
| dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags); | dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags); | ||||
| auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc( | auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc( | ||||
| @@ -33,7 +33,7 @@ enum OpMergeMode { | |||||
| OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list | OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list | ||||
| OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv` | OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv` | ||||
| OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm` | OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm` | ||||
| OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization` | |||||
| OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization` | |||||
| OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool` | OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool` | ||||
| }; | }; | ||||
| @@ -339,7 +339,7 @@ class QuantizationAwareTraining(Quantizer): | |||||
| quant_config=self.quant_config, | quant_config=self.quant_config, | ||||
| quant_dtype=self.weight_dtype, | quant_dtype=self.weight_dtype, | ||||
| fake=True) | fake=True) | ||||
| # change original network BatchNormal OP parameters to quant network | |||||
| # change original network Batch Normalization OP parameters to quant network | |||||
| conv_inner.gamma = subcell.batchnorm.gamma | conv_inner.gamma = subcell.batchnorm.gamma | ||||
| conv_inner.beta = subcell.batchnorm.beta | conv_inner.beta = subcell.batchnorm.beta | ||||
| conv_inner.moving_mean = subcell.batchnorm.moving_mean | conv_inner.moving_mean = subcell.batchnorm.moving_mean | ||||
| @@ -363,7 +363,7 @@ class QuantizationAwareTraining(Quantizer): | |||||
| bias_init=conv_inner.bias_init, | bias_init=conv_inner.bias_init, | ||||
| quant_config=self.quant_config, | quant_config=self.quant_config, | ||||
| quant_dtype=self.weight_dtype) | quant_dtype=self.weight_dtype) | ||||
| # change original network BatchNormal OP parameters to quant network | |||||
| # change original network Batch Normalization OP parameters to quant network | |||||
| conv_inner.batchnorm.gamma = subcell.batchnorm.gamma | conv_inner.batchnorm.gamma = subcell.batchnorm.gamma | ||||
| conv_inner.batchnorm.beta = subcell.batchnorm.beta | conv_inner.batchnorm.beta = subcell.batchnorm.beta | ||||
| conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean | conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean | ||||
| @@ -12,7 +12,7 @@ | |||||
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """less batch normalization""" | |||||
| """less Batch Normalization""" | |||||
| import numpy as np | import numpy as np | ||||
| from mindspore import nn | from mindspore import nn | ||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| @@ -233,7 +233,7 @@ def _shape_check_bn(in_shape, in_dims): | |||||
| @constexpr | @constexpr | ||||
| def _shape_infer(x_shape, num_feature): | def _shape_infer(x_shape, num_feature): | ||||
| """global batch normalization shape and axes infer""" | |||||
| """global Batch Normalization shape and axes infer""" | |||||
| if len(x_shape) == 4: | if len(x_shape) == 4: | ||||
| axes = (0, 2, 3) | axes = (0, 2, 3) | ||||
| re_shape = (1, num_feature, 1, 1) | re_shape = (1, num_feature, 1, 1) | ||||
| @@ -245,7 +245,7 @@ def _shape_infer(x_shape, num_feature): | |||||
| class BatchNorm1d(_BatchNorm): | class BatchNorm1d(_BatchNorm): | ||||
| r""" | r""" | ||||
| Batch normalization layer over a 2D input. | |||||
| Batch Normalization layer over a 2D input. | |||||
| Batch Normalization is widely used in convolutional networks. This layer | Batch Normalization is widely used in convolutional networks. This layer | ||||
| applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to | applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to | ||||
| @@ -334,7 +334,7 @@ class BatchNorm1d(_BatchNorm): | |||||
| class BatchNorm2d(_BatchNorm): | class BatchNorm2d(_BatchNorm): | ||||
| r""" | r""" | ||||
| Batch normalization layer over a 4D input. | |||||
| Batch Normalization layer over a 4D input. | |||||
| Batch Normalization is widely used in convolutional networks. This layer | Batch Normalization is widely used in convolutional networks. This layer | ||||
| applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with | applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with | ||||
| @@ -441,7 +441,7 @@ def _check_3d_shape(input_shape): | |||||
| class BatchNorm3d(Cell): | class BatchNorm3d(Cell): | ||||
| r""" | r""" | ||||
| Batch normalization layer over a 5D input. | |||||
| Batch Normalization layer over a 5D input. | |||||
| Batch Normalization is widely used in convolutional networks. This layer | Batch Normalization is widely used in convolutional networks. This layer | ||||
| applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with | applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with | ||||
| @@ -540,7 +540,7 @@ class GlobalBatchNorm(_BatchNorm): | |||||
| r""" | r""" | ||||
| Global normalization layer over a N-dimension input. | Global normalization layer over a N-dimension input. | ||||
| Global Normalization is cross device synchronized batch normalization. The implementation of Batch Normalization | |||||
| Global Normalization is cross device synchronized Batch Normalization. The implementation of Batch Normalization | |||||
| only normalizes the data within each device. Global normalization will normalize the input within the group. | only normalizes the data within each device. Global normalization will normalize the input within the group. | ||||
| It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by | It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by | ||||
| Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the | Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the | ||||
| @@ -653,10 +653,10 @@ class GlobalBatchNorm(_BatchNorm): | |||||
| class SyncBatchNorm(_BatchNorm): | class SyncBatchNorm(_BatchNorm): | ||||
| r""" | r""" | ||||
| Sync Batch normalization layer over a N-dimension input. | |||||
| Sync Batch Normalization layer over a N-dimension input. | |||||
| Sync Batch Normalization is cross device synchronized batch normalization. The implementation of Batch | |||||
| Normalization only normalizes the data within each device. Sync Batch normalization will normalize the input | |||||
| Sync Batch Normalization is cross device synchronized Batch Normalization. The implementation of Batch | |||||
| Normalization only normalizes the data within each device. Sync Batch Normalization will normalize the input | |||||
| within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by | within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by | ||||
| Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the | Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the | ||||
| feature using a mini-batch of data and the learned parameters which can be described in the following formula. | feature using a mini-batch of data and the learned parameters which can be described in the following formula. | ||||
| @@ -47,7 +47,7 @@ __all__ = [ | |||||
| class BatchNormFoldCell(Cell): | class BatchNormFoldCell(Cell): | ||||
| """ | """ | ||||
| Batch normalization folded. | |||||
| Batch Normalization folded. | |||||
| Args: | Args: | ||||
| momentum (float): Momentum value must be [0, 1]. Default: 0.9. | momentum (float): Momentum value must be [0, 1]. Default: 0.9. | ||||
| @@ -402,7 +402,8 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio | |||||
| class Conv2dBnFoldQuantOneConv(Cell): | class Conv2dBnFoldQuantOneConv(Cell): | ||||
| r""" | r""" | ||||
| 2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct. | |||||
| 2D convolution which use the convolution layer statistics once to calculate Batch Normalization | |||||
| operation folded construct. | |||||
| This part is a more detailed overview of Conv2d operation. For more details about Quantization, | This part is a more detailed overview of Conv2d operation. For more details about Quantization, | ||||
| please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. | please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. | ||||
| @@ -414,8 +415,8 @@ class Conv2dBnFoldQuantOneConv(Cell): | |||||
| stride (int): Specifies stride for all spatial dimensions with the same value. | stride (int): Specifies stride for all spatial dimensions with the same value. | ||||
| pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". | pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". | ||||
| padding (int): Implicit paddings on both sides of the input. Default: 0. | padding (int): Implicit paddings on both sides of the input. Default: 0. | ||||
| eps (float): Parameters for BatchNormal. Default: 1e-5. | |||||
| momentum (float): Parameters for BatchNormal op. Default: 0.997. | |||||
| eps (float): Parameters for Batch Normalization. Default: 1e-5. | |||||
| momentum (float): Parameters for Batch Normalization op. Default: 0.997. | |||||
| dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. | dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. | ||||
| group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | ||||
| divisible by the number of groups. Default: 1. | divisible by the number of groups. Default: 1. | ||||
| @@ -600,7 +601,7 @@ class Conv2dBnFoldQuantOneConv(Cell): | |||||
| class Conv2dBnFoldQuant(Cell): | class Conv2dBnFoldQuant(Cell): | ||||
| r""" | r""" | ||||
| 2D convolution with BatchNormal operation folded construct. | |||||
| 2D convolution with Batch Normalization operation folded construct. | |||||
| This part is a more detailed overview of Conv2d operation. For more details about Quantization, | This part is a more detailed overview of Conv2d operation. For more details about Quantization, | ||||
| please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. | please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. | ||||
| @@ -612,8 +613,8 @@ class Conv2dBnFoldQuant(Cell): | |||||
| stride (int): Specifies stride for all spatial dimensions with the same value. | stride (int): Specifies stride for all spatial dimensions with the same value. | ||||
| pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". | pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". | ||||
| padding (int): Implicit paddings on both sides of the input. Default: 0. | padding (int): Implicit paddings on both sides of the input. Default: 0. | ||||
| eps (float): Parameters for BatchNormal. Default: 1e-5. | |||||
| momentum (float): Parameters for BatchNormal op. Default: 0.997. | |||||
| eps (float): Parameters for Batch Normalization. Default: 1e-5. | |||||
| momentum (float): Parameters for Batch Normalization op. Default: 0.997. | |||||
| dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. | dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. | ||||
| group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | ||||
| divisible by the number of groups. Default: 1. | divisible by the number of groups. Default: 1. | ||||
| @@ -635,7 +636,8 @@ class Conv2dBnFoldQuant(Cell): | |||||
| generated by compression.quant.create_quant_config method. | generated by compression.quant.create_quant_config method. | ||||
| Default: both set to default FakeQuantWithMinMaxObserver. | Default: both set to default FakeQuantWithMinMaxObserver. | ||||
| quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. | quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. | ||||
| freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000. | |||||
| freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step. | |||||
| Default: 100000. | |||||
| Inputs: | Inputs: | ||||
| - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. | - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. | ||||
| @@ -811,8 +813,8 @@ class Conv2dBnWithoutFoldQuant(Cell): | |||||
| group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | group (int): Splits filter into groups, `in_ channels` and `out_channels` must be | ||||
| divisible by the number of groups. Default: 1. | divisible by the number of groups. Default: 1. | ||||
| has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. | has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. | ||||
| eps (float): Parameters for BatchNormal. Default: 1e-5. | |||||
| momentum (float): Parameters for BatchNormal op. Default: 0.997. | |||||
| eps (float): Parameters for Batch Normalization. Default: 1e-5. | |||||
| momentum (float): Parameters for Batch Normalization op. Default: 0.997. | |||||
| weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. | weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. | ||||
| Default: 'normal'. | Default: 'normal'. | ||||
| bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. | bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. | ||||
| @@ -65,7 +65,7 @@ class TimeDistributed(Cell): | |||||
| There are two cases in the implementation. | There are two cases in the implementation. | ||||
| When reshape_with_axis provided, the reshape method will be chosen, which is more efficient; | When reshape_with_axis provided, the reshape method will be chosen, which is more efficient; | ||||
| otherwise, the method of dividing the inputs along time axis will be used, which is more general. | otherwise, the method of dividing the inputs along time axis will be used, which is more general. | ||||
| For example, reshape_with_axis could not be provided when deal with batch normal. | |||||
| For example, reshape_with_axis could not be provided when deal with Batch Normalization. | |||||
| Args: | Args: | ||||
| layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped. | layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped. | ||||
| @@ -739,7 +739,7 @@ class SyncBatchNorm(PrimitiveWithInfer): | |||||
| r""" | r""" | ||||
| Sync Batch Normalization for input data and updated parameters. | Sync Batch Normalization for input data and updated parameters. | ||||
| Sync Batch Normalization is cross device synchronized batch normalization. Batch Normalization is | |||||
| Sync Batch Normalization is cross device synchronized Batch Normalization. Batch Normalization is | |||||
| widely used in convolutional neural networks. This operation applies Batch Normalization over input | widely used in convolutional neural networks. This operation applies Batch Normalization over input | ||||
| to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating | to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating | ||||
| Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. | Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. | ||||
| @@ -411,8 +411,8 @@ class FakeQuantPerLayer(PrimitiveWithInfer): | |||||
| ema (bool): Uses EMA algorithm update value min and max. Default: False. | ema (bool): Uses EMA algorithm update value min and max. Default: False. | ||||
| ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. | ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. | ||||
| quant_delay (int): Quantization delay parameter. Before delay step in training time not update | quant_delay (int): Quantization delay parameter. Before delay step in training time not update | ||||
| simulate quantization aware funcion. After delay step in training time begin simulate the aware | |||||
| quantize funcion. Default: 0. | |||||
| simulate quantization aware function. After delay step in training time begin simulate the aware | |||||
| quantize function. Default: 0. | |||||
| symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. | symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. | ||||
| narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. | narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. | ||||
| training (bool): Training the network or not. Default: True. | training (bool): Training the network or not. Default: True. | ||||
| @@ -687,7 +687,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer): | |||||
| class BatchNormFold(PrimitiveWithInfer): | class BatchNormFold(PrimitiveWithInfer): | ||||
| """ | """ | ||||
| Batch normalization folded. | |||||
| Batch Normalization folded. | |||||
| Args: | Args: | ||||
| momentum (float): Momentum value must be [0, 1]. Default: 0.9. | momentum (float): Momentum value must be [0, 1]. Default: 0.9. | ||||
| @@ -129,7 +129,7 @@ cfg = { | |||||
| def vgg16(num_classes=1000, args=None, phase="train", **kwargs): | def vgg16(num_classes=1000, args=None, phase="train", **kwargs): | ||||
| """ | """ | ||||
| Get Vgg16 neural network with batch normalization. | |||||
| Get Vgg16 neural network with Batch Normalization. | |||||
| Args: | Args: | ||||
| num_classes (int): Class numbers. Default: 1000. | num_classes (int): Class numbers. Default: 1000. | ||||
| @@ -137,7 +137,7 @@ def vgg16(num_classes=1000, args=None, phase="train", **kwargs): | |||||
| phase(str): train or test mode. | phase(str): train or test mode. | ||||
| Returns: | Returns: | ||||
| Cell, cell instance of Vgg16 neural network with batch normalization. | |||||
| Cell, cell instance of Vgg16 neural network with Batch Normalization. | |||||
| Examples: | Examples: | ||||
| >>> vgg16(num_classes=1000, args=args, **kwargs) | >>> vgg16(num_classes=1000, args=args, **kwargs) | ||||
| @@ -57,7 +57,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride): | |||||
| stride (int): The stride of the sliding window. | stride (int): The stride of the sliding window. | ||||
| Returns: | Returns: | ||||
| numpy.ndarray, grad of avgerage pooling. | |||||
| numpy.ndarray, grad of average pooling. | |||||
| """ | """ | ||||
| # pylint: disable=unused-argument | # pylint: disable=unused-argument | ||||
| _, _, height, width = dout.shape | _, _, height, width = dout.shape | ||||
| @@ -70,7 +70,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride): | |||||
| def _batch_norm(x, scale, shift, running_mean=None, running_var=None, | def _batch_norm(x, scale, shift, running_mean=None, running_var=None, | ||||
| eps=1e-05, momentum=0.1, is_training=True): | eps=1e-05, momentum=0.1, is_training=True): | ||||
| """Batch normalization over an array.""" | |||||
| """Batch Normalization over an array.""" | |||||
| _, c_h_w = x.shape | _, c_h_w = x.shape | ||||
| # Handle running_mean and running_var are not None | # Handle running_mean and running_var are not None | ||||
| # if running_mean is None: | # if running_mean is None: | ||||
| @@ -106,7 +106,7 @@ def _batch_norm(x, scale, shift, running_mean=None, running_var=None, | |||||
| def batch_norm(x, scale=1, shift=0, mean=None, variance=None, | def batch_norm(x, scale=1, shift=0, mean=None, variance=None, | ||||
| eps=1e-05, momentum=0.1, is_training=True): | eps=1e-05, momentum=0.1, is_training=True): | ||||
| """Batch normalization over an array.""" | |||||
| """Batch Normalization over an array.""" | |||||
| input_shape = x.shape | input_shape = x.shape | ||||
| if x.ndim != 2: | if x.ndim != 2: | ||||
| batch_num = x.shape[0] | batch_num = x.shape[0] | ||||
| @@ -120,7 +120,7 @@ def batch_norm(x, scale=1, shift=0, mean=None, variance=None, | |||||
| def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \ | def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \ | ||||
| eps=1e-05, momentum=0.1, is_training=True): | eps=1e-05, momentum=0.1, is_training=True): | ||||
| """Batch normalization over an array.""" | |||||
| """Batch Normalization over an array.""" | |||||
| if x.ndim != 2: | if x.ndim != 2: | ||||
| batch_num = x.shape[0] | batch_num = x.shape[0] | ||||
| x = x.reshape(batch_num, -1) | x = x.reshape(batch_num, -1) | ||||
| @@ -141,7 +141,7 @@ def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \ | |||||
| def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance): | def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance): | ||||
| """Batch normalization over an array.""" | |||||
| """Batch Normalization over an array.""" | |||||
| if dy.ndim != 2: | if dy.ndim != 2: | ||||
| batch_size = dy.shape[0] | batch_size = dy.shape[0] | ||||
| dy = dy.reshape(batch_size, -1) | dy = dy.reshape(batch_size, -1) | ||||
| @@ -275,7 +275,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0, | |||||
| col = im2col(x, filter_h, filter_w, stride, pad, dilation) | col = im2col(x, filter_h, filter_w, stride, pad, dilation) | ||||
| col_w = np.reshape(weight, (filter_num, -1)).T | col_w = np.reshape(weight, (filter_num, -1)).T | ||||
| out = np.dot(col, col_w) | out = np.dot(col, col_w) | ||||
| out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2) | |||||
| out = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2) | |||||
| if bias is not None: | if bias is not None: | ||||
| out += bias | out += bias | ||||
| return out | return out | ||||
| @@ -287,7 +287,7 @@ def conv2d_backprop_filter(dout, x, w_size, stride=1, pad=0): | |||||
| dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num) | dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num) | ||||
| col = im2col(x, filter_height, filter_width, stride, pad) | col = im2col(x, filter_height, filter_width, stride, pad) | ||||
| dw = np.dot(col.T, dout) | dw = np.dot(col.T, dout) | ||||
| dw = dw.transpose(1, 0).reshape(filter_num, channel, filter_height, filter_width) | |||||
| dw = dw.transpose(1, 0).reshape((filter_num, channel, filter_height, filter_width)) | |||||
| return dw | return dw | ||||