# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """thor_ops""" import math from ..primitive import prim_attr_register, PrimitiveWithInfer from ...common import dtype as mstype from ..._checkparam import Validator as validator from ..._checkparam import Rel __all__ = ["CusBatchMatMul", "CusCholeskyTrsm", "CusFusedAbsMax1", "CusImg2Col", "CusMatMulCubeDenseLeft", "CusMatMulCubeFraczRightMul", "CusMatMulCube", "CusMatrixCombine", "CusTranspose02314", "CusMatMulCubeDenseRight", "CusMatMulCubeFraczLeftCast", ] def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=False, ret_four=False): """ Checks whether an argument is a positive int or tuple with 2 or 4(when allow_four is True) positive int elements. """ def _raise_message(): raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two " f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}") def _get_return_value(): if isinstance(arg_value, int): ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value) elif len(arg_value) == 2: ret = (1, 1, arg_value[0], arg_value[1]) if ret_four else arg_value elif len(arg_value) == 4: if not allow_four: _raise_message() ret = arg_value if ret_four else (arg_value[2], arg_value[3]) else: _raise_message() return ret validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name) ret_value = _get_return_value() for item in ret_value: if isinstance(item, int) and item > 0: continue _raise_message() return ret_value class CusBatchMatMul(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b` in batch. The rank of input tensors must be `3`. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, D, D)`. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(N, D, D)`. If `transpose_b` is True. Outputs: Tensor, the shape of the output tensor is :math:`(N, D, D)`. Examples: >>> input_x = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32) >>> input_y = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32) >>> cus_batch_matmul = P.CusBatchMatMul() >>> output = cus_batch_matmul(input_x, input_y) """ @prim_attr_register def __init__(self): """Initialize CusBatchMatMul""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.batch_matmul_impl import CusBatchMatMul def infer_shape(self, data1_shape, data2_shape): return data1_shape def infer_dtype(self, data1_dtype, data2_dtype): return data1_dtype class CusCholeskyTrsm(PrimitiveWithInfer): """ L * LT = A. LT * (LT)^-1 = I. return (LT)^-1. Only compute the res of the diag part of input matrix with dim 128. The rank of input tensors must be `2`. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, N)`. Outputs: Tensor, the shape of the output tensor is :math:`(N // Split_dim, Split_dim, Split_dim)`. Examples: >>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float32) >>> cus_choleskytrsm = P.CusCholeskyTrsm() >>> output = matmul(input_x) """ @prim_attr_register def __init__(self): """Initialize CusCholeskyTrsm""" self.init_prim_io_names(inputs=['x1'], outputs=['y']) from mindspore.ops._op_impl._custom_op.cholesky_trsm_impl import CusCholeskyTrsm def infer_shape(self, data1_shape): ll = [] m, _ = data1_shape if m >= 128: ll = [m // 128, 128, 128] else: ll = [1, 64, 64] return ll def infer_dtype(self, data1_dtype): return data1_dtype class CusFusedAbsMax1(PrimitiveWithInfer): """ Computes the abs max of Tensor input. The rank of input tensors must be `4` or `2`. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N0, M0, N1, M1)` or math:`(32, 64)`. Outputs: Tensor, the shape of the output tensor is :math:`(32, 64)` or math:`(1, )`. Examples: >>> input_x = Tensor(np.ones(shape=[1, 3]), mindspore.float32) >>> cus_fused_abs_max1 = P.CusFusedAbsMax1() >>> output = cus_fused_abs_max1(input_x) """ @prim_attr_register def __init__(self, origin_shape=[-1, -1]): """Initialize CusFusedAbsMax1""" self.init_prim_io_names(inputs=['x1'], outputs=['y']) self.origin_shape = origin_shape from mindspore.ops._op_impl._custom_op.fused_abs_max1_impl import CusFusedAbsMax1 def infer_shape(self, data1_shape): ll = [] if len(data1_shape) == 2: ll = [1,] else: ll = [32, 64] return ll def infer_dtype(self, data1_dtype): return data1_dtype class CusImg2Col(PrimitiveWithInfer): """ Img2cols the feature map and the result in reorganized in NC1HWC0. Args: - **strides** (listInt) - the stride of the ops. - **ksizes** (listInt) - the kernel size of the ops. Inputs: - **input_x** (Tensor) - The shape of the tensor is :math:`(N, C, H, W)`. Outputs: Tensor, the shape of the output tensor is :math:`(N * H_O * W_O, C1 * K_W * K_H * C0)`. Examples: >>> input_x = Tensor(np.ones(shape=[32, 3, 224, 224]), mindspore.float16) >>> cusimg2col = P.CusImg2Col() >>> output = cusimg2col(input_x) """ @prim_attr_register def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"): """Initialize CusImg2Col""" self.init_prim_io_names(inputs=['x1'], outputs=['y']) self.ksizes = ksizes self.strides = strides self.dilates = dilates self.mode = mode from mindspore.ops._op_impl._custom_op.img2col_impl import CusImg2Col def infer_shape(self, data1_shape): bs, c, h, w = data1_shape _, stride_h, stride_w, _ = self.strides _, k_w, k_h, _ = self.ksizes # assert m == n c0 = 16 c1 = c // 16 if c1 == 0: c1 = 1 shape = [bs * int(h // stride_h) * int(w // stride_w), k_w * k_h * c1 * c0] return shape def infer_dtype(self, data1_dtype): return data1_dtype class CusMatMulCubeDenseLeft(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b`. The rank of input_x1 must be `4`, the fractal format of the normal matrix. The rank of input_x2 must be `2`. Inputs: - **input_x1** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N0, M0, N1, M1)`. - **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(M, C)`. Outputs: Tensor, the shape of the output tensor is :math:`(N, C)`. Examples: >>> input_x = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16) >>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> matmulcubedenseleft = P.CusMatMulCubeDenseLeft() >>> output = matmulcubedenseleft(input_x, input_y) """ @prim_attr_register def __init__(self): """Initialize CusMatMulCubeDenseLeft""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_dense_left_impl import CusMatMulCubeDenseLeft def infer_shape(self, data1_shape, data2_shape): return data2_shape def infer_dtype(self, data1_dtype, data2_dtype): return mstype.float16 class CusMatMulCubeFraczRightMul(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b` and muls the result by scalar `c`. The rank of input_x1 tensors must be `2`. The rank of input_x2 tensors must be `4`. Inputs: - **input_x1** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. - **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C1, M1, C0, M0)`. - **input_x3** (Tensor) - The third tensor to be multiplied. The shape of the tensor if :math`(1, )`. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. Examples: >>> input_x1 = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> input_x2 = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16) >>> input_x3 = Tensor(np.ones(shape=[1, ]), mindspore.float16) >>> cusmatmulfraczrightmul = P.CusMatMulCubeFraczRightMul() >>> output = cusmatmulfraczrightmul(input_x1, input_x2, input_x3) """ @prim_attr_register def __init__(self): """Initialize CusMatMulCubeFraczRightMul""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul def infer_shape(self, data1_shape, data2_shape, data3_shape): return data1_shape def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype): return mstype.float32 class CusMatMulCube(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b`. The rank of input tensors must be `2`. Args: transpose_a (bool): If true, `a` is transposed before multiplication. Default: False. transpose_b (bool): If true, `b` is transposed before multiplication. Default: False. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. If `transpose_a` is True, its shape must be :math:`(N, C)` after transposing. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`. If `transpose_b` is True, its shape must be :math:`(C, M)` after transpose. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. Examples: >>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> cusmatmulcube = P.CusMatMulCube() >>> output = matmul(input_x, input_y) """ @prim_attr_register def __init__(self, transpose_a=False, transpose_b=False): """Initialize CusMatMulCube""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) self.transpose_a = transpose_a self.transpose_b = transpose_b from mindspore.ops._op_impl._custom_op.matmul_cube_impl import CusMatMulCube def infer_shape(self, data1_shape, data2_shape): if self.transpose_a: k1, m = data1_shape else: m, k1 = data1_shape if self.transpose_b: n, k2 = data2_shape else: k2, n = data2_shape assert k1 == k2 shape = [m, n] return shape def infer_dtype(self, data1_dtype, data2_dtype): return mstype.float32 class CusMatrixCombine(PrimitiveWithInfer): """ move the batch matrix to result matrix diag part. The rank of input tensors must be `3`. Inputs: - **input_x** (Tensor) - The shape of the tensor is :math:`(N, D, D)`. Outputs: Tensor, the shape of the output tensor is :math:`(N * D, N * D)`. Examples: >>> input_x = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32) >>> cusmatrixcombine = P.CusMatrixCombine() >>> output = cusmatrixcombine(input_x) """ @prim_attr_register def __init__(self): """Initialize CusMatrixCombine""" self.init_prim_io_names(inputs=['x'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matrix_combine_impl import CusMatrixCombine def infer_shape(self, data_shape): a, b, c = data_shape shape = [a * b, a * c] return shape def infer_dtype(self, data_dtype): return data_dtype class CusTranspose02314(PrimitiveWithInfer): """ Permute input tensor with perm (0, 2, 3, 1, 4) The rank of input tensors must be `5` with format NC1HWC0. Inputs: - **input_x** (Tensor) - The shape of the tensor is :math:`(N, C1, H, W, C0)`. Outputs: Tensor, the shape of the output tensor is :math:`(N, H, W, C1, C0)`. Examples: >>> input_x = Tensor(np.ones(shape=[32, 1, 224, 224, 16]), mindspore.float16) >>> custranspose02314 = P.CusTranspose02314() >>> output = custranspose02314(input_x) """ @prim_attr_register def __init__(self): """Initialize CusTranspose02314""" self.init_prim_io_names(inputs=['x1'], outputs=['y']) from mindspore.ops._op_impl._custom_op.transpose02314_impl import CusTranspose02314 def get_bprop(self): def bprop(x, out, dout): return (C.zeros_like(x),) return bprop def infer_shape(self, data1_shape): assert len(data1_shape) == 4 n, c, h, w = data1_shape c0 = 16 c1 = c // 16 shape = (n * h * w, c1 * c0) return shape def infer_dtype(self, data1_dtype): return data1_dtype class CusMatMulCubeDenseRight(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b`. The rank of input_x1 tensor must be `2`. The rank of input_x2 tensor must be `4`. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C1, M1, M0, C0)`. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. Examples: >>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> input_y = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16) >>> cusmatmulcubedenseright = P.CusMatMulCubeDenseRight() >>> output = cusmatmulcubedenseright(input_x, input_y) """ @prim_attr_register def __init__(self): """Initialize CusMatMulCubeDenseRight""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_dense_right_impl import CusMatMulCubeDenseRight def infer_shape(self, data1_shape, data2_shape, data3_shape): return data1_shape def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype): return mstype.float32 class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer): """ Multiplies matrix `a` by matrix `b`. The rank of input_x1 tensor must be `4`. The rank of input_x2 tensors must be `2`. Inputs: - **input_x1** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(C1, N1, N0, C0)`. - **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. Examples: >>> input_x = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16) >>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16) >>> cusmatmulcubefraczleftcast = P.CusMatMulCubeFraczLeftCast() >>> output = cusmatmulcubefraczleftcast(input_x, input_y) """ @prim_attr_register def __init__(self): """Initialize CusMatMulCubeFraczLeftCast""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_left_cast_impl import CusMatMulCubeFraczLeftCast def infer_shape(self, data1_shape, data2_shape): return data2_shape def infer_dtype(self, data1_dtype, data2_dtype): return mstype.float16 class Im2Col(PrimitiveWithInfer): """ extracts image pathes from image. The rank of input_x1 must be `4`, data_format is "NCHW". Inputs: - **input_x1** (Tensor) - The feature map. The shape of the tensor is :math:`(N, C, H, W)`. Outputs: Tensor. Examples: >>> input_x = Tensor(np.random.rand(32, 3, 224, 224).astype(np.float16)) >>> img2col = P.CusMatMulCubeDenseLeft(kernel_size=7, pad=3, stride=2) >>> output = img2col(input_x) """ @prim_attr_register def __init__(self, kernel_size, pad_mode="valid", pad=0, stride=1, dilation=1): """Initialize Im2Col""" self.init_prim_io_names(inputs=['x'], outputs=['output']) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) self.add_prim_attr('kernel_size', self.kernel_size) self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True) self.add_prim_attr('stride', self.stride) self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name, allow_four=True, ret_four=True) self.add_prim_attr('dilation', self.dilation) validator.check_value_type('pad', pad, (int,), self.name) self.pad_mode = validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.name) self.pad = validator.check_pad_value_by_mode(pad_mode, pad, self.name) if self.pad_mode == 'pad': validator.check_non_negative_int(self.pad, 'pad', self.name) self.add_prim_attr('data_format', "NCHW") def infer_shape(self, x_shape): validator.check_equal_int(len(x_shape), 4, "x rank", self.name) kernel_size_h = self.kernel_size[0] kernel_size_w = self.kernel_size[1] stride_h = self.stride[2] stride_w = self.stride[3] dilation_h = self.dilation[2] dilation_w = self.dilation[3] if self.pad_mode == "valid": h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h) w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w) pad_top, pad_bottom, pad_left, pad_right = 0, 0, 0, 0 elif self.pad_mode == "same": h_out = math.ceil(x_shape[2] / stride_h) w_out = math.ceil(x_shape[3] / stride_w) pad_needed_h = max(0, (h_out - 1) * stride_h + dilation_h * (kernel_size_h - 1) + 1 - x_shape[2]) pad_top = math.floor(pad_needed_h / 2) pad_bottom = pad_needed_h - pad_top pad_needed_w = max(0, (w_out - 1) * stride_w + dilation_w * (kernel_size_w - 1) + 1 - x_shape[3]) pad_left = math.floor(pad_needed_w / 2) pad_right = pad_needed_w - pad_left elif self.pad_mode == 'pad': pad_top, pad_bottom, pad_left, pad_right = self.pad, self.pad, self.pad, self.pad h_out = 1 + (x_shape[2] + 2 * self.pad - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) / stride_h w_out = 1 + (x_shape[3] + 2 * self.pad - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) / stride_w h_out = math.floor(h_out) w_out = math.floor(w_out) self.pad_list = [pad_top, pad_bottom, pad_left, pad_right] self.add_prim_attr('pad_list', (pad_top, pad_bottom, pad_left, pad_right)) batch_size = x_shape[0] channel = x_shape[1] k_h = kernel_size_h k_w = kernel_size_w out_shape = [channel, k_h, k_w, batch_size, h_out, w_out] return out_shape def infer_dtype(self, x_dtype): args = {'x': x_dtype} valid_types = [mstype.float16, mstype.float32] validator.check_tensor_type_same(args, valid_types, self.name) return x_dtype class UpdateThorGradient(PrimitiveWithInfer): """ Updates Thor Gradient with Approximate Fisher info matrix(for GPU backend). The rank of input_x1 must be `3`, which indicates the A matrix. The rank of input_x2 must be `2`, which indicates the 1st-order gradient. The rank of input_x3 must be `4`, which indicates the G matrix. Inputs: - **input_x1** (Tensor) - The first input is the diag part of the cov matrix of feature map. Supported dtype [float32]. - **input_x2** (Tensor) - The second input is the corresponding 1st-order grad. Supported dtype [float32]. - **input_x3** (Tensor) - The third input is the diag part of the cov matrix of dout. Supported dtype [float32]. Outputs: Tensor, the shape is the same as the shape of input_x2, it will be used to update the weights. Examples: >>> input_x1 = Tensor(np.random.rand(16, 128, 128).astype(np.float32)) >>> input_x2 = Tensor(np.random.rand(2048, 1024).astype(np.float32)) >>> temp_x3 = np.random.rand(8, 128, 128).astype(np.float32) >>> input_x3 = np.zeros(16,8,128,128).astype(np.float32) >>> for i in range(16): >>> input_x3[i,:,:,:] = temp_x3 >>> input_x3 = Tensor(input_x3) >>> update_thor_gradient = P.UpdateThorGradient(split_dim=128) >>> output = update_thor_gradient(input_x1, input_x2, input_x3) """ @prim_attr_register def __init__(self, split_dim=0): """Initialize UpdateThorGradient""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) self.split_dim = split_dim self.add_prim_attr('split_dim', self.split_dim) def infer_shape(self, x1_shape, x2_shape, x3_shape): return x2_shape def infer_dtype(self, x1_dtype, x2_dtype, x3_dtype): validator.check_tensor_type_same({'x1_dtype': x1_dtype, 'x2_dtype': x2_dtype, 'x3_dtype': x3_dtype}, [mstype.float32], self.name) return x2_dtype class Cholesky(PrimitiveWithInfer): """ Inner API for resnet50 THOR GPU backend """ @prim_attr_register def __init__(self, split_dim=0): self.init_prim_io_names(inputs=['x1'], outputs=['y']) self.split_dim = split_dim self.add_prim_attr('split_dim', self.split_dim) def infer_shape(self, x1_shape): if self.split_dim != 0: assert len(x1_shape) == 2 height = x1_shape[0] width = x1_shape[1] assert height == width if height <= self.split_dim: out_shape = [1, height, width] else: batch = height // self.split_dim if height != batch * self.split_dim: batch += 1 out_shape = [batch, self.split_dim, self.split_dim] else: out_shape = x1_shape return out_shape def infer_dtype(self, x1_dtype): validator.check_tensor_type_same({'x1_dtype': x1_dtype}, [mstype.float32], self.name) return x1_dtype