From: @anrui-wang Reviewed-by: Signed-off-by:tags/v1.1.0
| @@ -17,25 +17,26 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <map> | |||||
| #include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h" | #include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h" | ||||
| #include "runtime/device/cpu/cpu_device_address.h" | #include "runtime/device/cpu/cpu_device_address.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| const size_t kReduceTypeMax = 0; | |||||
| const size_t kReduceTypeMean = 1; | |||||
| const size_t kReduceTypeSum = 2; | |||||
| const size_t kReduceTypeMax = 1; | |||||
| const size_t kReduceTypeMean = 2; | |||||
| const size_t kReduceTypeSum = 3; | |||||
| const size_t kReduceTypeMin = 4; | |||||
| const size_t kMaxDim = 100; | const size_t kMaxDim = 100; | ||||
| static std::map<std::string, int> reduce_types_map_ = { | |||||
| {"ReduceMax", 1}, {"ReduceMean", 2}, {"ReduceSum", 3}, {"ReduceMin", 4}}; | |||||
| void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { | void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_node); | MS_EXCEPTION_IF_NULL(kernel_node); | ||||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | ||||
| if (kernel_name == "ReduceMax") { | |||||
| reduce_type_ = kReduceTypeMax; | |||||
| } else if (kernel_name == "ReduceMean") { | |||||
| reduce_type_ = kReduceTypeMean; | |||||
| } else if (kernel_name == "ReduceSum") { | |||||
| reduce_type_ = kReduceTypeSum; | |||||
| } else { | |||||
| reduce_type_ = reduce_types_map_[kernel_name]; | |||||
| if (reduce_type_ == 0) { | |||||
| MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported."; | MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported."; | ||||
| } | } | ||||
| shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | ||||
| @@ -57,6 +58,7 @@ void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| } | } | ||||
| left_dims_ = left_dims_ / stride_; | left_dims_ = left_dims_ / stride_; | ||||
| } | } | ||||
| bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | ||||
| const std::vector<kernel::AddressPtr> & /*workspaces*/, | const std::vector<kernel::AddressPtr> & /*workspaces*/, | ||||
| const std::vector<kernel::AddressPtr> &outputs) { | const std::vector<kernel::AddressPtr> &outputs) { | ||||
| @@ -127,17 +129,23 @@ void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) { | |||||
| } | } | ||||
| void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) { | void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) { | ||||
| if (reduce_type_ == kReduceTypeMax) { | |||||
| if (reduce_type_ == kReduceTypeMax || reduce_type_ == kReduceTypeMin) { | |||||
| for (size_t i = 0; i < left_dims_; ++i) { | for (size_t i = 0; i < left_dims_; ++i) { | ||||
| float value = new_input[i * stride_]; | float value = new_input[i * stride_]; | ||||
| for (size_t k = 0; k < stride_; ++k) { | for (size_t k = 0; k < stride_; ++k) { | ||||
| if (value < new_input[i * stride_ + k]) { | |||||
| value = new_input[i * stride_ + k]; | |||||
| if (reduce_type_ == kReduceTypeMax) { | |||||
| if (value < new_input[i * stride_ + k]) { | |||||
| value = new_input[i * stride_ + k]; | |||||
| } | |||||
| } else { | |||||
| if (value > new_input[i * stride_ + k]) { | |||||
| value = new_input[i * stride_ + k]; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| output[i] = value; | output[i] = value; | ||||
| } | } | ||||
| } else { | |||||
| } else if (reduce_type_ == kReduceTypeMean || reduce_type_ == kReduceTypeSum) { | |||||
| for (size_t i = 0; i < left_dims_; ++i) { | for (size_t i = 0; i < left_dims_; ++i) { | ||||
| float value = 0.0; | float value = 0.0; | ||||
| for (size_t k = 0; k < stride_; ++k) { | for (size_t k = 0; k < stride_; ++k) { | ||||
| @@ -149,20 +157,23 @@ void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) | |||||
| output[i] = value; | output[i] = value; | ||||
| } | } | ||||
| } | } | ||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "Array reduce kernel type " << reduce_type_ << " is not supported."; | |||||
| } | } | ||||
| } | } | ||||
| void ReduceCPUKernel::Transpose(const int size, const float *input, const std::vector<size_t> &input_shape, | void ReduceCPUKernel::Transpose(const int size, const float *input, const std::vector<size_t> &input_shape, | ||||
| const std::vector<size_t> &input_axis, const int shape_size, float *output) { | const std::vector<size_t> &input_axis, const int shape_size, float *output) { | ||||
| int pos_array[kMaxDim]; | int pos_array[kMaxDim]; | ||||
| int size_offset[kMaxDim]; | int size_offset[kMaxDim]; | ||||
| size_offset[0] = size / SizeToInt(input_shape[0]); | size_offset[0] = size / SizeToInt(input_shape[0]); | ||||
| for (int i = 1; i < shape_size; i++) { | |||||
| for (int i = 1; i < shape_size; ++i) { | |||||
| size_offset[i] = size_offset[i - 1] / SizeToInt(input_shape[i]); | size_offset[i] = size_offset[i - 1] / SizeToInt(input_shape[i]); | ||||
| } | } | ||||
| for (int position = 0; position < size; position += 1) { | for (int position = 0; position < size; position += 1) { | ||||
| int temp_position = position; | int temp_position = position; | ||||
| pos_array[0] = temp_position / size_offset[0]; | pos_array[0] = temp_position / size_offset[0]; | ||||
| for (int i = 1; i < shape_size; i++) { | |||||
| for (int i = 1; i < shape_size; ++i) { | |||||
| temp_position -= pos_array[i - 1] * size_offset[i - 1]; | temp_position -= pos_array[i - 1] * size_offset[i - 1]; | ||||
| pos_array[i] = temp_position / size_offset[i]; | pos_array[i] = temp_position / size_offset[i]; | ||||
| } | } | ||||
| @@ -42,12 +42,15 @@ class ReduceCPUKernel : public CPUKernel { | |||||
| size_t left_dims_ = 1; | size_t left_dims_ = 1; | ||||
| size_t stride_ = 1; | size_t stride_ = 1; | ||||
| }; | }; | ||||
| MS_REG_CPU_KERNEL(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | MS_REG_CPU_KERNEL(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | ||||
| ReduceCPUKernel); | ReduceCPUKernel); | ||||
| MS_REG_CPU_KERNEL(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | MS_REG_CPU_KERNEL(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | ||||
| ReduceCPUKernel); | ReduceCPUKernel); | ||||
| MS_REG_CPU_KERNEL(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | MS_REG_CPU_KERNEL(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | ||||
| ReduceCPUKernel); | ReduceCPUKernel); | ||||
| MS_REG_CPU_KERNEL(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| ReduceCPUKernel); | |||||
| } // namespace kernel | } // namespace kernel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_ | #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_ | ||||
| @@ -568,7 +568,7 @@ class ReduceMin(_Reduce): | |||||
| the shape of output is :math:`(x_1, x_4, ..., x_R)`. | the shape of output is :math:`(x_1, x_4, ..., x_R)`. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) | >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) | ||||
| @@ -37,6 +37,7 @@ class NetReduce(nn.Cell): | |||||
| self.reduce_mean = P.ReduceMean(False) | self.reduce_mean = P.ReduceMean(False) | ||||
| self.reduce_sum = P.ReduceSum(False) | self.reduce_sum = P.ReduceSum(False) | ||||
| self.reduce_max = P.ReduceMax(False) | self.reduce_max = P.ReduceMax(False) | ||||
| self.reduce_min = P.ReduceMin(False) | |||||
| @ms_function | @ms_function | ||||
| def construct(self, indice): | def construct(self, indice): | ||||
| @@ -50,7 +51,14 @@ class NetReduce(nn.Cell): | |||||
| self.reduce_max(indice, self.axis0), | self.reduce_max(indice, self.axis0), | ||||
| self.reduce_max(indice, self.axis2), | self.reduce_max(indice, self.axis2), | ||||
| self.reduce_max(indice, self.axis5), | self.reduce_max(indice, self.axis5), | ||||
| self.reduce_max(indice, self.axis6)) | |||||
| self.reduce_max(indice, self.axis6), | |||||
| self.reduce_min(indice, self.axis0), | |||||
| self.reduce_min(indice, self.axis1), | |||||
| self.reduce_min(indice, self.axis2), | |||||
| self.reduce_min(indice, self.axis3), | |||||
| self.reduce_min(indice, self.axis4), | |||||
| self.reduce_min(indice, self.axis5), | |||||
| self.reduce_min(indice, self.axis6)) | |||||
| @@ -76,6 +84,13 @@ def test_reduce(): | |||||
| print(output[8]) | print(output[8]) | ||||
| print(output[9]) | print(output[9]) | ||||
| print(output[10]) | print(output[10]) | ||||
| print(output[11]) | |||||
| print(output[12]) | |||||
| print(output[13]) | |||||
| print(output[14]) | |||||
| print(output[15]) | |||||
| print(output[16]) | |||||
| print(output[17]) | |||||
| expect_0 = np.array([[2., 1., 2., 3., 0., 1], [2., 2., 1., 2., 3., 2.]]).astype(np.float32) | expect_0 = np.array([[2., 1., 2., 3., 0., 1], [2., 2., 1., 2., 3., 2.]]).astype(np.float32) | ||||
| expect_1 = np.array([[1.5, 1.5, 1.5, 3., 2., 1.], [1.5, 0., 0.5, 4.5, 2., 2.], [3., 3., 2.5, 0., 0.5, 1.5]]).astype( | expect_1 = np.array([[1.5, 1.5, 1.5, 3., 2., 1.], [1.5, 0., 0.5, 4.5, 2., 2.], [3., 3., 2.5, 0., 0.5, 1.5]]).astype( | ||||
| np.float32) | np.float32) | ||||
| @@ -86,6 +101,11 @@ def test_reduce(): | |||||
| expect_6 = np.array([[9., 12.], [9., 12.], [9., 12.]]).astype(np.float32) | expect_6 = np.array([[9., 12.], [9., 12.], [9., 12.]]).astype(np.float32) | ||||
| expect_7 = np.array([[4., 2., 4., 5., 0., 2.], [3., 5., 2., 4., 4., 3.]]).astype(np.float32) | expect_7 = np.array([[4., 2., 4., 5., 0., 2.], [3., 5., 2., 4., 4., 3.]]).astype(np.float32) | ||||
| expect_8 = np.array([[4., 4.], [5., 4.], [4., 5.]]).astype(np.float32) | expect_8 = np.array([[4., 4.], [5., 4.], [4., 5.]]).astype(np.float32) | ||||
| expect_9 = np.array([[0., 0., 1., 0., 0., 0.], [1., 0., 0., 0., 1., 0.]]).astype(np.float32) | |||||
| expect_10 = np.array([[0., 1., 1., 2., 0., 0.], [1., 0., 0., 4., 0., 1.], [2., 1., 1., 0., 0., 0.]]).astype( | |||||
| np.float32) | |||||
| expect_11 = np.array([[0., 0.], [0., 0.], [0., 0.]]).astype(np.float32) | |||||
| expect_12 = np.array([0., 0., 0., 0., 0., 0.]).astype(np.float32) | |||||
| assert (output[0].asnumpy() == expect_0).all() | assert (output[0].asnumpy() == expect_0).all() | ||||
| assert (output[1].asnumpy() == expect_1).all() | assert (output[1].asnumpy() == expect_1).all() | ||||
| assert (output[2].asnumpy() == expect_2).all() | assert (output[2].asnumpy() == expect_2).all() | ||||
| @@ -97,5 +117,12 @@ def test_reduce(): | |||||
| assert (output[8].asnumpy() == expect_8).all() | assert (output[8].asnumpy() == expect_8).all() | ||||
| assert (output[9].asnumpy() == expect_8).all() | assert (output[9].asnumpy() == expect_8).all() | ||||
| assert (output[10].asnumpy() == 5.0).all() | assert (output[10].asnumpy() == 5.0).all() | ||||
| assert (output[11].asnumpy() == expect_9).all() | |||||
| assert (output[12].asnumpy() == expect_10).all() | |||||
| assert (output[13].asnumpy() == expect_11).all() | |||||
| assert (output[14].asnumpy() == expect_12).all() | |||||
| assert (output[15].asnumpy() == 0.0).all() | |||||
| assert (output[16].asnumpy() == expect_11).all() | |||||
| assert (output[17].asnumpy() == 0.0).all() | |||||
| test_reduce() | test_reduce() | ||||