@@ -110,7 +110,13 @@ void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const T *x1,
 template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const float *x1, const float *x2,
                               const float *dy, float *dx1, float *dx2, cudaStream_t stream);
+template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const int *x1, const int *x2,
+                              const int *dy, int *dx1, int *dx2, cudaStream_t stream);
 template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
                             const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
                             enum BroadcastGradOpType op, const float *x1, const float *x2, const float *dy, float *dx1,
                             float *dx2, cudaStream_t stream);
+template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
+                            const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
+                            enum BroadcastGradOpType op, const int *x1, const int *x2, const int *dy, int *dx1,
+                            int *dx2, cudaStream_t stream);
@@ -90,5 +90,11 @@ MS_REG_GPU_KERNEL_TWO(
 MS_REG_GPU_KERNEL_TWO(
   TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Minimum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Maximum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
 }  // namespace kernel
 }  // namespace mindspore
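With these two registrations in place, int32 inputs to Minimum and Maximum dispatch to the same BroadcastOpGpuKernel that already serves TensorAdd. A minimal usage sketch, using the standard public MindSpore API rather than anything added in this diff:

```python
import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

x = Tensor(np.array([[1, 7], [4, 2]], dtype=np.int32))
y = Tensor(np.array([[3, 3], [3, 3]], dtype=np.int32))

# Both calls now resolve to the int32 BroadcastOpGpuKernel registered above.
print(P.Minimum()(x, y))  # [[1 3] [3 2]]
print(P.Maximum()(x, y))  # [[3 7] [4 3]]
```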
@@ -34,5 +34,21 @@ MS_REG_GPU_KERNEL_ONE(MaximumGrad,
                         .AddOutputAttr(kNumberTypeFloat32)
                         .AddOutputAttr(kNumberTypeFloat32),
                       BroadcastOpGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(MinimumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
+MS_REG_GPU_KERNEL_ONE(MaximumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
 }  // namespace kernel
 }  // namespace mindspore
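The MinimumGrad/MaximumGrad kernels are not called directly; they are reached when autodiff differentiates a Minimum or Maximum node, which is what the tests below do through a Grad wrapper. A sketch of that pattern, assuming the composite GradOperation API (its constructor signature has varied across MindSpore releases; older ones take a leading name string):

```python
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import composite as C
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

class MinNet(nn.Cell):
    def construct(self, x1, x2):
        return P.Minimum()(x1, x2)

class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        # get_all=True returns one gradient per input; sens_param=True lets
        # the caller feed the incoming gradient dy explicitly.
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network

    def construct(self, x1, x2, dy):
        return self.grad(self.network)(x1, x2, dy)

x1 = Tensor(np.array([[1, 5]], dtype=np.int32))
x2 = Tensor(np.array([[3, 3]], dtype=np.int32))
dy = Tensor(np.array([[1, 1]], dtype=np.int32))
dx1, dx2 = Grad(MinNet())(x1, x2, dy)  # dispatches to the int32 MinimumGrad kernel
```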
@@ -222,3 +222,27 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_maximum_int():
+    x = Tensor(np.array([[1, 2, 3]]).astype(np.int32))
+    y = Tensor(np.array([[2]]).astype(np.int32))
+    expect = [[2, 2, 3]]
+    error = np.ones(shape=[1, 3]) * 1.0e-5
+
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    max_op = Net()
+    output = max_op(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    max_op_2 = Net()
+    output = max_op_2(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
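Since both inputs and the expected output are exact integers, the float-style epsilon band above is conservative; the expected values can also be cross-checked exactly with NumPy's broadcasting maximum:

```python
import numpy as np

x = np.array([[1, 2, 3]], dtype=np.int32)  # shape (1, 3)
y = np.array([[2]], dtype=np.int32)        # shape (1, 1), broadcast over axis 1
assert np.array_equal(np.maximum(x, y), np.array([[2, 2, 3]], dtype=np.int32))
```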
@@ -218,3 +218,21 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_broadcast_int32():
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target='GPU')
+
+    x1_np = np.random.randint(0, 100, (3, 4)).astype(np.int32)
+    x2_np = np.random.randint(0, 100, (3, 4)).astype(np.int32)
+    dy_np = np.random.randint(0, 100, (3, 4)).astype(np.int32)
+
+    net = Grad(MinimumNet())
+    output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
+    output0_np = np.where(x1_np < x2_np, dy_np, 0)
+    output1_np = np.where(x1_np < x2_np, 0, dy_np)
+    assert np.allclose(output_ms[0].asnumpy(), output0_np)
+    assert np.allclose(output_ms[1].asnumpy(), output1_np)
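Note the tie-breaking convention encoded in the reference formulas: the comparison is strict, so wherever x1 == x2 the entire incoming gradient is routed to dx2 and none to dx1. A small NumPy check of that behavior:

```python
import numpy as np

x1 = np.array([5, 5], dtype=np.int32)
x2 = np.array([5, 9], dtype=np.int32)
dy = np.array([1, 1], dtype=np.int32)

dx1 = np.where(x1 < x2, dy, 0)  # tie at index 0 -> gradient goes to dx2
dx2 = np.where(x1 < x2, 0, dy)

assert dx1.tolist() == [0, 1]
assert dx2.tolist() == [1, 0]
```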