Merge pull request !1175 from chenweifeng/layernorm tags/v0.3.0-alpha
| @@ -37,6 +37,5 @@ MS_REG_GPU_KERNEL_TWO( | |||||
| UnsortedSegmentSum, | UnsortedSegmentSum, | ||||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), | KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), | ||||
| UnsortedSegmentSumGpuKernel, int, int64_t) | UnsortedSegmentSumGpuKernel, int, int64_t) | ||||
| } // namespace kernel | } // namespace kernel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -18,7 +18,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| size_t UnitSizeInBytes(const mindspore::TypeId &t) { | size_t UnitSizeInBytes(const mindspore::TypeId &t) { | ||||
| size_t bytes = 0; | size_t bytes = 0; | ||||
| switch (t) { | switch (t) { | ||||
| @@ -65,6 +64,5 @@ int ElementNums(const std::vector<int> &shape) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "ir/dtype/type.h" | #include "ir/dtype/type.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| size_t UnitSizeInBytes(const mindspore::TypeId &t); | size_t UnitSizeInBytes(const mindspore::TypeId &t); | ||||
| int ElementNums(const std::vector<int> &shape); | int ElementNums(const std::vector<int> &shape); | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -27,7 +27,6 @@ | |||||
| #include "kernel/gpu/kernel_constants.h" | #include "kernel/gpu/kernel_constants.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| template <typename T, typename S> | template <typename T, typename S> | ||||
| class BroadcastOpGpuKernel : public GpuKernel { | class BroadcastOpGpuKernel : public GpuKernel { | ||||
| public: | public: | ||||
| @@ -70,14 +69,14 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||||
| output_num_ *= shape3[i]; | output_num_ *= shape3[i]; | ||||
| } | } | ||||
| int offset = shape3.size() - shape1.size(); | int offset = shape3.size() - shape1.size(); | ||||
| for (size_t i = 0; i < shape1.size(); i++) { | |||||
| lhs_shape_[i + offset] = shape1[i]; | |||||
| input1_num_ *= shape1[i]; | |||||
| for (size_t j = 0; j < shape1.size(); j++) { | |||||
| lhs_shape_[j + offset] = shape1[j]; | |||||
| input1_num_ *= shape1[j]; | |||||
| } | } | ||||
| offset = shape3.size() - shape2.size(); | offset = shape3.size() - shape2.size(); | ||||
| for (size_t i = 0; i < shape2.size(); i++) { | |||||
| rhs_shape_[i + offset] = shape2[i]; | |||||
| input2_num_ *= shape2[i]; | |||||
| for (size_t k = 0; k < shape2.size(); k++) { | |||||
| rhs_shape_[k + offset] = shape2[k]; | |||||
| input2_num_ *= shape2[k]; | |||||
| } | } | ||||
| InitSizeLists(); | InitSizeLists(); | ||||
| @@ -1,4 +1,3 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | * Copyright 2020 Huawei Technologies Co., Ltd | ||||
| * | * | ||||
| @@ -28,7 +27,6 @@ | |||||
| #include "kernel/gpu/kernel_constants.h" | #include "kernel/gpu/kernel_constants.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| template <typename T> | template <typename T> | ||||
| class BroadcastOpGradGpuKernel : public GpuKernel { | class BroadcastOpGradGpuKernel : public GpuKernel { | ||||
| public: | public: | ||||
| @@ -36,8 +36,8 @@ class LayerNormGradGpuKernel : public GpuKernel { | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | ||||
| const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override { | const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override { | ||||
| auto dy = GetDeviceAddress<T>(inputs, 0); | |||||
| auto x = GetDeviceAddress<T>(inputs, 1); | |||||
| auto x = GetDeviceAddress<T>(inputs, 0); | |||||
| auto dy = GetDeviceAddress<T>(inputs, 1); | |||||
| auto var = GetDeviceAddress<T>(inputs, 2); | auto var = GetDeviceAddress<T>(inputs, 2); | ||||
| auto mean = GetDeviceAddress<T>(inputs, 3); | auto mean = GetDeviceAddress<T>(inputs, 3); | ||||
| auto gamma = GetDeviceAddress<T>(inputs, 4); | auto gamma = GetDeviceAddress<T>(inputs, 4); | ||||
| @@ -44,6 +44,5 @@ MS_REG_GPU_KERNEL_ONE(ApplyCenteredRMSProp, | |||||
| .AddInputAttr(kNumberTypeFloat32) | .AddInputAttr(kNumberTypeFloat32) | ||||
| .AddOutputAttr(kNumberTypeFloat32), | .AddOutputAttr(kNumberTypeFloat32), | ||||
| RMSPropGpuKernel, float) | RMSPropGpuKernel, float) | ||||
| } // namespace kernel | } // namespace kernel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -80,7 +80,7 @@ def test_layernormgrad0(): | |||||
| gamma_ms = Tensor(gamma_np) | gamma_ms = Tensor(gamma_np) | ||||
| net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | ||||
| dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) | |||||
| dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) | |||||
| assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | ||||
| assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | ||||
| @@ -107,7 +107,7 @@ def test_layernormgrad1(): | |||||
| gamma_ms = Tensor(gamma_np) | gamma_ms = Tensor(gamma_np) | ||||
| net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | ||||
| dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) | |||||
| dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) | |||||
| assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | ||||
| assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | ||||
| @@ -133,8 +133,8 @@ def test_layernormgrad2(): | |||||
| gamma_ms = Tensor(gamma_np) | gamma_ms = Tensor(gamma_np) | ||||
| net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | net = LayerNormGradNet(begin_norm_axis, begin_params_axis) | ||||
| dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) | |||||
| dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) | |||||
| assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) | ||||
| assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) | ||||
| assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3) | |||||
| assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3) | |||||