From: @wuxuejian Reviewed-by: @kisnwang,@liangchenghui Signed-off-by: @liangchenghui tags/v1.2.0-rc1
| @@ -20,54 +20,38 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void HSigmoidCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSigmoidCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0); | |||
| if (dtype_ == kTypeUnknown) { | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| } | |||
| launch_map_[kNumberTypeInt8] = &HSigmoidCPUKernel::LaunchKernel<int8_t>; | |||
| launch_map_[kNumberTypeInt16] = &HSigmoidCPUKernel::LaunchKernel<int16_t>; | |||
| launch_map_[kNumberTypeInt32] = &HSigmoidCPUKernel::LaunchKernel<int>; | |||
| launch_map_[kNumberTypeInt64] = &HSigmoidCPUKernel::LaunchKernel<int64_t>; | |||
| launch_map_[kNumberTypeFloat32] = &HSigmoidCPUKernel::LaunchKernel<float>; | |||
| auto iter = launch_map_.find(dtype_); | |||
| if (iter != launch_map_.end()) { | |||
| launch_func_ = iter->second; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type: " << dtype_ << "is not supported for HSigmoid kernel on CPU."; | |||
| } | |||
| } | |||
| bool HSigmoidCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| launch_func_(this, inputs, outputs); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSigmoidCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| bool HSigmoidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto y = reinterpret_cast<T *>(outputs[0]->addr); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| y[i] = 1; | |||
| } else { | |||
| y[i] = (x[i] + 3) / 6; | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| y[i] = 1; | |||
| } else { | |||
| y[i] = (x[i] + 3) / 6; | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| void HSigmoidCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSigmoidCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidCPUKernel needs 1 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class HSigmoidCPUKernel : public CPUKernel { | |||
| public: | |||
| HSigmoidCPUKernel() = default; | |||
| @@ -34,34 +35,26 @@ class HSigmoidCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| using TypeKernel = std::function<void(HSigmoidCPUKernel *, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs)>; | |||
| std::unordered_map<TypeId, TypeKernel> launch_map_; | |||
| TypeKernel launch_func_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| HSigmoidCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| HSigmoidCPUKernel, int8_t); | |||
| MS_REG_CPU_KERNEL(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| HSigmoidCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| HSigmoidCPUKernel, int16_t); | |||
| MS_REG_CPU_KERNEL(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| HSigmoidCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| HSigmoidCPUKernel, int); | |||
| MS_REG_CPU_KERNEL(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| HSigmoidCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| HSigmoidCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSigmoidCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSigmoidCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| @@ -20,54 +20,37 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void HSigmoidGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSigmoidGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0); | |||
| if (dtype_ == kTypeUnknown) { | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| } | |||
| launch_map_[kNumberTypeInt8] = &HSigmoidGradCPUKernel::LaunchKernel<int8_t>; | |||
| launch_map_[kNumberTypeInt16] = &HSigmoidGradCPUKernel::LaunchKernel<int16_t>; | |||
| launch_map_[kNumberTypeInt32] = &HSigmoidGradCPUKernel::LaunchKernel<int>; | |||
| launch_map_[kNumberTypeInt64] = &HSigmoidGradCPUKernel::LaunchKernel<int64_t>; | |||
| launch_map_[kNumberTypeFloat32] = &HSigmoidGradCPUKernel::LaunchKernel<float>; | |||
| auto iter = launch_map_.find(dtype_); | |||
| if (iter != launch_map_.end()) { | |||
| launch_func_ = iter->second; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type: " << dtype_ << "is not supported for HSigmoidGrad kernel on CPU."; | |||
| } | |||
| } | |||
| bool HSigmoidGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| launch_func_(this, inputs, outputs); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSigmoidGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| bool HSigmoidGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (x[i] <= -3 || x[i] >= 3) { | |||
| out[i] = 0; | |||
| } else { | |||
| out[i] = dy[i] / 6; | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3 || x[i] >= 3) { | |||
| out[i] = 0; | |||
| } else { | |||
| out[i] = dy[i] / 6; | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| void HSigmoidGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSigmoidGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGradCPUKernel needs 2 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class HSigmoidGradCPUKernel : public CPUKernel { | |||
| public: | |||
| HSigmoidGradCPUKernel() = default; | |||
| @@ -34,43 +35,35 @@ class HSigmoidGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| using TypeKernel = std::function<void(HSigmoidGradCPUKernel *, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs)>; | |||
| std::unordered_map<TypeId, TypeKernel> launch_map_; | |||
| TypeKernel launch_func_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSigmoidGrad, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| HSigmoidGradCPUKernel); | |||
| HSigmoidGradCPUKernel, int8_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSigmoidGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| HSigmoidGradCPUKernel); | |||
| HSigmoidGradCPUKernel, int16_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSigmoidGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| HSigmoidGradCPUKernel); | |||
| HSigmoidGradCPUKernel, int); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSigmoidGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| HSigmoidGradCPUKernel); | |||
| HSigmoidGradCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSigmoidGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSigmoidGradCPUKernel); | |||
| HSigmoidGradCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| @@ -20,54 +20,38 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void HSwishCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSwishCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0); | |||
| if (dtype_ == kTypeUnknown) { | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| } | |||
| launch_map_[kNumberTypeInt8] = &HSwishCPUKernel::LaunchKernel<int8_t>; | |||
| launch_map_[kNumberTypeInt16] = &HSwishCPUKernel::LaunchKernel<int16_t>; | |||
| launch_map_[kNumberTypeInt32] = &HSwishCPUKernel::LaunchKernel<int>; | |||
| launch_map_[kNumberTypeInt64] = &HSwishCPUKernel::LaunchKernel<int64_t>; | |||
| launch_map_[kNumberTypeFloat32] = &HSwishCPUKernel::LaunchKernel<float>; | |||
| auto iter = launch_map_.find(dtype_); | |||
| if (iter != launch_map_.end()) { | |||
| launch_func_ = iter->second; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type: " << dtype_ << "is not supported for HSwish kernel on CPU."; | |||
| } | |||
| } | |||
| bool HSwishCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| launch_func_(this, inputs, outputs); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSwishCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| bool HSwishCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto y = reinterpret_cast<T *>(outputs[0]->addr); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| y[i] = x[i]; | |||
| } else { | |||
| y[i] = x[i] * (x[i] + 3) / 6; | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| y[i] = x[i]; | |||
| } else { | |||
| y[i] = x[i] * (x[i] + 3) / 6; | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| void HSwishCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSwishCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishCPUKernel needs 1 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class HSwishCPUKernel : public CPUKernel { | |||
| public: | |||
| HSwishCPUKernel() = default; | |||
| @@ -34,30 +35,26 @@ class HSwishCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| using TypeKernel = std::function<void(HSwishCPUKernel *, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs)>; | |||
| std::unordered_map<TypeId, TypeKernel> launch_map_; | |||
| TypeKernel launch_func_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), HSwishCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), HSwishCPUKernel, | |||
| int8_t); | |||
| MS_REG_CPU_KERNEL(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), HSwishCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| HSwishCPUKernel, int16_t); | |||
| MS_REG_CPU_KERNEL(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), HSwishCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| HSwishCPUKernel, int); | |||
| MS_REG_CPU_KERNEL(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), HSwishCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| HSwishCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL(HSwish, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSwishCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSwishCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| @@ -20,55 +20,39 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void HSwishGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSwishGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0); | |||
| if (dtype_ == kTypeUnknown) { | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| } | |||
| launch_map_[kNumberTypeInt8] = &HSwishGradCPUKernel::LaunchKernel<int8_t>; | |||
| launch_map_[kNumberTypeInt16] = &HSwishGradCPUKernel::LaunchKernel<int16_t>; | |||
| launch_map_[kNumberTypeInt32] = &HSwishGradCPUKernel::LaunchKernel<int>; | |||
| launch_map_[kNumberTypeInt64] = &HSwishGradCPUKernel::LaunchKernel<int64_t>; | |||
| launch_map_[kNumberTypeFloat32] = &HSwishGradCPUKernel::LaunchKernel<float>; | |||
| auto iter = launch_map_.find(dtype_); | |||
| if (iter != launch_map_.end()) { | |||
| launch_func_ = iter->second; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type: " << dtype_ << "is not supported for HSwishGrad kernel on CPU."; | |||
| } | |||
| } | |||
| bool HSwishGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| launch_func_(this, inputs, outputs); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSwishGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| bool HSwishGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (x[i] <= -3) { | |||
| out[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| out[i] = dy[i]; | |||
| } else { | |||
| out[i] = dy[i] * (2 * x[i] + 3) / 6; | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| out[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| out[i] = dy[i]; | |||
| } else { | |||
| out[i] = dy[i] * (2 * x[i] + 3) / 6; | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| void HSwishGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void HSwishGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishGradCPUKernel needs 2 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class HSwishGradCPUKernel : public CPUKernel { | |||
| public: | |||
| HSwishGradCPUKernel() = default; | |||
| @@ -34,43 +35,35 @@ class HSwishGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| using TypeKernel = std::function<void(HSwishGradCPUKernel *, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs)>; | |||
| std::unordered_map<TypeId, TypeKernel> launch_map_; | |||
| TypeKernel launch_func_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSwishGrad, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| HSwishGradCPUKernel); | |||
| HSwishGradCPUKernel, int8_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSwishGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| HSwishGradCPUKernel); | |||
| HSwishGradCPUKernel, int16_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSwishGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| HSwishGradCPUKernel); | |||
| HSwishGradCPUKernel, int); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSwishGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| HSwishGradCPUKernel); | |||
| HSwishGradCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| HSwishGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| HSwishGradCPUKernel); | |||
| HSwishGradCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| @@ -19,50 +19,45 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void SmoothL1LossCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SmoothL1LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta"); | |||
| CheckParam(kernel_node); | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| tensor_size_ *= d; | |||
| } | |||
| } | |||
| bool SmoothL1LossCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void SmoothL1LossCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| bool SmoothL1LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| T zero = (T)0.0; | |||
| T half = (T)0.5; | |||
| T beta = (T)beta_; | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| T diff = predict_addr[i] - target_addr[i]; | |||
| if (diff < zero) { | |||
| diff = -diff; | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| T diff = predict_addr[i] - target_addr[i]; | |||
| if (diff < zero) { | |||
| diff = -diff; | |||
| } | |||
| if (diff < beta) { | |||
| result_addr[i] = half * diff * diff / beta; | |||
| } else { | |||
| result_addr[i] = diff - (half * beta); | |||
| } | |||
| } | |||
| if (diff < beta) { | |||
| result_addr[i] = half * diff * diff / beta; | |||
| } else { | |||
| result_addr[i] = diff - (half * beta); | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| void SmoothL1LossCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SmoothL1LossCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossCPUKernel needs 2 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class SmoothL1LossCPUKernel : public CPUKernel { | |||
| public: | |||
| SmoothL1LossCPUKernel() = default; | |||
| @@ -34,9 +35,6 @@ class SmoothL1LossCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| float beta_ = 1.0; | |||
| @@ -44,15 +42,15 @@ class SmoothL1LossCPUKernel : public CPUKernel { | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| SmoothL1Loss, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| SmoothL1LossCPUKernel); | |||
| SmoothL1LossCPUKernel, float16); | |||
| MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL_T( | |||
| SmoothL1Loss, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| SmoothL1LossCPUKernel); | |||
| SmoothL1LossCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SMOOTH_L1_LOSS_CPU_KERNEL_H_ | |||
| @@ -19,30 +19,20 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void SmoothL1LossGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SmoothL1LossGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta"); | |||
| CheckParam(kernel_node); | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| tensor_size_ *= d; | |||
| } | |||
| } | |||
| bool SmoothL1LossGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void SmoothL1LossGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| bool SmoothL1LossGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto dloss_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| @@ -58,9 +48,11 @@ void SmoothL1LossGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inpu | |||
| result_addr[i] = (diff / beta) * dloss_addr[i]; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| void SmoothL1LossGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SmoothL1LossGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossGradCPUKernel needs 3 input."; | |||
| @@ -24,6 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class SmoothL1LossGradCPUKernel : public CPUKernel { | |||
| public: | |||
| SmoothL1LossGradCPUKernel() = default; | |||
| @@ -34,31 +35,27 @@ class SmoothL1LossGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| float beta_ = 1.0; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL(SmoothL1LossGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| SmoothL1LossGradCPUKernel); | |||
| MS_REG_CPU_KERNEL(SmoothL1LossGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| SmoothL1LossGradCPUKernel); | |||
| MS_REG_CPU_KERNEL_T(SmoothL1LossGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| SmoothL1LossGradCPUKernel, float16); | |||
| MS_REG_CPU_KERNEL_T(SmoothL1LossGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| SmoothL1LossGradCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SMOOTH_L1_LOSS_GRAD_CPU_KERNEL_H_ | |||
| @@ -1,61 +0,0 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import GradOperation | |||
# Run every network in this module in graph mode on the CPU backend.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class Net(nn.Cell):
    """Cell that feeds a prediction/target pair through ``P.SmoothL1Loss``."""

    def __init__(self, sigma=1.0):
        super().__init__()
        self.SmoothL1Loss = P.SmoothL1Loss(sigma)

    def construct(self, pred, gt):
        """Return the primitive's output for ``pred`` and ``gt``."""
        loss = self.SmoothL1Loss(pred, gt)
        return loss
class Grad(nn.Cell):
    """Cell that evaluates the gradient function of a wrapped network.

    The gradient operation is built with ``get_all=True`` and
    ``sens_param=True``; ``dout`` is passed as the trailing argument.
    """

    def __init__(self, network):
        super().__init__()
        self.network = network
        self.grad = GradOperation(get_all=True, sens_param=True)

    def construct(self, pred, gt, dout):
        """Return what the gradient function yields for ``(pred, gt, dout)``."""
        grad_fn = self.grad(self.network)
        return grad_fn(pred, gt, dout)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
    """Check the SmoothL1Loss gradient w.r.t. the prediction against NumPy.

    Random float32 inputs of shape (2, 4) are pushed through ``Grad(Net())``
    and the first returned gradient is compared with a reference computed
    directly from the inputs. The inputs and the result are still printed so
    a failing run can be inspected from the log.
    """
    pred = np.random.randn(2, 4).astype(np.float32)
    gt = np.random.randn(2, 4).astype(np.float32)
    dout = np.random.randn(2, 4).astype(np.float32)
    smooth_l1_loss_grad = Grad(Net())
    output = smooth_l1_loss_grad(Tensor(pred), Tensor(gt), Tensor(dout))
    pred_grad = output[0].asnumpy()
    print("------------- input ---------------")
    print("predict:\n", pred)
    print("ground truth:\n", gt)
    print("dout:\n", dout)
    print("------------- output ---------------")
    print("predict grad:\n", pred_grad)

    # Reference for the default sigma of 1.0 used by Net(): the derivative is
    # the raw difference inside |diff| < 1 and its sign outside, scaled by the
    # incoming sensitivity dout.
    diff = pred - gt
    expect = dout * np.where(np.abs(diff) < 1.0, diff, np.sign(diff))
    assert np.allclose(pred_grad, expect, rtol=1e-5, atol=1e-5)
| @@ -1,48 +0,0 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
# Graph mode on the CPU backend for everything defined below.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class Net(nn.Cell):
    """Thin cell around the ``P.SmoothL1Loss`` primitive."""

    def __init__(self, sigma=1.0):
        super().__init__()
        self.SmoothL1Loss = P.SmoothL1Loss(sigma)

    def construct(self, pred, gt):
        """Apply the stored primitive to ``pred`` and ``gt`` and return the result."""
        result = self.SmoothL1Loss(pred, gt)
        return result
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
    """Check the SmoothL1Loss forward output against a NumPy reference.

    Random float32 inputs of shape (2, 4) are pushed through ``Net()`` and the
    result is compared element-wise with a reference computed from the inputs.
    The inputs and the result are still printed so a failing run can be
    inspected from the log.
    """
    pred = np.random.randn(2, 4).astype(np.float32)
    gt = np.random.randn(2, 4).astype(np.float32)
    smooth_l1_loss = Net()
    loss = smooth_l1_loss(Tensor(pred), Tensor(gt))
    loss_np = loss.asnumpy()
    print("------------- input ---------------")
    print("predict:\n", pred)
    print("ground truth:\n", gt)
    print("------------- output ---------------")
    print("loss:\n", loss_np)

    # Reference for the default sigma of 1.0 used by Net(): quadratic
    # (0.5 * d^2) while |d| < 1, linear (|d| - 0.5) otherwise.
    abs_diff = np.abs(pred - gt)
    expect = np.where(abs_diff < 1.0, 0.5 * abs_diff * abs_diff, abs_diff - 0.5)
    assert np.allclose(loss_np, expect, rtol=1e-5, atol=1e-5)
| @@ -0,0 +1,119 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import composite as C | |||
def smoothl1loss(beta):
    """Evaluate ``nn.SmoothL1Loss(beta)`` on fixed pseudo-random inputs.

    The NumPy generator is seeded with 42, then a 20-element float32
    prediction and a 20-element float32 target are drawn in that order.

    Args:
        beta: value forwarded to the ``nn.SmoothL1Loss`` constructor.

    Returns:
        The output of the loss cell for the two drawn vectors.
    """
    np.random.seed(42)
    # Draw order matters for reproducibility: prediction first, then target.
    prediction, target = (np.random.randn(20).astype(np.float32) for _ in range(2))
    loss_cell = nn.SmoothL1Loss(beta)
    return loss_cell(Tensor(prediction), Tensor(target))
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_smoothl1loss():
    """Compare ``smoothl1loss`` with stored reference values for two betas."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU", save_graphs=True)
    epsilon = 1e-6

    # (beta, expected element-wise loss) pairs, checked in this order.
    cases = (
        (1.0,
         [0.46941718, 0.00382918, 0.16829303, 2.447778, 0.04812113, 0.05953304,
          2.2302065, 0.07672881, 0.00860204, 0.34798968, 0.00956192, 1.818008,
          0.03262977, 0.36599946, 2.047463, 0.2168481, 0.7216947, 1.7739174,
          0.08826803, 1.109165]),
        (1 / 9,
         [0.9133791, 0.03446258, 0.5246048, 2.8922224, 0.2546738, 0.289504,
          2.674651, 0.33618113, 0.07560876, 0.7786982, 0.08273339, 2.2624524,
          0.19990394, 0.8000138, 2.4919074, 0.6030006, 1.1661391, 2.2183619,
          0.3646064, 1.5536094]),
    )

    for beta, expect in cases:
        loss = smoothl1loss(beta)
        diff = np.absolute(loss.asnumpy() - np.array(expect))
        assert (diff < epsilon).all()
class Grad(nn.Cell):
    """Cell that evaluates the gradient function of a wrapped network.

    The gradient operation is created with ``get_all=True`` and
    ``sens_param=True``; ``sens`` is passed as the trailing argument.
    """

    def __init__(self, network):
        super().__init__()
        self.network = network
        self.grad = C.GradOperation(get_all=True, sens_param=True)

    def construct(self, x1, x2, sens):
        """Return what the gradient function yields for ``(x1, x2, sens)``."""
        grad_fn = self.grad(self.network)
        return grad_fn(x1, x2, sens)
def smoothl1loss_grad(beta):
    """Evaluate the gradients of ``nn.SmoothL1Loss(beta)`` on fixed inputs.

    The NumPy generator is seeded with 42, then three 20-element float32
    vectors are drawn in the order prediction, target, sens.

    Args:
        beta: value forwarded to the ``nn.SmoothL1Loss`` constructor.

    Returns:
        The output of ``Grad`` wrapped around the loss cell for those inputs.
    """
    np.random.seed(42)
    # Draw order matters for reproducibility: prediction, target, then sens.
    prediction, target, sens = (np.random.randn(20).astype(np.float32) for _ in range(3))
    grad_cell = Grad(nn.SmoothL1Loss(beta))
    return grad_cell(Tensor(prediction), Tensor(target), Tensor(sens))
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_smoothl1loss_grad():
    """Compare ``smoothl1loss_grad`` with stored reference gradients for two betas."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU", save_graphs=True)
    epsilon = 1e-6

    # (beta, expected first gradient, expected second gradient), checked in order.
    cases = (
        (1.0,
         [-0.71552587, 0.01499678, -0.06709455, -0.30110368, -0.45868093,
          0.24838912, -0.46063876, 0.41411355, 0.04507046, -1.4708229,
          0.04481723, 0.38508227, -0.17292616, -0.52333146, -1.0309995,
          0.61330026, 0.83921754, -0.3092124, 0.1391843, -0.9755451],
         [0.71552587, -0.01499678, 0.06709455, 0.30110368, 0.45868093,
          -0.24838912, 0.46063876, -0.41411355, -0.04507046, 1.4708229,
          -0.04481723, -0.38508227, 0.17292616, 0.52333146, 1.0309995,
          -0.61330026, -0.83921754, 0.3092124, -0.1391843, 0.9755451]),
        (1 / 9,
         [-0.73846656, 0.13497104, -0.11564828, -0.30110368, -1.478522,
          0.7198442, -0.46063876, 1.0571222, 0.3436183, -1.7630402,
          0.32408398, 0.38508227, -0.676922, -0.6116763, -1.0309995,
          0.93128014, 0.83921754, -0.3092124, 0.33126342, -0.9755451],
         [0.73846656, -0.13497104, 0.11564828, 0.30110368, 1.478522,
          -0.7198442, 0.46063876, -1.0571222, -0.3436183, 1.7630402,
          -0.32408398, -0.38508227, 0.676922, 0.6116763, 1.0309995,
          -0.93128014, -0.83921754, 0.3092124, -0.33126342, 0.9755451]),
    )

    for beta, dx1_expect, dx2_expect in cases:
        dx = smoothl1loss_grad(beta)
        diff1 = np.absolute(dx[0].asnumpy() - np.array(dx1_expect))
        diff2 = np.absolute(dx[1].asnumpy() - np.array(dx2_expect))
        assert (diff1 < epsilon).all()
        assert (diff2 < epsilon).all()