| @@ -13,9 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h" | |||
| #include <thread> | |||
| #include <cmath> | |||
| #include <string> | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| @@ -52,13 +53,35 @@ void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out, size_t s | |||
| } | |||
| template <typename T> | |||
| void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| auto div_number = input2[i]; | |||
| std::vector<size_t> idx; | |||
| GenIndex(i, &idx); | |||
| auto div_number = input2[idx[1]]; | |||
| if (div_number == 0) { | |||
| MS_LOG(EXCEPTION) << "Cannot divided by 0!"; | |||
| } | |||
| out[i] = input1[i] / div_number; | |||
| out[i] = input1[idx[0]] / div_number; | |||
| } | |||
| } | |||
| template <typename T> | |||
| void ArithmeticCPUKernel::Pow(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| std::vector<size_t> idx; | |||
| GenIndex(i, &idx); | |||
| auto x = static_cast<double>(input1[idx[0]]); | |||
| auto y = static_cast<double>(input2[idx[1]]); | |||
| out[i] = static_cast<T>(std::pow(x, y)); | |||
| } | |||
| } | |||
| template <typename T> | |||
| void ArithmeticCPUKernel::Less(const T *input1, const T *input2, bool *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| std::vector<size_t> idx; | |||
| GenIndex(i, &idx); | |||
| out[i] = input1[idx[0]] < input2[idx[1]]; | |||
| } | |||
| } | |||
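The RealDiv, Pow, and Less loops above all rely on GenIndex to translate a flat output index into per-input indices, so operands with broadcast (size-1) dimensions are read correctly. GenIndex's body is not part of this diff; the sketch below is only an assumption of how such a mapping typically works — BroadcastIndex is a hypothetical name, not the actual MindSpore helper — and it assumes the input shapes were already left-padded with 1s to the output rank, as InitKernel does.

```cpp
// Hedged sketch only: a typical flat-index -> broadcast-index mapping.
// BroadcastIndex is a hypothetical stand-in for GenIndex, whose real body is not shown here.
#include <cstddef>
#include <vector>

std::vector<size_t> BroadcastIndex(size_t num, const std::vector<size_t> &out_shape,
                                   const std::vector<size_t> &in0_shape,
                                   const std::vector<size_t> &in1_shape) {
  size_t rank = out_shape.size();
  if (rank == 0) {
    return {0, 0};
  }
  // stride[d] = number of elements covered by one step along dimension d.
  std::vector<size_t> out_stride(rank, 1), in0_stride(rank, 1), in1_stride(rank, 1);
  for (size_t d = rank - 1; d > 0; --d) {
    out_stride[d - 1] = out_stride[d] * out_shape[d];
    in0_stride[d - 1] = in0_stride[d] * in0_shape[d];
    in1_stride[d - 1] = in1_stride[d] * in1_shape[d];
  }
  size_t idx0 = 0, idx1 = 0;
  for (size_t d = 0; d < rank; ++d) {
    size_t coord = (num / out_stride[d]) % out_shape[d];
    // A size-1 input dimension is broadcast, so its coordinate collapses to 0.
    idx0 += (in0_shape[d] == 1 ? 0 : coord) * in0_stride[d];
    idx1 += (in1_shape[d] == 1 ? 0 : coord) * in1_stride[d];
  }
  return {idx0, idx1};
}
```

For example, with output shape {2, 3}, input0 {2, 3}, and input1 {1, 3}, flat index 4 maps to {4, 1}: input1's single row is reused for every output row.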
| @@ -71,10 +94,16 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| operate_type_ = SUB; | |||
| } else if (kernel_name == prim::kPrimMul->name()) { | |||
| operate_type_ = MUL; | |||
| } else if (kernel_name == "Div") { | |||
| operate_type_ = DIV; | |||
| } else if (kernel_name == prim::kPrimRealDiv->name()) { | |||
| operate_type_ = REALDIV; | |||
| } else if (kernel_name == prim::kPrimPow->name()) { | |||
| operate_type_ = POW; | |||
| } else if (kernel_name == prim::kPrimLess->name()) { | |||
| operate_type_ = LESS; | |||
| } else if (kernel_name == prim::kPrimAssignAdd->name()) { | |||
| operate_type_ = ASSIGNADD; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Not support " << kernel_name; | |||
| } | |||
| input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| @@ -145,14 +174,45 @@ void ArithmeticCPUKernel::GenIndex(size_t num, std::vector<size_t> *idx) { | |||
| idx->push_back(idx0); | |||
| idx->push_back(idx1); | |||
| } | |||
| template <typename T> | |||
| void ArithmeticCPUKernel::LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| T *input1 = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input2 = reinterpret_cast<T *>(inputs[1]->addr); | |||
| bool *output = reinterpret_cast<bool *>(outputs[0]->addr); | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(bool)) : 1;  // output dtype is bool | |||
| auto max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | |||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(thread_num); | |||
| size_t start = 0; | |||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | |||
| while (start < lens) { | |||
| size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end)); | |||
| start += once_compute_size; | |||
| } | |||
| for (size_t i = 0; i < threads.size(); ++i) { | |||
| threads[i].join(); | |||
| } | |||
| } | |||
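LaunchLess, LaunchKernel, LaunchCast, and the self/grad kernels later in this diff all repeat the same chunking pattern: split the element count into roughly 128-element chunks and cap the worker count at std::thread::hardware_concurrency(). A factored-out sketch of that pattern is shown below; ParallelFor is a hypothetical helper, not part of this change, and it additionally guards against hardware_concurrency() reporting 0, which the inlined copies do not.

```cpp
// Hedged sketch of the shared threading pattern (ParallelFor is a hypothetical name).
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <thread>
#include <vector>

template <typename Fn>
void ParallelFor(size_t lens, Fn fn) {
  size_t max_thread_num = std::max<size_t>(1, std::thread::hardware_concurrency());
  size_t thread_num =
      lens < 128 * max_thread_num ? static_cast<size_t>(std::ceil(lens / 128.0)) : max_thread_num;
  thread_num = std::max<size_t>(1, thread_num);
  size_t once_compute_size = (lens + thread_num - 1) / thread_num;  // chunk length per worker
  std::vector<std::thread> threads;
  threads.reserve(thread_num);
  for (size_t start = 0; start < lens; start += once_compute_size) {
    size_t end = std::min(lens, start + once_compute_size);
    threads.emplace_back(fn, start, end);  // each worker processes [start, end)
  }
  for (auto &t : threads) {
    t.join();
  }
}
```

With such a helper, LaunchLess would reduce to `ParallelFor(lens, [=](size_t s, size_t e) { Less<T>(input1, input2, output, s, e); });` — again a sketch of a possible refactor, not what this patch does.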
| template <typename T> | |||
| void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| if (operate_type_ == LESS) { | |||
| LaunchLess<T>(inputs, outputs); | |||
| return; | |||
| } | |||
| T *input1 = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input2 = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto lens = outputs[0]->size / sizeof(T); | |||
| size_t thread_num = lens < 128 * 24 ? std::ceil(lens / 128.0) : 24; | |||
| MS_LOG(INFO) << "lens=" << lens << "; use thread_num=" << thread_num; | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1; | |||
| auto max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | |||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(thread_num); | |||
| size_t start = 0; | |||
| @@ -165,10 +225,14 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Sub<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == MUL) { | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mul<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == DIV) { | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == REALDIV) { | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == POW) { | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::Pow<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == ASSIGNADD) { | |||
| threads.emplace_back(std::thread(&ArithmeticCPUKernel::AssignAdd<T>, this, input1, input2, output, start, end)); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Not support " << operate_type_; | |||
| } | |||
| start += once_compute_size; | |||
| } | |||
| @@ -15,8 +15,8 @@ | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| @@ -31,7 +31,8 @@ class ArithmeticCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| @@ -44,9 +45,13 @@ class ArithmeticCPUKernel : public CPUKernel { | |||
| template <typename T> | |||
| void Mul(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void Div(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| void RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void AssignAdd(T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void Less(const T *input1, const T *input2, bool *out, size_t start, size_t end); | |||
| std::vector<size_t> input_shape0_; | |||
| std::vector<size_t> input_shape1_; | |||
| std::vector<size_t> input_element_num0_; | |||
| @@ -66,6 +71,34 @@ MS_REG_CPU_KERNEL( | |||
| MS_REG_CPU_KERNEL( | |||
| Sub, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Pow, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Pow, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| RealDiv, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Less, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| Less, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool), | |||
| ArithmeticCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| ArithmeticCPUKernel); | |||
| @@ -13,10 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h" | |||
| #include <cmath> | |||
| #include <thread> | |||
| #include <string> | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| @@ -30,9 +30,9 @@ void Square(const T *in, T *out, size_t start, size_t end) { | |||
| } | |||
| template <typename T> | |||
| void Sqrt(const T *in, T *out, size_t start, size_t end) { | |||
| void Neg(const T *in, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| out[i] = sqrtf(in[i]); | |||
| out[i] = -in[i]; | |||
| } | |||
| } | |||
| } // namespace | |||
| @@ -42,8 +42,8 @@ void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == prim::kPrimSquare->name()) { | |||
| operate_type_ = SQUARE; | |||
| } else if (kernel_name == prim::kPrimSqrt->name()) { | |||
| operate_type_ = SQRT; | |||
| } else if (kernel_name == prim::kPrimNeg->name()) { | |||
| operate_type_ = NEG; | |||
| } | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| @@ -66,10 +66,11 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs | |||
| const std::vector<AddressPtr> &outputs) { | |||
| T *input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto lens = inputs[0]->size / sizeof(T); | |||
| MS_LOG(INFO) << "lens=" << lens; | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1; | |||
| const size_t thread_num = 24; | |||
| auto max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | |||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(thread_num); | |||
| size_t start = 0; | |||
| @@ -78,8 +79,8 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs | |||
| size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | |||
| if (operate_type_ == SQUARE) { | |||
| threads.emplace_back(std::thread(Square<T>, input, output, start, end)); | |||
| } else if (operate_type_ == SQRT) { | |||
| threads.emplace_back(std::thread(Sqrt<T>, input, output, start, end)); | |||
| } else if (operate_type_ == NEG) { | |||
| threads.emplace_back(std::thread(Neg<T>, input, output, start, end)); | |||
| } | |||
| start += once_compute_size; | |||
| } | |||
| @@ -15,8 +15,8 @@ | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| @@ -40,10 +40,12 @@ class ArithmeticSelfCPUKernel : public CPUKernel { | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ArithmeticSelfCPUKernel); | |||
| MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| ArithmeticSelfCPUKernel); | |||
| MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ArithmeticSelfCPUKernel); | |||
| MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| ArithmeticSelfCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,82 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cmath> | |||
| #include <map> | |||
| #include <string> | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/cast_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename S, typename T> | |||
| void Cast(const S *in, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| out[i] = static_cast<T>(in[i]); | |||
| } | |||
| } | |||
| template <typename S, typename T> | |||
| void LaunchCast(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) { | |||
| S *input = reinterpret_cast<S *>(inputs[0]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| MS_LOG(DEBUG) << "Type source: " << typeid(S).name() << "; target: " << typeid(T).name(); | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1; | |||
| auto max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | |||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(thread_num); | |||
| size_t start = 0; | |||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | |||
| while (start < lens) { | |||
| size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | |||
| threads.emplace_back(std::thread(Cast<S, T>, input, output, start, end)); | |||
| start += once_compute_size; | |||
| } | |||
| for (size_t i = 0; i < threads.size(); ++i) { | |||
| threads[i].join(); | |||
| } | |||
| } | |||
| void CastCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| source_dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, 0); | |||
| target_dtype = AnfAlgo::GetOutputInferDataType(kernel_node, 0); | |||
| } | |||
| bool CastCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| using TypePair = | |||
| std::function<void(const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &)>; | |||
| std::map<TypeId, std::map<TypeId, TypePair>> mode_map; | |||
| mode_map[kNumberTypeFloat32][kNumberTypeFloat32] = LaunchCast<float, float>; | |||
| mode_map[kNumberTypeFloat32][kNumberTypeInt32] = LaunchCast<float, int>; | |||
| mode_map[kNumberTypeFloat32][kNumberTypeBool] = LaunchCast<float, bool>; | |||
| mode_map[kNumberTypeInt32][kNumberTypeFloat32] = LaunchCast<int, float>; | |||
| mode_map[kNumberTypeInt32][kNumberTypeInt32] = LaunchCast<int, int>; | |||
| mode_map[kNumberTypeInt32][kNumberTypeBool] = LaunchCast<int, bool>; | |||
| mode_map[kNumberTypeBool][kNumberTypeFloat32] = LaunchCast<bool, float>; | |||
| mode_map[kNumberTypeBool][kNumberTypeBool] = LaunchCast<bool, bool>; | |||
| mode_map[kNumberTypeBool][kNumberTypeInt32] = LaunchCast<bool, int>; | |||
| mode_map[source_dtype][target_dtype](inputs, outputs); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
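One caveat with the dispatch table in Launch above: std::map::operator[] default-constructs an empty std::function for any (source, target) pair that is not listed, and invoking it throws std::bad_function_call rather than producing a readable error. A guarded lookup, sketched below with the same symbols the file already uses (illustrative only, not part of this change), would fail more clearly:

```cpp
// Hedged sketch: explicit lookup instead of mode_map[source_dtype][target_dtype](inputs, outputs).
auto src_it = mode_map.find(source_dtype);
if (src_it == mode_map.end() || src_it->second.find(target_dtype) == src_it->second.end()) {
  MS_LOG(EXCEPTION) << "Unsupported cast from " << TypeIdLabel(source_dtype) << " to "
                    << TypeIdLabel(target_dtype) << " on CPU.";
}
src_it->second.at(target_dtype)(inputs, outputs);
```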
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ | |||
| #include <functional> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class CastCPUKernel : public CPUKernel { | |||
| public: | |||
| CastCPUKernel() = default; | |||
| ~CastCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| TypeId source_dtype{kTypeUnknown}; | |||
| TypeId target_dtype{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); | |||
| MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ | |||
| @@ -15,15 +15,14 @@ | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <functional> | |||
| #include <memory> | |||
| #include <numeric> | |||
| #include <functional> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "ir/anf.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/anf.h" | |||
| using mindspore::kernel::Address; | |||
| using mindspore::kernel::AddressPtr; | |||
| @@ -52,7 +51,26 @@ const char END[] = "end"; | |||
| const char SIZE[] = "size"; | |||
| const char USE_NESTEROV[] = "use_nesterov"; | |||
| const char GROUP[] = "group"; | |||
| enum OperateType { ADD = 0, SUB, MUL, DIV, SQUARE, SQRT, ASSIGNADD }; | |||
| enum OperateType { | |||
| ADD = 0, | |||
| SUB, | |||
| MUL, | |||
| DIV, | |||
| SQUARE, | |||
| SQRT, | |||
| POW, | |||
| REALDIV, | |||
| NEG, | |||
| LESS, | |||
| ASSIGNADD, | |||
| RELUGRAD, | |||
| RELU6GRAD, | |||
| ABSGRAD, | |||
| TANHGRAD, | |||
| SQRTGRAD, | |||
| SIGMOIDGRAD | |||
| }; | |||
| class CPUKernel : public kernel::KernelMod { | |||
| public: | |||
| @@ -0,0 +1,177 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cmath> | |||
| #include <string> | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| if (input2[i] > 0) { | |||
| out[i] = input1[i]; | |||
| } else { | |||
| out[i] = 0; | |||
| } | |||
| } | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| if (input2[i] > 0 && input2[i] <= 6) { | |||
| out[i] = input1[i]; | |||
| } else { | |||
| out[i] = 0; | |||
| } | |||
| } | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| if (input1[i] > 0) { | |||
| out[i] = input2[i]; | |||
| } else if (input1[i] < 0) { | |||
| out[i] = -input2[i]; | |||
| } else { | |||
| out[i] = 0; | |||
| } | |||
| } | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| out[i] = input2[i] * input1[i] * (1 - input1[i]); | |||
| } | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| out[i] = input2[i] / (input1[i] * 2); | |||
| } | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| out[i] = input2[i] * (1 - input1[i] * input1[i]);  // dy * (1 - y^2), where input1 holds the forward output y | |||
| } | |||
| } | |||
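For reference, the standard element-wise gradient formulas these loops implement are listed below, with $y$ the forward output, $x$ the forward input, and $dy$ the incoming gradient; note that which operand carries $dy$ differs between the kernels above, as the code shows.

$$\text{ReluGrad: } dx = dy\cdot[x>0] \qquad \text{ReLU6Grad: } dx = dy\cdot[0<x\le 6] \qquad \text{AbsGrad: } dx = dy\cdot\operatorname{sign}(x)$$

$$\text{SigmoidGrad: } dx = dy\cdot y(1-y) \qquad \text{SqrtGrad: } dx = \frac{dy}{2y} \qquad \text{TanhGrad: } dx = dy\cdot(1-y^{2})$$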
| void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == "ReluGrad") { | |||
| operate_type_ = RELUGRAD; | |||
| } else if (kernel_name == "ReLU6Grad") { | |||
| operate_type_ = RELU6GRAD; | |||
| } else if (kernel_name == "SigmoidGrad") { | |||
| operate_type_ = SIGMOIDGRAD; | |||
| } else if (kernel_name == "AbsGrad") { | |||
| operate_type_ = ABSGRAD; | |||
| } else if (kernel_name == "TanhGrad") { | |||
| operate_type_ = TANHGRAD; | |||
| } else if (kernel_name == "SqrtGrad") { | |||
| operate_type_ = SQRTGRAD; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Not support " << kernel_name; | |||
| } | |||
| input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (output_shape_.size() == 0) { | |||
| output_shape_.insert(output_shape_.begin(), 1); | |||
| } | |||
| size_t l = input_shape0_.size(); | |||
| for (size_t i = 0; i < output_shape_.size() - l; ++i) { | |||
| input_shape0_.insert(input_shape0_.begin(), 1); | |||
| } | |||
| l = input_shape1_.size(); | |||
| for (size_t i = 0; i < output_shape_.size() - l; ++i) { | |||
| input_shape1_.insert(input_shape1_.begin(), 1); | |||
| } | |||
| CPUKernelUtils::GetElementNumEveryDim(input_shape0_, &input_element_num0_); | |||
| CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_); | |||
| CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| if (dtype_ != AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1)) { | |||
| MS_LOG(EXCEPTION) << "Input0 and input1 must has the same data type"; | |||
| } | |||
| } | |||
| bool EltWiseGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeInt64) { | |||
| LaunchKernel<int64_t>(inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Only support int32, float32, but actual data type is " << TypeIdLabel(dtype_); | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void EltWiseGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| T *input1 = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input2 = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1; | |||
| auto max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; | |||
| MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(thread_num); | |||
| size_t start = 0; | |||
| size_t once_compute_size = (lens + thread_num - 1) / thread_num; | |||
| while (start < lens) { | |||
| size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); | |||
| if (operate_type_ == RELUGRAD) { | |||
| threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReluGrad<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == RELU6GRAD) { | |||
| threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReLU6Grad<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == ABSGRAD) { | |||
| threads.emplace_back(std::thread(&EltWiseGradCPUKernel::AbsGrad<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == SIGMOIDGRAD) { | |||
| threads.emplace_back( | |||
| std::thread(&EltWiseGradCPUKernel::SigmoidGrad<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == TANHGRAD) { | |||
| threads.emplace_back(std::thread(&EltWiseGradCPUKernel::TanhGrad<T>, this, input1, input2, output, start, end)); | |||
| } else if (operate_type_ == SQRTGRAD) { | |||
| threads.emplace_back(std::thread(&EltWiseGradCPUKernel::SqrtGrad<T>, this, input1, input2, output, start, end)); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Not support " << operate_type_; | |||
| } | |||
| start += once_compute_size; | |||
| } | |||
| for (size_t i = 0; i < threads.size(); ++i) { | |||
| threads[i].join(); | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,87 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class EltWiseGradCPUKernel : public CPUKernel { | |||
| public: | |||
| EltWiseGradCPUKernel() = default; | |||
| ~EltWiseGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| template <typename T> | |||
| void ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| template <typename T> | |||
| void TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); | |||
| std::vector<size_t> input_shape0_; | |||
| std::vector<size_t> input_shape1_; | |||
| std::vector<size_t> input_element_num0_; | |||
| std::vector<size_t> input_element_num1_; | |||
| std::vector<size_t> output_shape_; | |||
| std::vector<size_t> output_element_num_; | |||
| OperateType operate_type_{RELUGRAD}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| ReluGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| ReLU6Grad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| AbsGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| SigmoidGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| SqrtGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| MS_REG_CPU_KERNEL( | |||
| TanhGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,76 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h" | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node, | |||
| dnnl::memory::desc src_desc) { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == "ReLU") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||
| } else if (kernel_name == "ReLU6") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0); | |||
| } else if (kernel_name == "Abs") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc); | |||
| } else if (kernel_name == "Exp") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc); | |||
| } else if (kernel_name == "Log") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc); | |||
| } else if (kernel_name == "Sigmoid") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc); | |||
| } else if (kernel_name == "Sqrt") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc); | |||
| } else if (kernel_name == "Square") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc); | |||
| } else if (kernel_name == "Tanh") { | |||
| return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name; | |||
| } | |||
| } | |||
| void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| auto desc = GetForwardEltwiseDesc(kernel_node, src_desc); | |||
| auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DST, src_desc); | |||
| } | |||
| bool EltWiseCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
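The if/else chain in GetForwardEltwiseDesc could equally be driven by a small static table from op name to (algorithm, alpha, beta). The sketch below is illustrative only — kEltwiseOps is a hypothetical name and this refactor is not part of the change — and it uses only the DNNL algorithms already referenced above.

```cpp
// Hedged sketch: table-driven alternative to the if/else chain (not part of this change).
#include <map>
#include <string>
#include <tuple>
#include "dnnl.hpp"

static const std::map<std::string, std::tuple<dnnl::algorithm, float, float>> kEltwiseOps = {
  {"ReLU", std::make_tuple(dnnl::algorithm::eltwise_relu, 0.0f, 0.0f)},
  {"ReLU6", std::make_tuple(dnnl::algorithm::eltwise_clip, 0.0f, 6.0f)},
  {"Abs", std::make_tuple(dnnl::algorithm::eltwise_abs, 0.0f, 0.0f)},
  {"Exp", std::make_tuple(dnnl::algorithm::eltwise_exp, 0.0f, 0.0f)},
  {"Log", std::make_tuple(dnnl::algorithm::eltwise_log, 0.0f, 0.0f)},
  {"Sigmoid", std::make_tuple(dnnl::algorithm::eltwise_logistic, 0.0f, 0.0f)},
  {"Sqrt", std::make_tuple(dnnl::algorithm::eltwise_sqrt, 0.0f, 0.0f)},
  {"Square", std::make_tuple(dnnl::algorithm::eltwise_square, 0.0f, 0.0f)},
  {"Tanh", std::make_tuple(dnnl::algorithm::eltwise_tanh, 0.0f, 0.0f)},
};
// Lookup sketch: auto it = kEltwiseOps.find(kernel_name); then build
// dnnl::eltwise_forward::desc(DnnlForward, std::get<0>(it->second), src_desc,
//                             std::get<1>(it->second), std::get<2>(it->second));
```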
| @@ -0,0 +1,60 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class EltWiseCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| EltWiseCPUKernel() = default; | |||
| ~EltWiseCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, dnnl::memory::desc src_desc); | |||
| dnnl::prop_kind DnnlForward = dnnl::prop_kind::forward_training; | |||
| }; | |||
| MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Abs, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Exp, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Log, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Sqrt, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EltWiseCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ | |||
| @@ -13,12 +13,11 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -15,9 +15,8 @@ | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| @@ -74,4 +73,4 @@ MS_REG_CPU_KERNEL(BatchNorm, | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,110 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h" | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void FusedBatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t type_size = sizeof(float); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| size_t tensor_size = shape[1] * 2 * type_size; | |||
| // [2, c] to store scale and bias | |||
| workspace_size_list_.emplace_back(tensor_size); | |||
| // [2, c] to store diff_scale and diff_bias | |||
| workspace_size_list_.emplace_back(tensor_size); | |||
| } | |||
| void FusedBatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (x_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "Fused batchnorm only support nchw input!"; | |||
| } | |||
| batch_size = x_shape[0]; | |||
| channel = x_shape[1]; | |||
| hw_size = x_shape[2] * x_shape[3]; | |||
| nhw_size = x_shape[0] * hw_size; | |||
| dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape); | |||
| dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel}); | |||
| auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon"); | |||
| auto prop_kind = dnnl::prop_kind::forward_training; | |||
| auto normalization_flags = dnnl::normalization_flags::use_scale_shift; | |||
| // fused batch normalization forward description | |||
| dnnl::batch_normalization_forward::desc desc = | |||
| dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags); | |||
| auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| // fused batch normalization backward description | |||
| dnnl::batch_normalization_backward::desc backward_desc = | |||
| dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags); | |||
| auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc( | |||
| backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||
| primitive_ = std::make_shared<dnnl::batch_normalization_backward>(backward_prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, x_desc); | |||
| AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc()); | |||
| AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc()); | |||
| AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc); | |||
| AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc()); | |||
| AddArgument(DNNL_ARG_DST, x_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST, x_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC, x_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc); | |||
| } | |||
| bool FusedBatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 6 || outputs.size() < 3) {  // dy, x, scale, bias, mean, variance in; dx, dscale, dbias out | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size); | |||
| auto max_size = workspace[0]->size - inputs[2]->size; | |||
| auto bias_ret = memcpy_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, inputs[3]->addr, inputs[3]->size); | |||
| if (scale_ret != 0 || bias_ret != 0) { | |||
| MS_LOG(EXCEPTION) << "Memcpy_s error."; | |||
| return false; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_MEAN, inputs[4]->addr); | |||
| SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[5]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr); | |||
| ExecutePrimitive(); | |||
| auto wksp_out = reinterpret_cast<float *>(workspace[1]->addr); | |||
| auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size); | |||
| auto diff_bias_ret = | |||
| memcpy_s(outputs[2]->addr, outputs[2]->size, wksp_out + (outputs[1]->size / sizeof(float)), inputs[3]->size); | |||
| if (diff_scale_ret != 0 || diff_bias_ret != 0) { | |||
| MS_LOG(EXCEPTION) << "Memcpy_s error."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
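Reading the Launch body above, the buffer layout this kernel assumes (derived directly from the SetArgumentHandle and memcpy_s calls) is summarized below; scale and bias are packed into a single {2, C} workspace because DNNL's use_scale_shift flag expects one combined scale_shift tensor.

```cpp
// Summary of the I/O contract implied by Launch above (comments only):
//   inputs[0]  -> dy           (DNNL_ARG_DIFF_DST)
//   inputs[1]  -> x            (DNNL_ARG_SRC)
//   inputs[2]  -> scale   --+
//   inputs[3]  -> bias    --+--> packed into workspace[0] as a {2, C} scale_shift tensor
//   inputs[4]  -> mean         (DNNL_ARG_MEAN)
//   inputs[5]  -> variance     (DNNL_ARG_VARIANCE)
//   outputs[0] -> dx           (DNNL_ARG_DIFF_SRC)
//   outputs[1] -> dscale  --+
//   outputs[2] -> dbias   --+-- unpacked from workspace[1] ({2, C} diff_scale_shift) after execution
```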
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class FusedBatchNormGradCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| FusedBatchNormGradCPUKernel() = default; | |||
| ~FusedBatchNormGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| private: | |||
| float momentum{0.9}; | |||
| size_t batch_size{0}; | |||
| size_t channel{0}; | |||
| size_t hw_size{0}; | |||
| size_t nhw_size{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(FusedBatchNormGradCPU, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| FusedBatchNormGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ | |||
| @@ -25,24 +25,53 @@ void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { | |||
| MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs " | |||
| << src1_shape.size(); | |||
| } | |||
| if (src1_shape.size() < src0_shape.size()) { | |||
| for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { | |||
| src1_shape.emplace_back(1); | |||
| if (src1_shape.size() != src0_shape.size()) { | |||
| if (src0_shape.size() == 0) { | |||
| need_swap_ = true; | |||
| for (size_t i = 0; i < src1_shape.size(); ++i) { | |||
| src0_shape.emplace_back(1); | |||
| } | |||
| } else if (src1_shape.size() == 0) { | |||
| for (size_t i = 0; i < src0_shape.size(); ++i) { | |||
| src1_shape.emplace_back(1); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; | |||
| } | |||
| } else { | |||
| bool visit_src0 = false; | |||
| bool visit_src1 = false; | |||
| for (size_t i = 0; i < src0_shape.size(); ++i) { | |||
| if (src0_shape[i] != src1_shape[i]) { | |||
| if (src0_shape[i] == 1 && !visit_src1) { | |||
| need_swap_ = true; | |||
| visit_src0 = true; | |||
| } else if (src1_shape[i] == 1 && !visit_src0) { | |||
| need_swap_ = false; | |||
| visit_src1 = true; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); | |||
| dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); | |||
| dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); | |||
| dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc); | |||
| dnnl::memory::desc src0_desc; | |||
| dnnl::memory::desc src1_desc; | |||
| if (need_swap_) { | |||
| src0_desc = GetDefaultMemDesc(src1_shape); | |||
| src1_desc = GetDefaultMemDesc(src0_shape); | |||
| } else { | |||
| src0_desc = GetDefaultMemDesc(src0_shape); | |||
| src1_desc = GetDefaultMemDesc(src1_shape); | |||
| } | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_desc, src1_desc, dst_desc); | |||
| auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::binary>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC_0, src0_mem_desc); | |||
| AddArgument(DNNL_ARG_SRC_1, src1_mem_desc); | |||
| AddArgument(DNNL_ARG_DST, dst_mem_desc); | |||
| AddArgument(DNNL_ARG_SRC_0, src0_desc); | |||
| AddArgument(DNNL_ARG_SRC_1, src1_desc); | |||
| AddArgument(DNNL_ARG_DST, dst_desc); | |||
| } | |||
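The new validation above accepts exact shape matches, a scalar on either side, and same-rank shapes where only one operand has size-1 (broadcast) dimensions; need_swap_ records the case where src0 is the broadcast side, so DNNL always receives the full-size tensor as SRC_0. A few concrete cases traced from the code (illustrative comments only):

```cpp
// Behaviour of the broadcast check in InitKernel for sample shape pairs:
//   src0 = {2, 3}, src1 = {2, 3}  -> accepted, need_swap_ = false
//   src0 = {2, 3}, src1 = {2, 1}  -> accepted, need_swap_ = false (src1 broadcast along axis 1)
//   src0 = {1, 3}, src1 = {2, 3}  -> accepted, need_swap_ = true  (src0 broadcast along axis 0)
//   src0 = {},     src1 = {2, 3}  -> accepted, need_swap_ = true  (scalar * tensor)
//   src0 = {2, 1}, src1 = {1, 3}  -> rejected: both operands would need broadcasting
//   src0 = {2, 3}, src1 = {3}     -> rejected: ranks differ and neither side is a scalar
```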
| bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| @@ -51,8 +80,13 @@ bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "mul error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| if (need_swap_) { | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr); | |||
| } else { | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| @@ -31,6 +31,9 @@ class MulCPUKernel : public MKLCPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| bool need_swap_{false}; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| @@ -1,59 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size(); | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::eltwise_forward::desc desc = | |||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == "ReLU6") { | |||
| desc = | |||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0); | |||
| } | |||
| auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DST, src_desc); | |||
| } | |||
| bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,42 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ReluCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| ReluCPUKernel() = default; | |||
| ~ReluCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel); | |||
| MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ReluCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ | |||
| @@ -1,69 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size(); | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::eltwise_forward::desc forward_desc = | |||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||
| auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||
| dnnl::eltwise_backward::desc backward_desc = | |||
| dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0); | |||
| auto backward_prim_desc = | |||
| dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||
| primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST, src_desc); | |||
| } | |||
| bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "relu grad error input output size!"; | |||
| } | |||
| if (inputs[0]->size != outputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "relu grad error input output data size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| size_t mem_bits = outputs[0]->size; | |||
| auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,43 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ReluGradCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| ReluGradCPUKernel() = default; | |||
| ~ReluGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| ReluGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ReluGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| @@ -25,17 +25,45 @@ void TensorAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { | |||
| MS_LOG(EXCEPTION) << "TensorAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs " | |||
| << src1_shape.size(); | |||
| } | |||
| if (src1_shape.size() < src0_shape.size()) { | |||
| for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { | |||
| src1_shape.emplace_back(1); | |||
| if (src1_shape.size() != src0_shape.size()) { | |||
| if (src0_shape.size() == 0) { | |||
| need_swap_ = true; | |||
| for (size_t i = 0; i < src1_shape.size(); ++i) { | |||
| src0_shape.emplace_back(1); | |||
| } | |||
| } else if (src1_shape.size() == 0) { | |||
| for (size_t i = 0; i < src0_shape.size(); ++i) { | |||
| src1_shape.emplace_back(1); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; | |||
| } | |||
| } else { | |||
| bool visit_src0 = false; | |||
| bool visit_src1 = false; | |||
| for (size_t i = 0; i < src0_shape.size(); ++i) { | |||
| if (src0_shape[i] != src1_shape[i]) { | |||
| if (src0_shape[i] == 1 && !visit_src1) { | |||
| need_swap_ = true; | |||
| visit_src0 = true; | |||
| } else if (src1_shape[i] == 1 && !visit_src0) { | |||
| need_swap_ = false; | |||
| visit_src1 = true; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| dnnl::memory::desc src0_desc = GetDefaultMemDesc(src0_shape); | |||
| dnnl::memory::desc src1_desc = GetDefaultMemDesc(src1_shape); | |||
| dnnl::memory::desc src0_desc; | |||
| dnnl::memory::desc src1_desc; | |||
| if (need_swap_) { | |||
| src0_desc = GetDefaultMemDesc(src1_shape); | |||
| src1_desc = GetDefaultMemDesc(src0_shape); | |||
| } else { | |||
| src0_desc = GetDefaultMemDesc(src0_shape); | |||
| src1_desc = GetDefaultMemDesc(src1_shape); | |||
| } | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_desc, src1_desc, dst_desc); | |||
| auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
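Reviewer note: the swap check in the hunk above accepts only one-sided broadcasting — for every mismatched dimension, exactly one (and always the same) operand may be 1, and if that operand is the first input the kernel flips them so the broadcast operand always lands in SRC_1. A minimal numpy-flavoured sketch of the same decision; `decide_swap` is purely illustrative and not part of the kernel's API:

```python
def decide_swap(shape0, shape1):
    """Mimic the one-sided broadcast check: return need_swap, or raise if invalid."""
    if len(shape0) != len(shape1):
        if not shape0:          # scalar first input: swap so the scalar becomes SRC_1
            return True
        if not shape1:          # scalar second input: keep order
            return False
        raise ValueError(f"Invalid broadcast! {shape0} vs {shape1}")
    need_swap = False
    visit0 = visit1 = False
    for d0, d1 in zip(shape0, shape1):
        if d0 != d1:
            if d0 == 1 and not visit1:
                need_swap, visit0 = True, True
            elif d1 == 1 and not visit0:
                visit1 = True
            else:
                raise ValueError(f"Invalid broadcast! {shape0} vs {shape1}")
    return need_swap

assert decide_swap((2, 3, 4, 4), (2, 1, 4, 4)) is False   # second input is broadcast
assert decide_swap((2, 1, 1, 4), (2, 3, 4, 4)) is True    # first input is broadcast, so swap
```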
| @@ -51,8 +79,13 @@ bool TensorAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "TensorAdd error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| if (need_swap_) { | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr); | |||
| } else { | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| @@ -31,6 +31,9 @@ class TensorAddCPUKernel : public MKLCPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| bool need_swap_{false}; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| @@ -39,6 +39,7 @@ MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ReshapeCPUKernel); | |||
| @@ -46,6 +47,7 @@ MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ReshapeCPUKernel); | |||
| @@ -53,6 +55,8 @@ MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOut | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| ReshapeCPUKernel); | |||
| MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), | |||
| ReshapeCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -560,11 +560,17 @@ def get_bprop_gelu(self): | |||
| def get_bprop_fused_batch_norm(self): | |||
| """Grad definition for `FusedBatchNorm` operation.""" | |||
| input_grad = G.FusedBatchNormGrad(self.epsilon, self.momentum) | |||
| target_cpu = False | |||
| if self.target == "CPU": | |||
| input_grad = G.FusedBatchNormGradCPU(self.epsilon, self.momentum) | |||
| target_cpu = True | |||
| def bprop(x, scale, b, mean, variance, out, dout): | |||
| saved_mean = out[3] | |||
| saved_variance = out[4] | |||
| out = input_grad(dout[0], x, scale, saved_mean, saved_variance) | |||
| if target_cpu: | |||
| out = input_grad(dout[0], x, scale, b, saved_mean, saved_variance) | |||
| else: | |||
| out = input_grad(dout[0], x, scale, saved_mean, saved_variance) | |||
| dx = out[0] | |||
| dscale = out[1] | |||
| dbias = out[2] | |||
| @@ -540,6 +540,22 @@ class FusedBatchNormGrad(Primitive): | |||
| raise NotImplementedError | |||
| class FusedBatchNormGradCPU(PrimitiveWithInfer): | |||
| """Gradients of FusedBatchNorm operation for CPU.""" | |||
| @prim_attr_register | |||
| def __init__(self, epsilon=0.0, momentum=0.1): | |||
| self.init_prim_io_names(inputs=['dy', 'x', 'scale', 'bias', 'save_mean', 'save_inv_variance'], | |||
| outputs=['dx', 'bn_scale', 'bn_bias']) | |||
| self.add_prim_attr('data_format', "NCHW") | |||
| def infer_shape(self, dy_shape, x_shape, scale_shape, bias_shape, save_mean_shape, save_inv_variance_shape): | |||
| return (x_shape, scale_shape, bias_shape) | |||
| def infer_dtype(self, dy_type, x_type, scale_type, bias_type, save_mean_type, save_inv_variance_type): | |||
| return (x_type, scale_type, bias_type) | |||
| class FusedBatchNormGradEx(PrimitiveWithInfer): | |||
| """Gradients of FusedBatchNormEx operation.""" | |||
| @@ -640,6 +640,7 @@ class FusedBatchNorm(Primitive): | |||
| self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name) | |||
| self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name) | |||
| self._update_parameter = True | |||
| self.target = context.get_context("device_target") | |||
| class FusedBatchNormEx(PrimitiveWithInfer): | |||
| @@ -0,0 +1,60 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.common.api import ms_function | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import GradOperation | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class Grad(nn.Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.grad = GradOperation(get_all=True, sens_param=True) | |||
| self.network = network | |||
| @ms_function | |||
| def construct(self, input_, output_grad): | |||
| return self.grad(self.network)(input_, output_grad) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Abs() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| y_expect = np.abs(x) | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| assert (out.asnumpy() == y_expect).all() | |||
| sens = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| backward_net = Grad(Net()) | |||
| output = backward_net(Tensor(x), Tensor(sens)) | |||
| print(len(output)) | |||
| print(output[0].asnumpy()) | |||
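The backward pass in this test is only printed, not asserted. If a numeric check were wanted, the gradient of Abs is sign(x) times the incoming sens; a hedged numpy sketch, not part of the test file:

```python
import numpy as np

x = np.random.randn(2, 3, 3, 4).astype(np.float32)
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
# d/dx |x| = sign(x), so the expected input gradient is sign(x) * sens
# (x == 0 is ignored here; random floats are effectively never exactly zero).
expected_grad = np.sign(x) * sens
```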
| @@ -80,3 +80,39 @@ def test_train_forward(): | |||
| bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean), Tensor(moving_var_init)) | |||
| bn_net.set_train(False) | |||
| output = bn_net(Tensor(x)) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_train_backward(): | |||
| x = np.array([[ | |||
| [[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]], | |||
| [[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32) | |||
| grad = np.array([[ | |||
| [[1, 2, 7, 1], [4, 2, 1, 3], [1, 6, 5, 2], [2, 4, 3, 2]], | |||
| [[9, 4, 3, 5], [1, 3, 7, 6], [5, 7, 9, 9], [1, 4, 6, 8]]]]).astype(np.float32) | |||
| expect_output = np.array([[[[-0.69126546, -0.32903028, 1.9651246, -0.88445705], | |||
| [0.6369296, -0.37732816, -0.93275493, -0.11168876], | |||
| [-0.7878612, 1.3614, 0.8542711, -0.52222186], | |||
| [-0.37732816, 0.5886317, -0.11168876, -0.28073236]], | |||
| [[1.6447213, -0.38968924, -1.0174079, -0.55067265], | |||
| [-2.4305856, -1.1751484, 0.86250514, 0.5502673], | |||
| [0.39576983, 0.5470243, 1.1715001, 1.6447213], | |||
| [-1.7996241, -0.7051701, 0.7080077, 0.5437813]]]]).astype(np.float32) | |||
| weight = Tensor(np.ones(2).astype(np.float32)) | |||
| bias = Tensor(np.ones(2).astype(np.float32)) | |||
| moving_mean = Tensor(np.ones(2).astype(np.float32)) | |||
| moving_var_init = Tensor(np.ones(2).astype(np.float32)) | |||
| error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-6 | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| bn_net = Batchnorm_Net(2, weight, bias, moving_mean, moving_var_init) | |||
| bn_net.set_train() | |||
| bn_grad = Grad(bn_net) | |||
| output = bn_grad(Tensor(x), Tensor(grad)) | |||
| diff = output[0].asnumpy() - expect_output | |||
| assert np.all(diff < error) | |||
| assert np.all(-diff < error) | |||
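For reference, the hard-coded expect_output above corresponds to the standard training-mode batch-norm input gradient. A hedged numpy sketch of that formula (per channel in NCHW, with n the number of elements per channel, and epsilon assumed to be the layer default of 1e-5); this is a reference derivation only, not code from the kernel:

```python
import numpy as np

def batchnorm_input_grad(x, dy, gamma, eps=1e-5):
    # Reduce over batch and spatial dims; channels live on axis 1 (NCHW).
    axes = (0, 2, 3)
    n = x.size / x.shape[1]
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    inv_std = 1.0 / np.sqrt(var + eps)
    x_hat = (x - mean) * inv_std
    dgamma = (dy * x_hat).sum(axis=axes, keepdims=True)
    dbeta = dy.sum(axis=axes, keepdims=True)
    # Training-mode batch-norm input gradient.
    return gamma.reshape(1, -1, 1, 1) * inv_std * (dy - dbeta / n - x_hat * dgamma / n)
```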
| @@ -0,0 +1,76 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.context as context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| class Net(Cell): | |||
| def __init__(self, dtype): | |||
| super(Net, self).__init__() | |||
| self.Cast = P.Cast() | |||
| self.dtype = dtype | |||
| def construct(self, x): | |||
| return self.Cast(x, self.dtype) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_cast_int32(): | |||
| x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32)) | |||
| x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32)) | |||
| x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool)) | |||
| t = mstype.int32 | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| net = Net(t) | |||
| output = net(x0) | |||
| type0 = output.asnumpy().dtype | |||
| assert type0 == 'int32' | |||
| output = net(x1) | |||
| type1 = output.asnumpy().dtype | |||
| assert type1 == 'int32' | |||
| output = net(x2) | |||
| type2 = output.asnumpy().dtype | |||
| assert type2 == 'int32' | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_cast_float32(): | |||
| x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32)) | |||
| x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32)) | |||
| x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool)) | |||
| t = mstype.float32 | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| net = Net(t) | |||
| output = net(x0) | |||
| type0 = output.asnumpy().dtype | |||
| assert type0 == 'float32' | |||
| output = net(x1) | |||
| type1 = output.asnumpy().dtype | |||
| assert type1 == 'float32' | |||
| output = net(x2) | |||
| type2 = output.asnumpy().dtype | |||
| assert type2 == 'float32' | |||
| @@ -0,0 +1,56 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| class NetExp(nn.Cell): | |||
| def __init__(self): | |||
| super(NetExp, self).__init__() | |||
| self.exp = P.Exp() | |||
| def construct(self, x): | |||
| return self.exp(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_exp(): | |||
| x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32) | |||
| x1_np = np.random.uniform(-2, 2, 1).astype(np.float32) | |||
| x0 = Tensor(x0_np) | |||
| x1 = Tensor(x1_np) | |||
| expect0 = np.exp(x0_np) | |||
| expect1 = np.exp(x1_np) | |||
| error0 = np.ones(shape=expect0.shape) * 1.0e-5 | |||
| error1 = np.ones(shape=expect1.shape) * 1.0e-5 | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| exp = NetExp() | |||
| output0 = exp(x0) | |||
| diff0 = output0.asnumpy() - expect0 | |||
| assert np.all(diff0 < error0) | |||
| assert output0.shape == expect0.shape | |||
| output1 = exp(x1) | |||
| diff1 = output1.asnumpy() - expect1 | |||
| assert np.all(diff1 < error1) | |||
| assert output1.shape == expect1.shape | |||
| @@ -0,0 +1,83 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Less() | |||
| def construct(self, x, y): | |||
| return self.ops(x, y) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32) | |||
| x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32) | |||
| y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| x3_np = np.random.randint(1, 5, 1).astype(np.float32) | |||
| y3_np = np.random.randint(1, 5, 1).astype(np.float32) | |||
| x4_np = np.array(768).astype(np.float32) | |||
| y4_np = np.array(3072.5).astype(np.float32) | |||
| x0 = Tensor(x0_np) | |||
| y0 = Tensor(y0_np) | |||
| x1 = Tensor(x1_np) | |||
| y1 = Tensor(y1_np) | |||
| x2 = Tensor(x2_np) | |||
| y2 = Tensor(y2_np) | |||
| x3 = Tensor(x3_np) | |||
| y3 = Tensor(y3_np) | |||
| x4 = Tensor(x4_np) | |||
| y4 = Tensor(y4_np) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| net = Net() | |||
| out = net(x0, y0).asnumpy() | |||
| expect = x0_np < y0_np | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| out = net(x1, y1).asnumpy() | |||
| expect = x1_np < y1_np | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| out = net(x2, y2).asnumpy() | |||
| expect = x2_np < y2_np | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| out = net(x3, y3).asnumpy() | |||
| expect = x3_np < y3_np | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| out = net(x4, y4).asnumpy() | |||
| expect = x4_np < y4_np | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| @@ -0,0 +1,56 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| class NetLog(nn.Cell): | |||
| def __init__(self): | |||
| super(NetLog, self).__init__() | |||
| self.log = P.Log() | |||
| def construct(self, x): | |||
| return self.log(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_log(): | |||
| x0_np = np.random.uniform(1, 2, (2, 3, 4, 4)).astype(np.float32) | |||
| x1_np = np.random.uniform(1, 2, 1).astype(np.float32) | |||
| x0 = Tensor(x0_np) | |||
| x1 = Tensor(x1_np) | |||
| expect0 = np.log(x0_np) | |||
| expect1 = np.log(x1_np) | |||
| error0 = np.ones(shape=expect0.shape) * 1.0e-5 | |||
| error1 = np.ones(shape=expect1.shape) * 1.0e-5 | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| log = NetLog() | |||
| output0 = log(x0) | |||
| output1 = log(x1) | |||
| diff0 = output0.asnumpy() - expect0 | |||
| assert np.all(diff0 < error0) | |||
| assert output0.shape == expect0.shape | |||
| diff1 = output1.asnumpy() - expect1 | |||
| assert np.all(diff1 < error1) | |||
| assert output1.shape == expect1.shape | |||
| @@ -16,38 +16,53 @@ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import Tensor, context | |||
| from mindspore.common.api import ms_function | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.ops import operations as P | |||
| x = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32) | |||
| y = np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32) | |||
| context.set_context(device_target='CPU') | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.mul = P.Mul() | |||
| self.x = Parameter(initializer(Tensor(x), x.shape), name='x3') | |||
| self.y = Parameter(initializer(Tensor(y), y.shape), name='y3') | |||
| @ms_function | |||
| def construct(self): | |||
| return self.mul(self.x, self.y) | |||
| def construct(self, x, y): | |||
| return self.mul(x, y) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_Mul(): | |||
| def test_mul(): | |||
| x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32)) | |||
| x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32)) | |||
| y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| y2 = Tensor(2, mstype.float32) | |||
| mul = Net() | |||
| output = mul() | |||
| print(x) | |||
| print(y) | |||
| print(output) | |||
| out = mul(x0, y0).asnumpy() | |||
| exp = x0.asnumpy() * y0.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||
| out = mul(x1, y1).asnumpy() | |||
| exp = x1.asnumpy() * y1.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||
| out = mul(x2, y2).asnumpy() | |||
| exp = x2.asnumpy() * y2.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||
| @@ -0,0 +1,60 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.common.api import ms_function | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import GradOperation | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class Grad(nn.Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.grad = GradOperation(get_all=True, sens_param=True) | |||
| self.network = network | |||
| @ms_function | |||
| def construct(self, input_, output_grad): | |||
| return self.grad(self.network)(input_, output_grad) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Neg() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| y_expect = -x | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| assert (out.asnumpy() == y_expect).all() | |||
| sens = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| backward_net = Grad(Net()) | |||
| output = backward_net(Tensor(x), Tensor(sens)) | |||
| print(len(output)) | |||
| print(output[0].asnumpy()) | |||
| @@ -0,0 +1,58 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Pow() | |||
| def construct(self, x, y): | |||
| return self.ops(x, y) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y1_np = np.array(3).astype(np.float32) | |||
| x0 = Tensor(x0_np) | |||
| y0 = Tensor(y0_np) | |||
| x1 = Tensor(x1_np) | |||
| y1 = Tensor(y1_np) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| net = Net() | |||
| out = net(x0, y0).asnumpy() | |||
| expect = np.power(x0_np, y0_np) | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| out = net(x1, y1).asnumpy() | |||
| expect = np.power(x1_np, y1_np) | |||
| assert np.all(out == expect) | |||
| assert out.shape == expect.shape | |||
| @@ -0,0 +1,95 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| class NetRealDiv(nn.Cell): | |||
| def __init__(self): | |||
| super(NetRealDiv, self).__init__() | |||
| self.divide = P.RealDiv() | |||
| def construct(self, x, y): | |||
| return self.divide(x, y) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_real_div(): | |||
| x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32) | |||
| x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32) | |||
| y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) | |||
| x3_np = np.random.randint(1, 5, 1).astype(np.float32) | |||
| y3_np = np.random.randint(1, 5, 1).astype(np.float32) | |||
| x4_np = np.array(768).astype(np.float32) | |||
| y4_np = np.array(3072.5).astype(np.float32) | |||
| x0 = Tensor(x0_np) | |||
| y0 = Tensor(y0_np) | |||
| x1 = Tensor(x1_np) | |||
| y1 = Tensor(y1_np) | |||
| x2 = Tensor(x2_np) | |||
| y2 = Tensor(y2_np) | |||
| x3 = Tensor(x3_np) | |||
| y3 = Tensor(y3_np) | |||
| x4 = Tensor(x4_np) | |||
| y4 = Tensor(y4_np) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| real_div = NetRealDiv() | |||
| output0 = real_div(x0, y0) | |||
| expect0 = np.divide(x0_np, y0_np) | |||
| diff0 = output0.asnumpy() - expect0 | |||
| error0 = np.ones(shape=expect0.shape) * 1.0e-5 | |||
| assert np.all(diff0 < error0) | |||
| assert output0.shape == expect0.shape | |||
| output1 = real_div(x1, y1) | |||
| expect1 = np.divide(x1_np, y1_np) | |||
| diff1 = output1.asnumpy() - expect1 | |||
| error1 = np.ones(shape=expect1.shape) * 1.0e-5 | |||
| assert np.all(diff1 < error1) | |||
| assert output1.shape == expect1.shape | |||
| output2 = real_div(x2, y2) | |||
| expect2 = np.divide(x2_np, y2_np) | |||
| diff2 = output2.asnumpy() - expect2 | |||
| error2 = np.ones(shape=expect2.shape) * 1.0e-5 | |||
| assert np.all(diff2 < error2) | |||
| assert output2.shape == expect2.shape | |||
| output3 = real_div(x3, y3) | |||
| expect3 = np.divide(x3_np, y3_np) | |||
| diff3 = output3.asnumpy() - expect3 | |||
| error3 = np.ones(shape=expect3.shape) * 1.0e-5 | |||
| assert np.all(diff3 < error3) | |||
| assert output3.shape == expect3.shape | |||
| output4 = real_div(x4, y4) | |||
| expect4 = np.divide(x4_np, y4_np) | |||
| diff4 = output4.asnumpy() - expect4 | |||
| error4 = np.ones(shape=expect4.shape) * 1.0e-5 | |||
| assert np.all(diff4 < error4) | |||
| assert output4.shape == expect4.shape | |||
| @@ -20,7 +20,9 @@ import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations import _grad_ops as G | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| class NetReLU6(nn.Cell): | |||
| def __init__(self): | |||
| @@ -30,6 +32,13 @@ class NetReLU6(nn.Cell): | |||
| def construct(self, x): | |||
| return self.relu6(x) | |||
| class NetReLU6Grad(nn.Cell): | |||
| def __init__(self): | |||
| super(NetReLU6Grad, self).__init__() | |||
| self.relu6_grad = G.ReLU6Grad() | |||
| def construct(self, x, dy): | |||
| return self.relu6_grad(dy, x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @@ -42,7 +51,26 @@ def test_relu6(): | |||
| [5.9, 6, 6,], | |||
| [6, 1, 0.]]]]).astype(np.float32) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| relu6 = NetReLU6() | |||
| output = relu6(x) | |||
| assert (output.asnumpy() == expect).all() | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_relu6_grad(): | |||
| x = Tensor(np.array([[[[-1, 1, 10], | |||
| [5.9, 6.1, 6], | |||
| [10, 1, -1]]]]).astype(np.float32)) | |||
| dy = Tensor(np.array([[[[1, 1, 1], | |||
| [1, 1, 1], | |||
| [1, 1, 1]]]]).astype(np.float32)) | |||
| expect = np.array([[[[0, 1, 0,], | |||
| [1, 0, 1,], | |||
| [0, 1, 0,]]]]).astype(np.float32) | |||
| error = np.ones(shape=[3, 3]) * 1.0e-6 | |||
| relu6_grad = NetReLU6Grad() | |||
| output = relu6_grad(x, dy) | |||
| diff = np.abs(output.asnumpy() - expect) | |||
| assert np.all(np.abs(diff) < error) | |||
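As a cross-check, the expected mask above is 1 exactly where the forward input lies inside the clip range, with x == 6 treated as in-range to match the expected output in this test. A small numpy sketch, purely illustrative:

```python
import numpy as np

x = np.array([[[[-1, 1, 10], [5.9, 6.1, 6], [10, 1, -1]]]], dtype=np.float32)
dy = np.ones_like(x)
# Gradient passes through only where 0 < x <= 6 (boundary behaviour at exactly 6
# taken from the expected output above, where x == 6 yields gradient 1).
expect = np.where((x > 0) & (x <= 6), dy, 0).astype(np.float32)
```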
| @@ -49,5 +49,5 @@ def test_relu_grad(): | |||
| output = relu_grad() | |||
| expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32) | |||
| error = np.ones(shape=[3, 3]) * 1.0e-6 | |||
| diff = output.asnumpy() - expect | |||
| diff = np.abs(output.asnumpy() - expect) | |||
| assert np.all(diff < error) | |||
| @@ -0,0 +1,78 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations import _grad_ops as G | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class NetSigmoidGrad(nn.Cell): | |||
| def __init__(self): | |||
| super(NetSigmoidGrad, self).__init__() | |||
| self.sigmoid_grad = G.SigmoidGrad() | |||
| def construct(self, y, dy): | |||
| return self.sigmoid_grad(y, dy) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Sigmoid() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| y_expect = 1 / (1 + np.exp(-x)) | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| diff = out.asnumpy() - y_expect | |||
| err = np.ones(shape=y_expect.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == y_expect.shape | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_sigmoid_grad(): | |||
| y = Tensor(np.array([[[[-1, 1, 2], | |||
| [1, -1, 1], | |||
| [2, 1, -1]]]]).astype(np.float32)) | |||
| dy = Tensor(np.array([[[[-11, 2, 4], | |||
| [-1, 1, -1], | |||
| [-4, 4, -4]]]]).astype(np.float32)) | |||
| expect = np.array([[[[22, 0, -8], | |||
| [0, -2, 0], | |||
| [8, 0, 8]]]]).astype(np.float32) | |||
| error = np.ones(shape=[1, 1, 3, 3]) * 1.0e-6 | |||
| sigmoid_grad = NetSigmoidGrad() | |||
| output = sigmoid_grad(y, dy) | |||
| diff = np.abs(output.asnumpy() - expect) | |||
| assert np.all(abs(diff) < error) | |||
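The hard-coded expect above follows the sigmoid-gradient identity dy * y * (1 - y), with y the forward output passed as the first argument. A quick numpy cross-check, illustrative only:

```python
import numpy as np

y = np.array([[[[-1, 1, 2], [1, -1, 1], [2, 1, -1]]]], dtype=np.float32)
dy = np.array([[[[-11, 2, 4], [-1, 1, -1], [-4, 4, -4]]]], dtype=np.float32)
# SigmoidGrad(y, dy) = dy * y * (1 - y); reproduces the expect array above.
expect = dy * y * (1 - y)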
| @@ -0,0 +1,75 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations import _grad_ops as G | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class NetSqrtGrad(nn.Cell): | |||
| def __init__(self): | |||
| super(NetSqrtGrad, self).__init__() | |||
| self.sqrt_grad = G.SqrtGrad() | |||
| def construct(self, x, dx): | |||
| return self.sqrt_grad(x, dx) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Sqrt() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.abs(np.random.randn(2, 3, 3, 4)).astype(np.float32) | |||
| y_expect = np.sqrt(x) | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| diff = out.asnumpy() - y_expect | |||
| err = np.ones(shape=y_expect.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == y_expect.shape | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_sqrt_grad(): | |||
| x = Tensor(np.array([[[[-1, 1, 10], | |||
| [5.9, 6.1, 6], | |||
| [10, 1, -1]]]]).astype(np.float32)) | |||
| dx = Tensor(np.array([[[[1, 1, 1], | |||
| [2, 2, 2], | |||
| [3, 3, 3]]]]).astype(np.float32)) | |||
| expect = np.array([[[[-0.5, 0.5, 0.05,], | |||
| [0.16949153, 0.16393442, 0.16666667,], | |||
| [0.15, 1.5, -1.5,]]]]).astype(np.float32) | |||
| error = np.ones(shape=[3, 3]) * 1.0e-6 | |||
| sqrt_grad = NetSqrtGrad() | |||
| output = sqrt_grad(x, dx) | |||
| diff = np.abs(output.asnumpy() - expect) | |||
| assert np.all(np.abs(diff) < error) | |||
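The expected values above satisfy expect = dx / (2 * x) elementwise, i.e. the usual sqrt-gradient identity when the first input is treated as the forward result. A quick numpy cross-check, illustrative only:

```python
import numpy as np

x = np.array([[[[-1, 1, 10], [5.9, 6.1, 6], [10, 1, -1]]]], dtype=np.float32)
dx = np.array([[[[1, 1, 1], [2, 2, 2], [3, 3, 3]]]], dtype=np.float32)
# Since d(sqrt(t))/dt = 1 / (2 * sqrt(t)), SqrtGrad(x, dx) = dx / (2 * x);
# this reproduces the expect array above.
expect = dx / (2 * x)
```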
| @@ -0,0 +1,63 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.common.api import ms_function | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import GradOperation | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class Grad(nn.Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.grad = GradOperation(get_all=True, sens_param=True) | |||
| self.network = network | |||
| @ms_function | |||
| def construct(self, input_, output_grad): | |||
| return self.grad(self.network)(input_, output_grad) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Square() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| y_expect = x * x | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| diff = out.asnumpy() - y_expect | |||
| err = np.ones(shape=y_expect.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == y_expect.shape | |||
| sens = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| backward_net = Grad(Net()) | |||
| output = backward_net(Tensor(x), Tensor(sens)) | |||
| print(len(output)) | |||
| print(output[0].asnumpy()) | |||
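The backward output here is only printed; if an assertion were wanted, the gradient of Square is 2 * x * sens. A hedged numpy sketch, not part of the test file:

```python
import numpy as np

x = np.random.randn(2, 3, 3, 4).astype(np.float32)
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
# d(x^2)/dx = 2x, so the expected input gradient is 2 * x * sens.
expected_grad = 2 * x * sens
```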
| @@ -0,0 +1,63 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.common.api import ms_function | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import GradOperation | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class Grad(nn.Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.grad = GradOperation(get_all=True, sens_param=True) | |||
| self.network = network | |||
| @ms_function | |||
| def construct(self, input_, output_grad): | |||
| return self.grad(self.network)(input_, output_grad) | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super(Net, self).__init__() | |||
| self.ops = P.Tanh() | |||
| def construct(self, x): | |||
| return self.ops(x) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_net(): | |||
| x = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| y_expect = np.tanh(x) | |||
| net = Net() | |||
| out = net(Tensor(x)) | |||
| diff = out.asnumpy() - y_expect | |||
| err = np.ones(shape=y_expect.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == y_expect.shape | |||
| sens = np.random.randn(2, 3, 3, 4).astype(np.float32) | |||
| backward_net = Grad(Net()) | |||
| output = backward_net(Tensor(x), Tensor(sens)) | |||
| print(len(output)) | |||
| print(output[0].asnumpy()) | |||
| @@ -13,12 +13,15 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import pytest | |||
| import numpy as np | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.nn as nn | |||
| import mindspore.context as context | |||
| from mindspore import Tensor, context | |||
| from mindspore.ops import operations as P | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| class TensorAdd(nn.Cell): | |||
| def __init__(self): | |||
| @@ -34,10 +37,30 @@ class TensorAdd(nn.Cell): | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_onecard | |||
| def test_tensor_add(): | |||
| x = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32) | |||
| y = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='CPU') | |||
| x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32)) | |||
| x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32)) | |||
| y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) | |||
| y2 = Tensor(2, mstype.float32) | |||
| add = TensorAdd() | |||
| output = add(Tensor(x), Tensor(y)) | |||
| assert (output.asnumpy() == x + y).all() | |||
| out = add(x0, y0).asnumpy() | |||
| exp = x0.asnumpy() + y0.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||
| out = add(x1, y1).asnumpy() | |||
| exp = x1.asnumpy() + y1.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||
| out = add(x2, y2).asnumpy() | |||
| exp = x2.asnumpy() + y2.asnumpy() | |||
| diff = np.abs(out - exp) | |||
| err = np.ones(shape=exp.shape) * 1.0e-5 | |||
| assert np.all(diff < err) | |||
| assert out.shape == exp.shape | |||