| @@ -0,0 +1,111 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.h" | |||
| #include <thread> | |||
| #include <vector> | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Byte width of one float16 / float32 scalar; CheckParam compares the size of the lr input against these.
constexpr size_t kSizeFloat16{2};
constexpr size_t kSizeFloat32{4};
// ApplyAdagrad is launched with four inputs (var, accum, lr, gradient) and two outputs (var, accum).
constexpr size_t kInputSize{4};
constexpr size_t kOutputSize{2};
}  // namespace
| void ApplyAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| update_slots_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots"); | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| } | |||
| bool ApplyAdagradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| CheckParam(inputs, outputs); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernel<float>(inputs); | |||
| } | |||
| return true; | |||
| } | |||
| void ApplyAdagradCPUKernel::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| // inputs: var, accum, lr, gradient | |||
| if (inputs.size() != kInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but ApplyAdagrad needs 4 inputs."; | |||
| } | |||
| // outputs: var, accum | |||
| if (outputs.size() != kOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but ApplyAdagrad needs 2 outputs."; | |||
| } | |||
| if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) { | |||
| MS_LOG(EXCEPTION) << "Error input data size!"; | |||
| } | |||
| if (inputs[2]->size != kSizeFloat16 && inputs[2]->size != kSizeFloat32) { | |||
| MS_LOG(EXCEPTION) << "The attribute lr and grad must be float16 or float32!"; | |||
| } | |||
| } | |||
| template <typename T> | |||
| void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs) { | |||
| auto var = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto accum = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto lr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto gradient = reinterpret_cast<T *>(inputs[3]->addr); | |||
| // multithreading | |||
| size_t length = inputs[0]->size / sizeof(T); | |||
| size_t max_thread_num = std::thread::hardware_concurrency(); | |||
| size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num; | |||
| std::vector<std::thread> threads; | |||
| threads.reserve(use_thread_num); | |||
| size_t start = 0; | |||
| size_t batch_size = (length + use_thread_num - 1) / use_thread_num; | |||
| while (start < length) { | |||
| size_t end = (start + batch_size) > length ? length : (start + batch_size); | |||
| threads.emplace_back( | |||
| std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T>, this, var, accum, *lr, gradient, start, end)); | |||
| start += batch_size; | |||
| } | |||
| for (auto &it : threads) { | |||
| it.join(); | |||
| } | |||
| } | |||
| template <typename T> | |||
| void ApplyAdagradCPUKernel::LaunchApplyAdagrad(T *var, T *accum, T lr, T *gradient, size_t start, size_t end) { | |||
| const T one = T(1); | |||
| const T eps = T(1e-6); | |||
| for (size_t i = start; i < end; ++i) { | |||
| // update accum: accum += grad * grad | |||
| if (update_slots_) { | |||
| accum[i] += gradient[i] * gradient[i]; | |||
| } | |||
| // update var: var -= lr * grad * \frac{1}{\sqrt{accum}} | |||
| var[i] -= lr * gradient[i] * (one / sqrt(accum[i] + eps)); | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,67 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADAGRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADAGRAD_CPU_KERNEL_H_ | |||
| #include <thread> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ApplyAdagradCPUKernel : public CPUKernel { | |||
| public: | |||
| ApplyAdagradCPUKernel() = default; | |||
| ~ApplyAdagradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs); | |||
| template <typename T> | |||
| void LaunchApplyAdagrad(T *var, T *accum, T lr, T *gradient, size_t start, size_t end); | |||
| bool update_slots_{true}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(ApplyAdagrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| ApplyAdagradCPUKernel); | |||
| MS_REG_CPU_KERNEL(ApplyAdagrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddOutputAttr(kNumberTypeFloat16) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| ApplyAdagradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,61 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor, Parameter | |||
| from mindspore.ops import operations as P | |||
| import mindspore.common.dtype as mstype | |||
# Run everything in this module in graph mode on the CPU backend.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

# Initial 3x3 float32 optimizer state, shared by Net and by the NumPy reference computation.
var_np = np.random.rand(3, 3).astype(np.float32)
accum_np = np.random.rand(3, 3).astype(np.float32)
class Net(nn.Cell):
    """Cell that applies P.ApplyAdagrad to parameters initialised from var_np and accum_np."""

    def __init__(self):
        super().__init__()
        self.apply_adagrad = P.ApplyAdagrad()
        # Parameters hold the optimizer state passed to the op.
        self.var = Parameter(Tensor(var_np), name="var")
        self.accum = Parameter(Tensor(accum_np), name="accum")

    def construct(self, lr, grad):
        # Run one Adagrad step, then return the parameters themselves.
        self.apply_adagrad(self.var, self.accum, lr, grad)
        return self.var, self.accum
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_apply_adagrad():
    """Compare one ApplyAdagrad step on CPU against a NumPy reference.

    The reference mirrors the kernel: accum += grad * grad, then
    var -= lr * grad / sqrt(accum + 1e-6). Both outputs must match the
    reference element-wise within an absolute tolerance of 1e-6.
    """
    learning_rate = 0.001
    tolerance = 1e-6

    # NumPy reference, computed before the network runs.
    gradient_np = np.random.rand(3, 3).astype(np.float32)
    expect_accum_np = accum_np + gradient_np * gradient_np
    expect_var_np = var_np - (learning_rate * gradient_np * (1 / np.sqrt(expect_accum_np + 1e-6)))

    net = Net()
    lr = Tensor(learning_rate, mstype.float32)
    grad = Tensor(gradient_np)
    out = net(lr, grad)
    res_var_mindspore = out[0].asnumpy()
    res_accum_mindspore = out[1].asnumpy()

    # Compare absolute differences: a signed difference would let any large negative error pass.
    assert np.all(np.abs(expect_var_np - res_var_mindspore) < tolerance)
    assert np.all(np.abs(expect_accum_np - res_accum_mindspore) < tolerance)