added SGD operation in CPU

5 years ago · cb54c6efae
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
@@ -0,0 +1,88 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "backend/kernel_compiler/cpu/sgd_cpu_kernel.h"

 #include <thread>
 #include <vector>

 namespace mindspore {
 namespace kernel {
 namespace {
 constexpr size_t kInputSize = 6;
 constexpr size_t kOutputSize = 1;
 }  // namespace
 template <typename T>
 void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  dampening_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "dampening");
  weight_decay_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "weight_decay");
  nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "nesterov");
 }

 template <typename T>
 void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
  // inputs: params, grad, lr, accum, momentum, stat
  if (inputs.size() != kInputSize) {
    MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs.";
  }

  // output: param
  if (outputs.size() != kOutputSize) {
    MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs.";
  }
 }

 template <typename T>
 bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                             const std::vector<AddressPtr> &outputs) {
  CheckParam(inputs, outputs);

  auto param = reinterpret_cast<T *>(inputs[0]->addr);
  auto grad = reinterpret_cast<T *>(inputs[1]->addr);
  auto lr = reinterpret_cast<T *>(inputs[2]->addr);
  auto accum = reinterpret_cast<T *>(inputs[3]->addr);
  auto momentum = reinterpret_cast<T *>(inputs[4]->addr);
  auto stat = reinterpret_cast<T *>(inputs[5]->addr);
  size_t elem_num = inputs[0]->size / sizeof(float);

  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      T grad_new = grad[i];
      if (weight_decay_ > 0) {
        grad_new += param[i] * static_cast<T>(weight_decay_);
      }
      if (momentum[0] > static_cast<T>(0)) {
        if (stat[i] > static_cast<T>(0)) {
          accum[i] = grad_new;
          stat[i] = static_cast<T>(0);
        } else {
          accum[i] = accum[i] * momentum[0] + static_cast<T>(1.0 - dampening_) * grad_new;
        }
        if (nesterov_) {
          grad_new += accum[i] * momentum[0];
        } else {
          grad_new = accum[i];
        }
      }
      param[i] -= lr[0] * grad_new;
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
 }
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
@@ -0,0 +1,67 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_

 #include <thread>
 #include <vector>
 #include "backend/kernel_compiler/cpu/cpu_kernel.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

 namespace mindspore {
 namespace kernel {
 template <typename T>
 class SGDCPUKernel : public CPUKernel {
 public:
  SGDCPUKernel() = default;
  ~SGDCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
              const std::vector<AddressPtr> &outputs) override;

 private:
  static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
  float dampening_;
  float weight_decay_;
  bool nesterov_{true};
 };

 MS_REG_CPU_KERNEL_T(SGD,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    SGDCPUKernel, float);

 MS_REG_CPU_KERNEL_T(SGD,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddOutputAttr(kNumberTypeFloat16),
                    SGDCPUKernel, float16);
 }  // namespace kernel
 }  // namespace mindspore
 #endif
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2704,7 +2704,7 @@ class SGD(PrimitiveWithCheck):
                   float16 nor float32.

    Supported Platforms:
        ``Ascend`` ``GPU``
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> sgd = ops.SGD()
--- a/tests/st/ops/cpu/test_sgd_op.py
+++ b/tests/st/ops/cpu/test_sgd_op.py
@@ -0,0 +1,72 @@
 # Copyright 2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 import numpy as np
 import pytest

 import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.nn import Dense
 from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.nn.optim import SGD
 from mindspore.ops import operations as P

 context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

 class NetSGD(nn.Cell):
    def __init__(self):
        super(NetSGD, self).__init__()
        self.batch_size = 1
        self.reshape = P.Reshape()
        weight = Tensor(np.ones([10, 16]).astype(np.float32) * 0.01)
        self.fc1 = Dense(16, 10, weight_init=weight)

    def construct(self, input_x):
        output = self.reshape(input_x, (self.batch_size, -1))
        output = self.fc1(output)
        return output


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
 def test_SGD():
    epoch = 3
    net = NetSGD()
    learning_rate = 0.1
    momentum = 0.9
    dampening = 0.0
    weight_decay = 0.0
    nesterov = True
    loss_scale = 1.0

    optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening,
                    weight_decay, nesterov, loss_scale)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss.asnumpy())

    last_loss = 100.0
    for loss in losses:
        assert last_loss > loss
        last_loss = loss