add floordiv kernel to cpu

4 years ago · 4a0fe13430
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
@@ -102,6 +102,29 @@ void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t s
  }
 }

 // Element-wise floor division: for every output index i in [start, end) writes
 // floor(dividend / divisor) to out[i], where the operand positions for i are
 // obtained from GenIndex (idx[0] indexes input1, idx[1] indexes input2).
 //
 // Division by zero does not trap:
 //   - 0 / 0 stores numeric_limits<T>::quiet_NaN() (which is T() for integral T);
 //   - x / 0 stores +/-infinity when T has one, otherwise numeric_limits<T>::max()
 //     for a positive dividend and numeric_limits<T>::min() otherwise.
 //
 // Integral T is handled with exact integer arithmetic: C++ integer division
 // truncates toward zero, so the quotient is decremented when the operands have
 // opposite signs and the division is inexact (e.g. -7 / 2 == -4, not -3).
 // NOTE(review): for signed T, numeric_limits<T>::min() / -1 still overflows,
 // exactly as it did before this change.
 template <typename T>
 void ArithmeticCPUKernel::FloorDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    std::vector<size_t> idx;
    GenIndex(i, &idx);
    const T dividend = input1[idx[0]];
    const T divisor = input2[idx[1]];
    if (divisor == 0) {
      if (dividend == 0) {
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    if constexpr (std::numeric_limits<T>::is_integer) {
      // Truncating quotient first, then round toward negative infinity when needed.
      T quotient = dividend / divisor;
      const bool inexact = (dividend % divisor) != 0;
      const bool opposite_signs = (dividend < 0) != (divisor < 0);
      if (inexact && opposite_signs) {
        --quotient;
      }
      out[i] = quotient;
    } else {
      out[i] = floor(dividend / divisor);
    }
  }
 }

 template <typename T>
 void ArithmeticCPUKernel::Mod(const T *input1, const T *input2, T *out, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
@@ -207,6 +230,8 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) {
    operate_type_ = REALDIV;
  } else if (kernel_name == prim::kPrimDiv->name()) {
    operate_type_ = DIV;
  } else if (kernel_name == prim::kPrimFloorDiv->name()) {
    operate_type_ = FLOORDIV;
  } else if (kernel_name == prim::kPrimMod->name()) {
    operate_type_ = MOD;
  } else if (kernel_name == prim::kPrimPow->name()) {
@@ -389,6 +414,8 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == DIV) {
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == FLOORDIV) {
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::FloorDiv<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == MOD) {
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mod<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == POW) {
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h
@@ -50,6 +50,8 @@ class ArithmeticCPUKernel : public CPUKernel {
  template <typename T>
  void Div(const T *input1, const T *input2, T *out, size_t start, size_t end);
  // Computes element-wise floor division of input1 by input2 into out for the
  // output index range [start, end).
  template <typename T>
  void FloorDiv(const T *input1, const T *input2, T *out, size_t start, size_t end);
  template <typename T>
  void Mod(const T *input1, const T *input2, T *out, size_t start, size_t end);
  template <typename T>
  void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end);
@@ -117,6 +119,16 @@ MS_REG_CPU_KERNEL(
 MS_REG_CPU_KERNEL(
  Div, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ArithmeticCPUKernel);
 // FloorDiv is registered on ArithmeticCPUKernel for int64, int32 and float32;
 // in each registration both inputs and the output share the same dtype.
 MS_REG_CPU_KERNEL(
  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
  ArithmeticCPUKernel);
 MS_REG_CPU_KERNEL(
  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ArithmeticCPUKernel);
 MS_REG_CPU_KERNEL(
  FloorDiv,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ArithmeticCPUKernel);
 MS_REG_CPU_KERNEL(
  Mod, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ArithmeticCPUKernel);
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
@@ -67,6 +67,7 @@ enum OperateType {
  SQRT,
  POW,
  REALDIV,
  FLOORDIV,
  MOD,
  NEG,
  LESS,
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -261,6 +261,7 @@ inline const PrimitivePtr kPrimInplaceAdd = std::make_shared<Primitive>("Inplace
 inline const PrimitivePtr kPrimInplaceSub = std::make_shared<Primitive>("InplaceSub");
 inline const PrimitivePtr kPrimPow = std::make_shared<Primitive>("Pow");
 inline const PrimitivePtr kPrimRealDiv = std::make_shared<Primitive>("RealDiv");
 inline const PrimitivePtr kPrimFloorDiv = std::make_shared<Primitive>("FloorDiv");
 inline const PrimitivePtr kPrimSqrt = std::make_shared<Primitive>("Sqrt");
 inline const PrimitivePtr kPrimSqrtGrad = std::make_shared<Primitive>("SqrtGrad");
 inline const PrimitivePtr kPrimReciprocal = std::make_shared<Primitive>("Reciprocal");
--- a/tests/st/ops/cpu/test_arithmetic_op.py
+++ b/tests/st/ops/cpu/test_arithmetic_op.py
@@ -42,6 +42,15 @@ class DivNet(nn.Cell):
        return self.div(x, y)


 class FloorDivNet(nn.Cell):
    """Minimal cell that applies the FloorDiv primitive to its two inputs."""

    def __init__(self):
        super().__init__()
        self.floor_div = P.FloorDiv()

    def construct(self, x, y):
        """Return the result of applying self.floor_div to x and y."""
        quotient = self.floor_div(x, y)
        return quotient


 class ModNet(nn.Cell):
    def __init__(self):
        super(ModNet, self).__init__()
@@ -156,6 +165,71 @@ def test_div():
    assert output7.shape == expect7.shape


@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
 def test_floor_div():
    prop = 1 if np.random.random() < 0.5 else -1
    x0_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
    y0_np = np.random.randint(1, 100, (2, 1, 4, 4)).astype(np.float32) * prop
    x1_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.float16) * prop
    y1_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float16) * prop
    x2_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int32) * prop
    y2_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int32) * prop
    x3_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int32) * prop
    y3_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
    x4_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int64) * prop
    y4_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int64) * prop

    x0 = Tensor(x0_np)
    y0 = Tensor(y0_np)
    x1 = Tensor(x1_np)
    y1 = Tensor(y1_np)
    x2 = Tensor(x2_np)
    y2 = Tensor(y2_np)
    x3 = Tensor(x3_np)
    y3 = Tensor(y3_np)
    x4 = Tensor(x4_np)
    y4 = Tensor(y4_np)

    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    floor_div = FloorDivNet()
    output0 = floor_div(x0, y0)
    expect0 = np.floor_divide(x0_np, y0_np)
    diff0 = output0.asnumpy() - expect0
    error0 = np.ones(shape=expect0.shape) * 1.0e-5
    assert np.all(diff0 < error0)
    assert output0.shape == expect0.shape

    output1 = floor_div(x1, y1)
    expect1 = np.floor_divide(x1_np, y1_np)
    diff1 = output1.asnumpy() - expect1
    error1 = np.ones(shape=expect1.shape) * 1.0e-5
    assert np.all(diff1 < error1)
    assert output1.shape == expect1.shape

    output2 = floor_div(x2, y2)
    expect2 = np.floor_divide(x2_np, y2_np).astype(np.float16)
    diff2 = output2.asnumpy() - expect2
    error2 = np.ones(shape=expect2.shape) * 1.0e-5
    assert np.all(diff2 < error2)
    assert output2.shape == expect2.shape

    output3 = floor_div(x3, y3)
    expect3 = np.floor_divide(x3_np, y3_np)
    diff3 = output3.asnumpy() - expect3
    error3 = np.ones(shape=expect3.shape) * 1.0e-5
    assert np.all(diff3 < error3)
    assert output3.shape == expect3.shape

    output4 = floor_div(x4, y4)
    expect4 = np.floor_divide(x4_np, y4_np)
    diff4 = output4.asnumpy() - expect4
    error4 = np.ones(shape=expect4.shape) * 1.0e-5
    assert np.all(diff4 < error4)
    assert output4.shape == expect4.shape


@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
@@ -249,6 +323,8 @@ def test_mod():
    assert np.all(output7.asnumpy() == expect7)
    assert output6.shape == expect6.shape


 # Run the tests only when the file is executed as a script; calling them at
 # module level would also execute them whenever the module is imported
 # (for example during pytest collection).
 if __name__ == '__main__':
    test_sub()
    test_div()
    test_floor_div()
    test_mod()