Browse Source

!14997 add scatter operations: ScatterAdd ScatterSub ScatterMul ScatterDiv ScatterMax ScatterMin ScatterUpdate

From: @dinglongwei
Reviewed-by: @wuxuejian,@liangchenghui
Signed-off-by: @wuxuejian
pull/14997/MERGE
mindspore-ci-bot Gitee 5 years ago
parent
commit
29ab37d925
4 changed files with 1060 additions and 8 deletions
  1. +200
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc
  2. +210
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h
  3. +7
    -8
      mindspore/ops/operations/array_ops.py
  4. +643
    -0
      tests/st/ops/cpu/test_scatter_arithmetic_op.py

+ 200
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc View File

@@ -0,0 +1,200 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h"
#include <map>
#include <limits>
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {

// Caches the op name and the element counts that Launch and the Scatter* helpers rely on.
//   inner_size_   : product of every input dimension after the first (elements per row).
//   input_size_   : total number of elements in the input tensor.
//   indices_size_ : total number of entries in the indices tensor.
// Raises (via MS_LOG(EXCEPTION)) when the inferred input shape has no dimensions, because the
// first dimension is the one addressed by the indices.
template <typename T>
void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);

  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.empty()) {
    // Reading input_shape[0] below on an empty shape would be an out-of-bounds access.
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", the input must have at least 1 dimension, but got a scalar.";
  }
  inner_size_ = 1;
  for (size_t i = 1; i < input_shape.size(); i++) {
    inner_size_ *= input_shape[i];
  }
  input_size_ = input_shape[0] * inner_size_;

  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  indices_size_ = 1;
  for (const auto dim : indices_shape) {
    indices_size_ *= dim;
  }
}

// Validates the tensor arity of the node: exactly 3 inputs and exactly 1 output.
// Raises (via MS_EXCEPTION_IF_NULL / MS_LOG(EXCEPTION)) when the node is null or the counts differ.
template <typename T>
void ScatterArithmeticCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
  MS_EXCEPTION_IF_NULL(kernel_node);
  constexpr size_t kExpectedInputNum = 3;
  constexpr size_t kExpectedOutputNum = 1;
  // InitKernel assigns kernel_name_ only after this check, so take the name from the node itself.
  // This kernel serves every Scatter* op, so the message must not hard-code "ScatterAdd".
  const auto op_name = AnfAlgo::GetCNodeName(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != kExpectedInputNum) {
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but " << op_name << " needs " << kExpectedInputNum
                      << " inputs.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != kExpectedOutputNum) {
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but " << op_name << " has " << kExpectedOutputNum
                      << " output.";
  }
}

// Applies the scatter operation named by kernel_name_ to inputs[0] in place, then copies the
// updated input buffer into outputs[0].
//   inputs[0]  : data buffer that is modified in place (element type T).
//   inputs[1]  : indices buffer, read as int.
//   inputs[2]  : updates buffer (element type T).
//   outputs[0] : receives a copy of the updated input.
// `workspace` is not used. Returns true on success.
// Raises (via MS_LOG(EXCEPTION)) when the operator is unknown, the buffer lists have an unexpected
// length, an index falls outside the first dimension of the input, or the final copy fails.
template <typename T>
bool ScatterArithmeticCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &workspace,
                                           const std::vector<kernel::AddressPtr> &outputs) {
  static const std::map<std::string, std::function<void(ScatterArithmeticCPUKernel *, T *, const int *, const T *)>>
    kScatterArithmeticBinOpFuncMap{{"ScatterAdd", &ScatterArithmeticCPUKernel<T>::ScatterAdd},
                                   {"ScatterSub", &ScatterArithmeticCPUKernel<T>::ScatterSub},
                                   {"ScatterMul", &ScatterArithmeticCPUKernel<T>::ScatterMul},
                                   {"ScatterDiv", &ScatterArithmeticCPUKernel<T>::ScatterDiv},
                                   {"ScatterMax", &ScatterArithmeticCPUKernel<T>::ScatterMax},
                                   {"ScatterMin", &ScatterArithmeticCPUKernel<T>::ScatterMin},
                                   {"ScatterUpdate", &ScatterArithmeticCPUKernel<T>::ScatterUpdate}};
  // One lookup instead of find() followed by at().
  const auto func_iter = kScatterArithmeticBinOpFuncMap.find(kernel_name_);
  if (func_iter == kScatterArithmeticBinOpFuncMap.end()) {
    MS_LOG(EXCEPTION) << "Not support operator:" << kernel_name_;
  }

  constexpr size_t kInputNum = 3;
  constexpr size_t kOutputNum = 1;
  if (inputs.size() != kInputNum || outputs.size() != kOutputNum) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", expected " << kInputNum << " input buffers and " << kOutputNum
                      << " output buffer, but got " << inputs.size() << " and " << outputs.size() << ".";
  }

  T *input = reinterpret_cast<T *>(inputs[0]->addr);
  const int *indices = reinterpret_cast<int *>(inputs[1]->addr);
  const T *updates = reinterpret_cast<T *>(inputs[2]->addr);
  T *output = reinterpret_cast<T *>(outputs[0]->addr);

  // Each index selects a row of `input`; a negative or too-large value would make the scatter
  // helpers write outside the input buffer, so reject it before any element is modified.
  if (inner_size_ != 0) {
    const size_t first_dim = input_size_ / inner_size_;
    for (size_t i = 0; i < indices_size_; i++) {
      if (indices[i] < 0 || static_cast<size_t>(indices[i]) >= first_dim) {
        MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", indices[" << i << "] = " << indices[i]
                          << " is out of range [0, " << first_dim << ").";
      }
    }
  }

  func_iter->second(this, input, indices, updates);

  auto bufferSize = outputs[0]->size;
  auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T));
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "Memory copy failed!";
  }
  return true;
}

// In-place scatter-add: for every index entry i, adds row i of `updates` (inner_size_ elements)
// to row indices[i] of `input`.
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] += src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-subtract: for every index entry i, subtracts row i of `updates`
// (inner_size_ elements) from row indices[i] of `input`.
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] -= src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-multiply: for every index entry i, multiplies row indices[i] of `input`
// element-wise by row i of `updates` (inner_size_ elements).
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] *= src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-divide: for every index entry i, divides row indices[i] of `input`
// element-wise by row i of `updates` (inner_size_ elements).
// A zero divisor is handled explicitly instead of being passed to operator/:
//   0 / 0        -> std::numeric_limits<T>::quiet_NaN()
//   x / 0, x > 0 -> +infinity when T has one, otherwise std::numeric_limits<T>::max()
//   x / 0, x < 0 -> -infinity when T has one, otherwise std::numeric_limits<T>::min()
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterDiv(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        auto dividend = dst_row[j];
        auto divisor = src_row[j];
        if (divisor == 0) {
          if (dividend == 0) {
            dst_row[j] = std::numeric_limits<T>::quiet_NaN();
            continue;
          }
          if (std::numeric_limits<T>::has_infinity) {
            dst_row[j] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
          } else {
            // Only reached for types without an infinity, where min() is the most negative value.
            dst_row[j] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
          }
          continue;
        }
        dst_row[j] = dividend / divisor;
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-max: for every index entry i, each element of row indices[i] of `input`
// becomes the larger of itself and the matching element of row i of `updates`.
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] = dst_row[j] > src_row[j] ? dst_row[j] : src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-min: for every index entry i, each element of row indices[i] of `input`
// becomes the smaller of itself and the matching element of row i of `updates`.
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Two entries may name the same row, and
  // splitting by entry would let two workers update the same element concurrently. With a column
  // split every element is touched by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] = dst_row[j] < src_row[j] ? dst_row[j] : src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}

// In-place scatter-update: for every index entry i, overwrites row indices[i] of `input` with
// row i of `updates` (inner_size_ elements). When several entries name the same row, the entry
// with the highest position in `indices` determines the final contents.
//   input   : buffer modified in place.
//   indices : indices_size_ row numbers into `input`.
//   updates : indices_size_ * inner_size_ elements.
template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterUpdate(T *input, const int *indices, const T *updates) {
  if (indices_size_ == 0 || inner_size_ == 0) {
    return;
  }
  // Work is split by column range, not by index entry. Splitting by entry would let two workers
  // write the same row concurrently and leave the winner to thread timing. With a column split
  // every element is written by exactly one worker, in index order.
  auto task = [this, input, indices, updates](size_t start, size_t end) {
    for (size_t i = 0; i < indices_size_; i++) {
      T *dst_row = input + indices[i] * inner_size_;
      const T *src_row = updates + i * inner_size_;
      for (size_t j = start; j < end; j++) {
        dst_row[j] = src_row[j];
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, inner_size_);
}
} // namespace kernel
} // namespace mindspore

+ 210
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h View File

@@ -0,0 +1,210 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ARITHMETIC_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ARITHMETIC_CPU_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
// CPU kernel declaring one in-place helper per scatter operation (Add, Sub, Mul, Div, Max, Min,
// Update). T is the element type of the data and updates buffers; indices are passed as int.
template <typename T>
class ScatterArithmeticCPUKernel : public CPUKernel {
 public:
  ScatterArithmeticCPUKernel() = default;
  ~ScatterArithmeticCPUKernel() override = default;

  // Reads shapes and the op name from the node.
  void InitKernel(const CNodePtr &kernel_node) override;

  // Runs the scatter operation on the given buffers.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Validates the node before InitKernel reads from it.
  void CheckParam(const CNodePtr &kernel_node) const;

  // One helper per supported operation; all share the same (input, indices, updates) signature.
  void ScatterAdd(T *input, const int *indices, const T *updates);
  void ScatterSub(T *input, const int *indices, const T *updates);
  void ScatterMul(T *input, const int *indices, const T *updates);
  void ScatterDiv(T *input, const int *indices, const T *updates);
  void ScatterMax(T *input, const int *indices, const T *updates);
  void ScatterMin(T *input, const int *indices, const T *updates);
  void ScatterUpdate(T *input, const int *indices, const T *updates);

  size_t input_size_ = 0;
  size_t inner_size_ = 0;
  size_t indices_size_ = 0;
  std::string kernel_name_;
};

// Kernel registrations. Every operator is registered three times, once per element type
// (int32_t, float, int64_t). In each KernelAttr the first and third input attributes and the
// output attribute carry the element type, while the second input attribute is always int32.

// ScatterAdd
MS_REG_CPU_KERNEL_T(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterSub
MS_REG_CPU_KERNEL_T(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterMul
MS_REG_CPU_KERNEL_T(ScatterMul,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterMul,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterMul,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterDiv
MS_REG_CPU_KERNEL_T(ScatterDiv,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterDiv,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterDiv,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterMax
MS_REG_CPU_KERNEL_T(ScatterMax,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterMax,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterMax,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterMin
MS_REG_CPU_KERNEL_T(ScatterMin,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterMin,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterMin,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
// ScatterUpdate
MS_REG_CPU_KERNEL_T(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterArithmeticCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterArithmeticCPUKernel, float);
MS_REG_CPU_KERNEL_T(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
ScatterArithmeticCPUKernel, int64_t);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ARITHMETIC_CPU_KERNEL_H_

+ 7
- 8
mindspore/ops/operations/array_ops.py View File

@@ -39,7 +39,6 @@ from ...common.parameter import Parameter
from ...common.tensor import Tensor



class _ScatterOp(PrimitiveWithInfer):
"""
Defines Scatter operators
@@ -3508,7 +3507,7 @@ class ScatterUpdate(_ScatterOp_Dynamic):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``

Examples:
>>> np_x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]])
@@ -3631,7 +3630,7 @@ class ScatterMax(_ScatterOp):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend``
``Ascend`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32), name="input_x")
@@ -3680,7 +3679,7 @@ class ScatterMin(_ScatterOp):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend``
``Ascend`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[0.0, 1.0, 2.0], [0.0, 0.0, 0.0]]), mindspore.float32), name="input_x")
@@ -3728,7 +3727,7 @@ class ScatterAdd(_ScatterOp_Dynamic):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), mindspore.float32), name="x")
@@ -3784,7 +3783,7 @@ class ScatterSub(_ScatterOp):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend``
``Ascend`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), mindspore.float32), name="x")
@@ -3832,7 +3831,7 @@ class ScatterMul(_ScatterOp):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend``
``Ascend`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]), mindspore.float32), name="x")
@@ -3880,7 +3879,7 @@ class ScatterDiv(_ScatterOp):
TypeError: If `use_locking` is not a bool.

Supported Platforms:
``Ascend``
``Ascend`` ``CPU``

Examples:
>>> input_x = Parameter(Tensor(np.array([[6.0, 6.0, 6.0], [2.0, 2.0, 2.0]]), mindspore.float32), name="x")


+ 643
- 0
tests/st/ops/cpu/test_scatter_arithmetic_op.py View File

@@ -0,0 +1,643 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
# Configure the module-wide execution context: graph mode, CPU device target.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class TestScatterAddNet(nn.Cell):
    """Cell that applies ``P.ScatterAdd`` to three parameters captured at construction."""

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_add = P.ScatterAdd(use_locking=lock)
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        return self.scatter_add(self.inputx, self.indices, self.updates)
def scatter_add_net(inputx, indices, updates):
    """Build a ``TestScatterAddNet`` with ``use_locking=True``, run it, and return its output."""
    return TestScatterAddNet(True, inputx, indices, updates)()
def scatter_add_use_locking_false_net(inputx, indices, updates):
    """Build a ``TestScatterAddNet`` with ``use_locking=False``, run it, and return its output."""
    return TestScatterAddNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_small_float32():
    """ScatterAdd on a zero (2, 3) float32 input where each row index appears twice."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[6., 8., 10.],
                     [12., 14., 16.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_input_updated():
    """After running ScatterAdd, the net's ``inputx`` parameter holds the accumulated values."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    net = TestScatterAddNet(True, data, idx, upd)
    net()
    want = np.array([[6., 8., 10.],
                     [12., 14., 16.]])
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_large_shape_float32():
    """ScatterAdd on an all-ones (4, 2, 3, 4) float32 input with indices [[0, 2], [3, 1]]."""
    data = Tensor(np.ones((4, 2, 3, 4), dtype=np.float32))
    idx = Tensor(np.array([[0, 2], [3, 1]], dtype=np.int32))
    upd = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
    result = scatter_add_net(data, idx, upd)
    # The expected rows are the four consecutive 24-element blocks of 1..96, in the order
    # block 0, block 3, block 1, block 2.
    want = np.arange(1., 97.).reshape((4, 2, 3, 4))[[0, 3, 1, 2]]
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_small_float32_use_locking_false():
    """ScatterAdd with ``use_locking=False`` and indices [1, 0] on a zero (2, 3) float32 input."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([1, 0], dtype=np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
    result = scatter_add_use_locking_false_net(data, idx, upd)
    want = np.array([[3., 4., 5.],
                     [0., 1., 2.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_input_less_than_1_float32():
    """ScatterAdd on a (3, 3) float32 input of fractional values with a (2, 2, 3) index tensor."""
    data = Tensor(np.array([[0.214141, 0.415151, 0.51516],
                            [0.876542, 0.451611, 0.55112],
                            [0.111244, 0.633333, 0.34444]], dtype=np.float32))
    idx = Tensor(np.array([[[1, 0, 2],
                            [2, 2, 0]],
                           [[1, 0, 1],
                            [2, 1, 2]]], dtype=np.int32))
    upd = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[141.21414, 144.41515, 147.51517],
                     [208.87654, 212.45161, 216.55112],
                     [257.11124, 262.63333, 267.34442]], dtype=np.float32)
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_float16():
    """ScatterAdd on a zero (2, 3) float16 input where each row index appears twice."""
    data = Tensor(np.zeros((2, 3), dtype=np.float16))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[6., 8., 10.],
                     [12., 14., 16.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_large_float16():
    """ScatterAdd on a zero (2, 3, 4) float16 input with indices [[0, 0], [1, 1]]."""
    data = Tensor(np.zeros((2, 3, 4), dtype=np.float16))
    idx = Tensor(np.array([[0, 0], [1, 1]], dtype=np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[[138., 140., 142., 144.],
                      [146., 148., 150., 152.],
                      [154., 156., 158., 160.]],
                     [[186., 188., 190., 192.],
                      [194., 196., 198., 200.],
                      [202., 204., 206., 208.]]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_disordered_float16():
    """ScatterAdd on a flipped (3, 4) float16 input with an unordered, repeating index tensor."""
    data = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
    idx = Tensor(np.array([[[0, 1, 2],
                            [2, 1, 0]],
                           [[0, 0, 0],
                            [2, 2, 2]]], dtype=np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[464., 468., 472., 476.],
                     [187., 188., 189., 190.],
                     [492., 496., 500., 504.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_large_int32():
    """ScatterAdd on a zero (2, 3, 4) int32 input with indices [[0, 0], [1, 1]]."""
    data = Tensor(np.zeros((2, 3, 4), dtype=np.int32))
    idx = Tensor(np.array([[0, 0], [1, 1]], dtype=np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[[138., 140., 142., 144.],
                      [146., 148., 150., 152.],
                      [154., 156., 158., 160.]],
                     [[186., 188., 190., 192.],
                      [194., 196., 198., 200.],
                      [202., 204., 206., 208.]]]).astype(np.int32)
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_add_disordered_int32():
    """ScatterAdd on a flipped (3, 4) int32 input with an unordered, repeating index tensor."""
    data = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
    idx = Tensor(np.array([[[0, 1, 2],
                            [2, 1, 0]],
                           [[0, 0, 0],
                            [2, 2, 2]]], dtype=np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
    result = scatter_add_net(data, idx, upd)
    want = np.array([[464., 468., 472., 476.],
                     [187., 188., 189., 190.],
                     [492., 496., 500., 504.]]).astype(np.int32)
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterSubNet(nn.Cell):
    """Cell that applies ``P.ScatterSub`` to three parameters captured at construction."""

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_sub = P.ScatterSub(use_locking=lock)
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        return self.scatter_sub(self.inputx, self.indices, self.updates)
def scatter_sub_net(inputx, indices, updates):
    """Build a ``TestScatterSubNet`` with ``use_locking=True``, run it, and return its output."""
    return TestScatterSubNet(True, inputx, indices, updates)()
def scatter_sub_use_locking_false_net(inputx, indices, updates):
    """Build a ``TestScatterSubNet`` with ``use_locking=False``, run it, and return its output."""
    return TestScatterSubNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_sub_input_updated():
    """After running ScatterSub, the net's ``inputx`` parameter holds the subtracted values."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    net = TestScatterSubNet(True, data, idx, upd)
    net()
    want = np.array([[-6., -8., -10.],
                     [-12., -14., -16.]])
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_sub_large_shape_float32():
    """ScatterSub on an all-ones (4, 2, 3, 4) float32 input with indices [[0, 2], [3, 1]]."""
    data = Tensor(np.ones((4, 2, 3, 4), dtype=np.float32))
    idx = Tensor(np.array([[0, 2], [3, 1]], dtype=np.int32))
    upd = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
    result = scatter_sub_net(data, idx, upd)
    # The expected rows are the four consecutive 24-element blocks of 1 - (0..95), in the order
    # block 0, block 3, block 1, block 2.
    want = (1.0 - np.arange(96.)).reshape((4, 2, 3, 4))[[0, 3, 1, 2]]
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_sub_small_float32_use_locking_false():
    """ScatterSub with ``use_locking=False`` and indices [1, 0] on a zero (2, 3) float32 input."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([1, 0], dtype=np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
    result = scatter_sub_use_locking_false_net(data, idx, upd)
    want = np.array([[-3., -4., -5.],
                     [-0., -1., -2.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterMulNet(nn.Cell):
    """Cell that applies ``P.ScatterMul`` to three parameters captured at construction."""

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_mul = P.ScatterMul(use_locking=lock)
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        return self.scatter_mul(self.inputx, self.indices, self.updates)
def scatter_mul_net(inputx, indices, updates):
    """Build a ``TestScatterMulNet`` with ``use_locking=True``, run it, and return its output."""
    return TestScatterMulNet(True, inputx, indices, updates)()
def scatter_mul_use_locking_false_net(inputx, indices, updates):
    """Build a ``TestScatterMulNet`` with ``use_locking=False``, run it, and return its output."""
    return TestScatterMulNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_mul_input_updated():
    """After running ScatterMul, the net's ``inputx`` parameter holds the multiplied values."""
    data = Tensor(np.ones((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    net = TestScatterMulNet(True, data, idx, upd)
    net()
    want = np.array([[0., 7., 16.],
                     [27., 40., 55.]])
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_mul_output_updated_float32():
    """The ScatterMul op output holds the multiplied values for an all-ones (2, 3) input."""
    data = Tensor(np.ones((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    result = scatter_mul_net(data, idx, upd)
    want = np.array([[0., 7., 16.],
                     [27., 40., 55.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_mul_small_float32_use_locking_false():
    """ScatterMul with ``use_locking=False`` on an all-ones (2, 3) float32 input."""
    data = Tensor(np.ones((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    result = scatter_mul_use_locking_false_net(data, idx, upd)
    want = np.array([[0., 7., 16.],
                     [27., 40., 55.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterDivNet(nn.Cell):
    """Cell that applies ``P.ScatterDiv`` to three parameters captured at construction."""

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_div = P.ScatterDiv(use_locking=lock)
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        return self.scatter_div(self.inputx, self.indices, self.updates)
def scatter_div_net(inputx, indices, updates):
    """Build a ``TestScatterDivNet`` with ``use_locking=True``, run it, and return its output."""
    return TestScatterDivNet(True, inputx, indices, updates)()
def scatter_div_use_locking_false_net(inputx, indices, updates):
    """Build a ``TestScatterDivNet`` with ``use_locking=False``, run it, and return its output."""
    return TestScatterDivNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_div_input_updated():
    """After ScatterDiv of a zero input by non-zero updates, ``inputx`` is still all zeros."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(1, 13).reshape((2, 2, 3)).astype(np.float32))
    net = TestScatterDivNet(True, data, idx, upd)
    net()
    want = np.array([[0., 0., 0.],
                     [0., 0., 0.]])
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_div_output_updated_float32():
    """The ScatterDiv op output is all zeros when a zero input is divided by non-zero updates."""
    data = Tensor(np.zeros((2, 3), dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.arange(1, 13).reshape((2, 2, 3)).astype(np.float32))
    result = scatter_div_net(data, idx, upd)
    want = np.array([[0., 0., 0.],
                     [0., 0., 0.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_div_small_float32_use_locking_false():
    """ScatterDiv with ``use_locking=False``: dividing an all-tens input by ones leaves it unchanged."""
    data = Tensor(np.full((2, 3), 10, dtype=np.float32))
    idx = Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32))
    upd = Tensor(np.ones((2, 2, 3), dtype=np.float32))
    result = scatter_div_use_locking_false_net(data, idx, upd)
    want = np.array([[10., 10., 10.],
                     [10., 10., 10.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterMaxNet(nn.Cell):
    """Cell that applies ``P.ScatterMax`` to three parameters captured at construction."""

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_max = P.ScatterMax(use_locking=lock)
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        return self.scatter_max(self.inputx, self.indices, self.updates)
def scatter_max_net(inputx, indices, updates):
    """Run a TestScatterMaxNet built with `lock=True` and return its output."""
    return TestScatterMaxNet(True, inputx, indices, updates)()
def scatter_max_use_locking_false_net(inputx, indices, updates):
    """Run a TestScatterMaxNet built with `lock=False` and return its output."""
    return TestScatterMaxNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_max_input_updated():
    """After one run, the `inputx` parameter of the ScatterMax net must hold
    [[6, 7, 8], [9, 10, 11]]."""
    net = TestScatterMaxNet(
        True,
        Tensor(np.zeros((2, 3), dtype=np.float32)),
        Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32)),
        Tensor(np.arange(12, dtype=np.float32).reshape(2, 2, 3)),
    )
    net()
    want = np.array([[6., 7., 8.], [9., 10., 11.]])
    # Inspect the parameter itself, not the return value of the network.
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_max_output_updated_float32():
    """The ScatterMax net output for a zero float32 input and updates 0..11
    must be [[6, 7, 8], [9, 10, 11]]."""
    base = np.zeros((2, 3), dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_max_net(Tensor(base), Tensor(rows), Tensor(deltas))
    want = np.array([[6., 7., 8.], [9., 10., 11.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_max_small_float32_use_locking_false():
    """With `lock=False` and an input of all 10s, only the single update
    value above 10 (the 11) may replace an element of the output."""
    base = np.full((2, 3), 10, dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_max_use_locking_false_net(Tensor(base), Tensor(rows), Tensor(deltas))
    want = np.array([[10., 10., 10.], [10., 10., 11.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterMinNet(nn.Cell):
    """Argument-free cell that applies `P.ScatterMin` to stored parameters.

    Args:
        lock: forwarded to `P.ScatterMin` as `use_locking`.
        inputx: initial value of the parameter named "inputx".
        indices: initial value of the parameter named "indices".
        updates: initial value of the parameter named "updates".
    """

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_min = P.ScatterMin(use_locking=lock)
        # All three operands are wrapped as Parameters so that construct()
        # can be called without arguments.
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        # Return the primitive's result directly.
        return self.scatter_min(self.inputx, self.indices, self.updates)
def scatter_min_net(inputx, indices, updates):
    """Run a TestScatterMinNet built with `lock=True` and return its output."""
    return TestScatterMinNet(True, inputx, indices, updates)()
def scatter_min_use_locking_false_net(inputx, indices, updates):
    """Run a TestScatterMinNet built with `lock=False` and return its output."""
    return TestScatterMinNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_min_input_updated():
    """ScatterMin must write the element-wise minimum back into `inputx`.

    The input starts at 5 rather than 0 on purpose: every update value is
    non-negative, so an all-zero input would already be the minimum and the
    assertion would hold even if the kernel never touched the parameter.
    """
    inputx = Tensor(np.full((2, 3), 5).astype(np.float32))
    indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
    updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    lock = True
    net = TestScatterMinNet(lock, inputx, indices, updates)
    net()
    # indices [[0, 1], [0, 1]] route updates[0][0] and updates[1][0] to row 0,
    # and updates[0][1] and updates[1][1] to row 1. Hence:
    #   row 0 = min(5, [0, 1, 2], [6, 7, 8])   -> [0, 1, 2]
    #   row 1 = min(5, [3, 4, 5], [9, 10, 11]) -> [3, 4, 5]
    expected = np.array([[0., 1., 2.],
                         [3., 4., 5.]])
    # Inspect the parameter itself, not the return value of the network.
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_min_output_updated_float32():
    """The ScatterMin net output for an all-ones float32 input and updates
    0..11 must be [[0, 1, 1], [1, 1, 1]]."""
    base = np.ones((2, 3), dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_min_net(Tensor(base), Tensor(rows), Tensor(deltas))
    # Only the single 0 in the updates is below the starting value of 1.
    want = np.array([[0., 1., 1.], [1., 1., 1.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_min_small_float32_use_locking_false():
    """With `lock=False`, the ScatterMin net output for an all-ones input
    and updates 0..11 must be [[0, 1, 1], [1, 1, 1]]."""
    base = np.ones((2, 3), dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_min_use_locking_false_net(Tensor(base), Tensor(rows), Tensor(deltas))
    want = np.array([[0., 1., 1.], [1., 1., 1.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
class TestScatterUpdateNet(nn.Cell):
    """Argument-free cell that applies `P.ScatterUpdate` to stored parameters.

    Args:
        lock: forwarded to `P.ScatterUpdate` as `use_locking`.
        inputx: initial value of the parameter named "inputx".
        indices: initial value of the parameter named "indices".
        updates: initial value of the parameter named "updates".
    """

    def __init__(self, lock, inputx, indices, updates):
        super().__init__()
        self.scatter_update = P.ScatterUpdate(use_locking=lock)
        # All three operands are wrapped as Parameters so that construct()
        # can be called without arguments.
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        # Return the primitive's result directly.
        return self.scatter_update(self.inputx, self.indices, self.updates)
def scatter_update_net(inputx, indices, updates):
    """Run a TestScatterUpdateNet built with `lock=True` and return its output."""
    return TestScatterUpdateNet(True, inputx, indices, updates)()
def scatter_update_use_locking_false_net(inputx, indices, updates):
    """Run a TestScatterUpdateNet built with `lock=False` and return its output."""
    return TestScatterUpdateNet(False, inputx, indices, updates)()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_update_input_updated():
    """After one run, the `inputx` parameter of the ScatterUpdate net must
    hold [[6, 7, 8], [9, 10, 11]]."""
    net = TestScatterUpdateNet(
        True,
        Tensor(np.zeros((2, 3), dtype=np.float32)),
        Tensor(np.array([[0, 1], [0, 1]], dtype=np.int32)),
        Tensor(np.arange(12, dtype=np.float32).reshape(2, 2, 3)),
    )
    net()
    want = np.array([[6., 7., 8.], [9., 10., 11.]])
    # Inspect the parameter itself, not the return value of the network.
    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_update_output_updated_float32():
    """The ScatterUpdate net output for an all-ones float32 input and
    updates 0..11 must be [[6, 7, 8], [9, 10, 11]]."""
    base = np.ones((2, 3), dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_update_net(Tensor(base), Tensor(rows), Tensor(deltas))
    want = np.array([[6., 7., 8.], [9., 10., 11.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatter_update_small_float32_use_locking_false():
    """With `lock=False`, the ScatterUpdate net output for an all-ones input
    and updates 0..11 must be [[6, 7, 8], [9, 10, 11]]."""
    base = np.ones((2, 3), dtype=np.float32)
    rows = np.array([[0, 1], [0, 1]], dtype=np.int32)
    deltas = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    result = scatter_update_use_locking_false_net(Tensor(base), Tensor(rows), Tensor(deltas))
    want = np.array([[6., 7., 8.], [9., 10., 11.]])
    np.testing.assert_array_almost_equal(result.asnumpy(), want)

Loading…
Cancel
Save