diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
index f39546dffc..3af5002415 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
@@ -82,6 +82,7 @@ const std::map DictOpType{
   {"Abs", OperatorType::kRecElmWiseOp},
   {"Acosh", OperatorType::kRecElmWiseOp},
   {"AddN", OperatorType::kRecElmWiseOp},
+  {"AccumulateNV2", OperatorType::kRecElmWiseOp},
   {"Atan2", OperatorType::kRecElmWiseOp},
   {"Erf", OperatorType::kRecElmWiseOp},
   {"Floor", OperatorType::kRecElmWiseOp},
diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index acc4bc0672..1e4f932442 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -932,6 +932,18 @@ def get_bprop_scalar_cast(self):
     return bprop
 
 
+@bprop_getters.register(P.AccumulateNV2)
+def get_bprop_scalar_accumulatenv2(self):
+    """Generate bprop for AccumulateNV2: the returned bprop hands `dout` to every entry of `x`."""
+
+    def bprop(x, out, dout):
+        dx = ()  # grows into a tuple with one gradient per entry of x
+        for _ in range(len(x)):
+            dx = dx + (dout,)  # every entry receives dout as-is
+        return dx
+    return bprop
+
+
 @bprop_getters.register(P.AddN)
 def get_bprop_scalar_addn(self):
     """Generate bprop for AddN"""
diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py
index 0537d4b4f2..8351761935 100644
--- a/mindspore/ops/_op_impl/tbe/__init__.py
+++ b/mindspore/ops/_op_impl/tbe/__init__.py
@@ -26,6 +26,7 @@ from .adam_apply_one_with_decay import _adam_apply_one_with_decay_tbe
 from .add import _add_tbe
 from .apply_centered_rms_prop import _apply_centered_rms_prop_tbe
 from .add_n import _add_n_tbe
+from .accumulate_n_v2 import _accumulate_n_v2_tbe
 from .apply_ftrl import _apply_ftrl_tbe
 from .apply_momentum import _apply_momentum_tbe
 from .apply_adam import _apply_adam_tbe
diff --git a/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py b/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py
new file
mode 100644
index 0000000000..fdd72a9494
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py
@@ -0,0 +1,45 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""AccumulateNV2 op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+# Op info for the "accumulate_n_v2" kernel: attribute "n", input "x", output "y",
+# and one dtype_format entry per registered input/output pair.
+accumulate_n_v2_op_info = (
+    TBERegOp("AccumulateNV2")
+    .fusion_type("ELEMWISE")
+    .async_flag(False)
+    .binfile_name("accumulate_n_v2.so")
+    .compute_cost(10)
+    .kernel_name("accumulate_n_v2")
+    .partial_flag(True)
+    .attr("n", "required", "int", "all")
+    .input(0, "x", False, "dynamic", "all")
+    .output(0, "y", False, "required", "all")
+    .op_pattern("broadcast")
+    .dtype_format(DataType.F16_Default, DataType.F16_Default)
+    .dtype_format(DataType.F32_Default, DataType.F32_Default)
+    .dtype_format(DataType.I32_Default, DataType.I32_Default)
+    .dtype_format(DataType.I8_Default, DataType.I8_Default)
+    .dtype_format(DataType.U8_Default, DataType.U8_Default)
+    .get_op_info()
+)
+
+
+@op_info_register(accumulate_n_v2_op_info)
+def _accumulate_n_v2_tbe():
+    """Hand the AccumulateNV2 op info to `op_info_register`; the body itself does nothing."""
+    return
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index e84cf44945..792381a15f 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -41,7 +41,7 @@ from .debug_ops import
(ImageSummary, InsertGradientOf, HookBackward, ScalarSumm
 from .control_ops import ControlDepend, GeSwitch, Merge
 from .inner_ops import ScalarCast
 
-from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul, BitwiseAnd, BitwiseOr,
+from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AccumulateNV2, AssignAdd, AssignSub, Atan2, BatchMatMul, BitwiseAnd, BitwiseOr,
                        BitwiseXor, Inv, Invert, ApproximateEqual, InplaceAdd, InplaceSub,
                        ReduceMax, ReduceMin, ReduceMean, ReduceSum, ReduceAll, ReduceProd, CumProd,
                        Cos, Div, DivNoNan, Equal, EqualCount, Exp, Expm1, Erf, Erfc, Floor, FloorDiv, FloorMod, Ceil,
@@ -88,6 +88,7 @@ __all__ = [
     'ArgMaxWithValue',
     'ArgMinWithValue',
     'AddN',
+    'AccumulateNV2',
     'Sub',
     'CumSum',
     'MatMul',
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index d992d078ac..08cd481582 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -798,6 +798,66 @@ class AddN(PrimitiveWithInfer):
         return Tensor(out)
 
 
+class AccumulateNV2(PrimitiveWithInfer):
+    """
+    Computes accumulation of all input tensors element-wise.
+
+    AccumulateNV2 is like AddN, with one significant difference: it does not wait
+    for all of its inputs to be ready before it begins to sum. When the inputs
+    become ready at different times this saves memory, since the minimum temporary
+    storage is proportional to the output size rather than to the inputs size.
+
+    Inputs:
+        - **input_x** (Union(tuple[Tensor], list[Tensor])) - A tuple or list of
+          tensors, whose dtype is number, to be added together.
+
+    Outputs:
+        Tensor, has the same shape and dtype as each entry of the `input_x`.
+
+    Examples:
+        >>> class NetAccumulateNV2(nn.Cell):
+        >>>     def __init__(self):
+        >>>         super(NetAccumulateNV2, self).__init__()
+        >>>         self.accumulateNV2 = P.AccumulateNV2()
+        >>>
+        >>>     def construct(self, *z):
+        >>>         return self.accumulateNV2(z)
+        >>>
+        >>> net = NetAccumulateNV2()
+        >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float32)
+        >>> input_y = Tensor(np.array([4, 5, 6]), mindspore.float32)
+        >>> net(input_x, input_y, input_x, input_y)
+        Tensor([10., 14., 18.], shape=(3,), dtype=mindspore.float32)
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """Initialize AccumulateNV2 and name its input and output."""
+        self.__setattr_flag__ = True
+        self.init_prim_io_names(inputs=["inputs"], outputs=["sum"])
+
+    def infer_shape(self, inputs):
+        """Check that all input shapes are equal, record their count as attr `n`, return the shape."""
+        prim_name = self.name
+        num_inputs = len(inputs)
+        validator.check_integer("inputs", num_inputs, 1, Rel.GE, prim_name)
+        self.add_prim_attr('n', num_inputs)
+        first_shape = inputs[0]
+        for idx, shape in enumerate(inputs):
+            validator.check(f"shape of inputs[{idx}]", shape, 'shape of inputs[0]', first_shape,
+                            Rel.EQ, prim_name)
+        return first_shape
+
+    def infer_dtype(self, inputs):
+        """Validate the input dtypes against number types plus bool and return the first one."""
+        prim_name = self.name
+        validator.check_value_type("inputs", inputs, [tuple, list], prim_name)
+        validator.check_integer("inputs", len(inputs), 1, Rel.GE, prim_name)
+        named_dtypes = {f"inputs[{idx}]": dtype for idx, dtype in enumerate(inputs)}
+        validator.check_tensor_type_same(named_dtypes, mstype.number_type + (mstype.bool_,), prim_name)
+        return inputs[0]
+
+
 class Neg(PrimitiveWithInfer):
     """
     Returns a tensor with negative values of the input tensor element-wise.
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 5486a4319c..cdbb818454 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -1415,6 +1415,11 @@ test_case_array_ops = [ 'desc_inputs': [[2, 3, 3, 5], [2, 3, 3, 5]], 'desc_bprop': [[2, 3, 3, 5]], 'skip': ['backward']}), + ('AccumulateNV2', { + 'block': NetForTupleInput(P.AccumulateNV2()), + 'desc_inputs': [[2, 3, 3, 5], [2, 3, 3, 5]], + 'desc_bprop': [[2, 3, 3, 5]], + 'skip': ['backward']}), ('Shape', { 'block': P.Shape(), 'desc_inputs': [[3, 3, 2, 2]],