add dynamic broadcastgradientargs

4 years ago · f1b59bc454
--- a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc
@@ -0,0 +1,206 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.h"
 #include "backend/session/anf_runtime_algorithm.h"

 namespace mindspore {
 namespace kernel {
 namespace {
 const int kInputNum = 2;

 std::vector<std::vector<int64_t>> GetGradientIndices(const std::vector<std::vector<int64_t>> &reverse_shape,
                                                     const size_t largest_rank) {
  std::vector<std::vector<int64_t>> grad_reduce_idx(kInputNum);
  // indices of j-th component of each input.
  bool prev_is_one[kInputNum];
  bool current_is_one[kInputNum];
  for (int i = 0; i < kInputNum; ++i) {
    prev_is_one[i] = false;
    current_is_one[i] = false;
  }

  bool set_one = false;
  for (size_t j = 0; j < largest_rank; ++j) {
    int output_dim = -1;
    bool output_dim_set = false;
    bool none_is_one = true;
    // Find which indices are 1.
    for (int i = 0; i < kInputNum; ++i) {
      if (reverse_shape[i][j] == 1) {
        current_is_one[i] = true;
        none_is_one = false;
      } else {
        current_is_one[i] = false;
        if (!output_dim_set || reverse_shape[i][j] == static_cast<int64_t>(output_dim)) {
          output_dim = reverse_shape[i][j];
          output_dim_set = true;
        } else {
          MS_LOG(EXCEPTION) << "Input[0] and input[1] Cannot broadcast!";
        }
      }
    }

    // All dimensions are 1.
    if (!output_dim_set) {
      for (int i = 0; i < kInputNum; ++i) {
        grad_reduce_idx[i].push_back(largest_rank - 1 - j);
      }
      continue;
    } else if (std::equal(current_is_one, current_is_one + kInputNum, prev_is_one) && set_one) {
      for (int i = 0; i < kInputNum; ++i) {
        if (current_is_one[i] && !none_is_one) {
          grad_reduce_idx[i].push_back(largest_rank - 1 - j);
        }
      }
    } else {
      for (int i = 0; i < kInputNum; ++i) {
        if (current_is_one[i] && !none_is_one) {
          grad_reduce_idx[i].push_back(largest_rank - 1 - j);
        }
      }
    }
    set_one = true;
    for (int i = 0; i < kInputNum; ++i) {
      prev_is_one[i] = current_is_one[i];
    }
  }
  return grad_reduce_idx;
 }

 std::vector<std::vector<int64_t>> CalculateOutput(const std::vector<std::vector<int64_t>> &x) {
  std::vector<std::vector<int64_t>> grad_reduce_idx(kInputNum);
  bool all_equal = true;
  size_t largest_rank = 0;
  for (int i = 0; i < kInputNum; ++i) {
    if (x[i] != x[0]) {
      all_equal = false;
    }
    if (x[i].size() > largest_rank) {
      largest_rank = x[i].size();
    }
  }
  if (all_equal) {
    return grad_reduce_idx;
  }

  // Reverse input the shapes
  std::vector<std::vector<int64_t>> reverse_shape(kInputNum);
  for (int i = 0; i < kInputNum; ++i) {
    reverse_shape[i] = x[i];
    std::reverse(reverse_shape[i].begin(), reverse_shape[i].end());
  }

  // 1-extend and align all vectors.
  for (int i = 0; i < kInputNum; ++i) {
    if (reverse_shape[i].size() < largest_rank) {
      reverse_shape[i].resize(largest_rank, 1);
    }
  }
  grad_reduce_idx = GetGradientIndices(reverse_shape, largest_rank);
  return grad_reduce_idx;
 }

 std::vector<int64_t> GetInputShape(const CNodePtr &cnode, size_t index) {
  auto address_x = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode, index);
  auto shape_x = AnfAlgo::GetPrevNodeOutputInferShape(cnode, index);
  auto type_x = AnfAlgo::GetOutputInferDataType(cnode, index);
  if (type_x != TypeId::kNumberTypeInt64) {
    MS_LOG(EXCEPTION) << "Input x type must be int64, but :" << type_x;
  }
  if (shape_x.size() != 1) {
    MS_LOG(EXCEPTION) << "Input" << index << " must be [1-D], but " << shape_x.size() << "-D.";
  }

  size_t x_num = shape_x[0];
  std::vector<int64_t> x{SizeToLong(x_num)};

  auto x_shape_value = std::make_shared<tensor::Tensor>(type_x, x);
  x_shape_value->set_device_address(address_x);
  x_shape_value->data_sync();

  auto x_value = reinterpret_cast<int64_t *>(x_shape_value->data_c());
  MS_EXCEPTION_IF_NULL(x_value);
  std::vector<int64_t> input_shape = {x_value, x_value + x_num};
  return input_shape;
 }

 size_t SetOutputValue(const CNodePtr &cnode, const std::vector<std::vector<int64_t>> &grad_reduce_idx, size_t index,
                      size_t input_num) {
  std::vector<int64_t> output;
  size_t idx_num = grad_reduce_idx[index].size();

  for (size_t k = 0; k < idx_num; ++k) {
    output.push_back(grad_reduce_idx[index][idx_num - 1 - k]);
  }

  auto out_addr = AnfAlgo::GetOutputAddr(cnode, index);
  MS_EXCEPTION_IF_NULL(out_addr);

  size_t out_size = idx_num;
  if (idx_num == 0) {
    out_size = input_num;
    for (size_t k = 0; k < input_num; ++k) {
      output.push_back(k);
    }
  }

  std::vector<int64_t> out_shape{SizeToLong(out_size)};
  auto output_type = TypeId::kNumberTypeInt64;
  auto tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, out_shape);

  auto data_ptr = static_cast<int64_t *>(tensor_for_sync->data_c());
  for (size_t i = 0; i < out_size; ++i) {
    MS_LOG(DEBUG) << "DEBUG r" << index << "_output_shape[" << i << "]:" << output[i];
    *(data_ptr + i) = output[i];
  }

  out_addr->SyncHostToDevice(out_shape, LongToSize(tensor_for_sync->data().nbytes()), tensor_for_sync->data_type(),
                             tensor_for_sync->data_c(), tensor_for_sync->device_info().host_format_);
  return out_size;
 }
 }  // namespace

 void DynamicBroadcastGradientArgsKernel::Execute() {
  MS_LOG(INFO) << "Execute DynamicBroadcastGradientArgsKernel Start";
  auto cnode = cnode_ptr_.lock();
  MS_EXCEPTION_IF_NULL(cnode);
  auto input_num = AnfAlgo::GetInputTensorNum(cnode);
  if (input_num != 2) {
    MS_LOG(EXCEPTION) << "Invalid Input Num:" << input_num;
  }

  std::vector<std::vector<int64_t>> input_shapes(kInputNum);
  input_shapes[0] = GetInputShape(cnode, 0);
  input_shapes[1] = GetInputShape(cnode, 1);
  auto grad_reduce_idx = CalculateOutput(input_shapes);

  auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0, input_shapes[0].size());
  auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1, input_shapes[1].size());

  std::vector<size_t> r0_shp{r0_size};
  std::vector<size_t> r1_shp{r1_size};
  auto output_type = TypeId::kNumberTypeInt64;
  AnfAlgo::SetOutputInferTypeAndShape({output_type, output_type}, {r0_shp, r1_shp}, cnode.get());
  MS_LOG(INFO) << "Execute DynamicBroadcastGradientArgsKernel End";
 }

 device::DynamicKernelPtr DynamicBroadcastGradientArgsKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr,
                                                                                 void *stream_ptr) {
  return std::make_shared<DynamicBroadcastGradientArgsKernel>(stream_ptr, cnode_ptr);
 }
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.h
@@ -0,0 +1,44 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_BROADCAST_GRADIENT_ARGS_KERNEL_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_BROADCAST_GRADIENT_ARGS_KERNEL_H_
 #include <vector>
 #include <memory>
 #include <string>
 #include "runtime/device/ascend/executor/host_dynamic_kernel.h"
 #include "backend/kernel_compiler/host/host_kernel_mod.h"

 using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
 namespace mindspore {
 namespace kernel {
 class DynamicBroadcastGradientArgsKernel : public HostDynamicKernel {
 public:
  DynamicBroadcastGradientArgsKernel(void *stream, const CNodePtr &cnode_ptr) : HostDynamicKernel(stream, cnode_ptr) {}
  ~DynamicBroadcastGradientArgsKernel() override = default;
  void Execute() override;
 };

 class DynamicBroadcastGradientArgsKernelMod : public HostKernelMod {
 public:
  DynamicBroadcastGradientArgsKernelMod() = default;
  ~DynamicBroadcastGradientArgsKernelMod() override = default;
  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
 };
 MS_HOST_REG_KERNEL(DynamicBroadcastGradientArgs, DynamicBroadcastGradientArgsKernelMod);
 }  // namespace kernel
 }  // namespace mindspore

 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_BROADCAST_GRADIENT_ARGS_KERNEL_H_
--- a/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc
@@ -29,11 +29,6 @@ void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<K
  MS_LOG(INFO) << "HostMetadataInfo.";
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  if (op_name != kDynamicShape) {
    MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
    return;
  }

  std::vector<std::string> inputs_format{};
  std::vector<TypeId> inputs_type{};
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -74,6 +74,7 @@ constexpr auto kFastGeLU = "FastGeLU";
 constexpr auto kFastGeLUGrad = "FastGeLUGrad";
 constexpr auto kZerosLike = "ZerosLike";
 constexpr auto kOnesLike = "OnesLike";
 constexpr auto kDynamicBroadcastGradientArgs = "DynamicBroadcastGradientArgs";

 // NN
 constexpr auto kCTCLoss = "CTCLoss";
@@ -632,6 +633,8 @@ inline const PrimitivePtr kPrimStringConcat = std::make_shared<Primitive>("strin
 inline const PrimitivePtr kPrimDictLen = std::make_shared<Primitive>("dict_len");
 inline const PrimitivePtr kPrimFakeBprop = std::make_shared<Primitive>("fake_bprop");
 inline const PrimitivePtr kPrimBroadcastGradientArgs = std::make_shared<Primitive>("BroadcastGradientArgs");
 inline const PrimitivePtr kPrimDynamicBroadcastGradientArgs =
  std::make_shared<Primitive>(kDynamicBroadcastGradientArgs);

 class DoSignaturePrimitive : public Primitive {
 public:
--- a/mindspore/core/ops/dynamic_broadcast_gradient_args.cc
+++ b/mindspore/core/ops/dynamic_broadcast_gradient_args.cc
@@ -0,0 +1,80 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "ops/dynamic_broadcast_gradient_args.h"

 #include <set>
 #include <string>
 #include <vector>
 #include <map>
 #include <memory>

 #include "ops/op_utils.h"

 namespace mindspore {
 namespace ops {
 namespace {
 size_t CheckInputsAndGetShape(const AbstractBasePtr &input_arg, const string &prim_name) {
  MS_EXCEPTION_IF_NULL(input_arg);
  if (input_arg->isa<abstract::AbstractTensor>()) {
    auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_arg->BuildShape())[kShape];
    auto input_size = input_shape.size();
    if (input_size != 1) {
      MS_EXCEPTION(TypeError) << prim_name << " input must be 1-D, but dims is " << input_size;
    }
    if (input_shape[0] == abstract::Shape::SHP_ANY) {
      auto max_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_arg->BuildShape())[kMaxShape];
      if (max_shape.empty()) {
        MS_LOG(EXCEPTION) << prim_name << " input shape is dynamic, but max shape is empty.";
      }
      return max_shape[0];
    }
    return input_shape[0];
  } else if (input_arg->isa<abstract::AbstractTuple>()) {
    auto x_shape = dyn_cast<abstract::AbstractTuple>(input_arg);
    auto x_shape_data = x_shape->elements();
    return x_shape_data.size();
  } else {
    MS_EXCEPTION(TypeError) << prim_name << " input must be a tuple or Tensor.";
  }
 }

 abstract::TupleShapePtr Infer(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
  MS_EXCEPTION_IF_NULL(primitive);
  auto prim_name = primitive->name();
  (void)CheckAndConvertUtils::CheckInteger("input number", SizeToLong(input_args.size()), kEqual, 2, prim_name);
  auto x_shape = CheckInputsAndGetShape(input_args[0], prim_name);
  auto y_shape = CheckInputsAndGetShape(input_args[1], prim_name);

  ShapeVector shape{abstract::Shape::SHP_ANY};
  ShapeVector min_shape{1L};
  size_t max_size = x_shape > y_shape ? x_shape : y_shape;
  ShapeVector max_shape{SizeToLong(max_size)};
  auto out_shape = std::make_shared<abstract::Shape>(shape, min_shape, max_shape);
  return std::make_shared<abstract::TupleShape>(std::vector<abstract::BaseShapePtr>{out_shape, out_shape});
 }
 }  // namespace

 AbstractBasePtr DynamicBroadcastGradientArgsInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                                  const std::vector<AbstractBasePtr> &input_args) {
  auto types = std::vector<TypePtr>{kInt64, kInt64};
  auto output_type = std::make_shared<Tuple>(types);
  return abstract::MakeAbstract(Infer(primitive, input_args), output_type);
 }
 REGISTER_PRIMITIVE_EVAL_IMPL(DynamicBroadcastGradientArgs, prim::kPrimDynamicBroadcastGradientArgs,
                             DynamicBroadcastGradientArgsInfer, nullptr, true);
 }  // namespace ops
 }  // namespace mindspore
--- a/mindspore/core/ops/dynamic_broadcast_gradient_args.h
+++ b/mindspore/core/ops/dynamic_broadcast_gradient_args.h
@@ -0,0 +1,39 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef MINDSPORE_CORE_OPS_DYNAMIC_BROADCAST_GRADIENT_ARGS_H_
 #define MINDSPORE_CORE_OPS_DYNAMIC_BROADCAST_GRADIENT_ARGS_H_
 #include <vector>
 #include <memory>
 #include "ops/primitive_c.h"
 #include "utils/check_convert_utils.h"

 namespace mindspore {
 namespace ops {
 class DynamicBroadcastGradientArgs : public PrimitiveC {
 public:
  DynamicBroadcastGradientArgs() : PrimitiveC(prim::kPrimDynamicBroadcastGradientArgs->name()) {}
  ~DynamicBroadcastGradientArgs() = default;
  MS_DECLARE_PARENT(DynamicBroadcastGradientArgs, PrimitiveC);
  void Init() {}
 };
 AbstractBasePtr DynamicBroadcastGradientArgsInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                                  const std::vector<AbstractBasePtr> &input_args);
 using PrimDynamicBroadcastGradientArgsPtr = std::shared_ptr<DynamicBroadcastGradientArgs>;
 }  // namespace ops
 }  // namespace mindspore

 #endif  // MINDSPORE_CORE_OPS_DYNAMIC_BROADCAST_GRADIENT_ARGS_H_
--- a/mindspore/ops/_op_impl/aicpu/dynamic_shape.py
+++ b/mindspore/ops/_op_impl/aicpu/dynamic_shape.py
@@ -1,40 +0,0 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 """DynamicShape op"""
 from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType

 dynamic_shape_op_info = AiCPURegOp("DynamicShape") \
    .fusion_type("OPAQUE") \
    .input(0, "x", "required") \
    .output(0, "y", "required") \
    .dtype_format(DataType.I8_Default, DataType.I32_Default) \
    .dtype_format(DataType.I16_Default, DataType.I32_Default) \
    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I32_Default) \
    .dtype_format(DataType.U8_Default, DataType.I32_Default) \
    .dtype_format(DataType.U16_Default, DataType.I32_Default) \
    .dtype_format(DataType.U32_Default, DataType.I32_Default) \
    .dtype_format(DataType.U64_Default, DataType.I32_Default) \
    .dtype_format(DataType.F16_Default, DataType.I32_Default) \
    .dtype_format(DataType.F32_Default, DataType.I32_Default) \
    .dtype_format(DataType.F64_Default, DataType.I32_Default) \
    .dtype_format(DataType.BOOL_Default, DataType.I32_Default) \
    .get_op_info()

@op_info_register(dynamic_shape_op_info)
 def _dynamic_shape_aicpu():
    """Unique AiCPU register"""
    return
--- a/mindspore/ops/operations/_inner_ops.py
+++ b/mindspore/ops/operations/_inner_ops.py
@@ -21,7 +21,7 @@ from ..._checkparam import Rel
 from ..._checkparam import Validator as validator
 from ... import context
 from ...common import dtype as mstype
 from ..primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register
 from ..primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register, Primitive
 from ..operations.math_ops import _infer_shape_reduce
 from ...communication.management import GlobalComm
 from .. import signature as sig
@@ -1111,3 +1111,45 @@ class DynamicStitch(PrimitiveWithCheck):
                                               mstype.number_type + (mstype.bool_,), self.name)
            validator.check(f"type of data[{i}]", data_type[i], f"type of data[0]", data_type[0], Rel.EQ, self.name)
        return data_type[0]


 class DynamicBroadcastGradientArgs(Primitive):
    """
    Broadcast the two input shapes, return the dimensions that each need to be broadcast.

    Input shape `s0` and shape `s1` can be broadcast to a common shape if for each dimension pair they are either equal
    or input is one or the target dimension is -1. In case of -1 in target shape, it will be replaced by the input
    shape's value in that dimension.

    Inputs:
        - **s0** (Tensor) - A `1-D` tensor. The data type should be one of the following types: int32, int64,
          uint32, uint64.
        - **s1** (Tensor) - A `1-D` tensor with the same type as `s0`.

    Outputs:
        Tuple(Tensor), tuple of 2 tensors, r0 and r1. The first one is the index tensor and the other one is the mask
        tensor.

        - **r0** (Tensor) - The output shape is 1-D with the same type as s0.
        - **r1** (Tensor) - The output shape is 1-D with the same type as s0.

    Raises:
        ValueError: if the `s0` and `s1` are incompatible, or if a - 1 in the target shape is in an invalid
                    location.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> shape0 = (4, 2, 1)
        >>> shape1 = (2, 7)
        >>> from mindspore.ops.operations import _inner_ops
        >>> args = _inner_ops.DynamicBroadcastGradientArgs()
        >>> r0, r1 = args(Tensor(shape0), Tensor(shape1))
        >>> print(r0, r1)
        [2], [0]
    """

    @prim_attr_register
    def __init__(self):
        """Init BroadcastGradientArgs"""
--- a/tests/st/ops/ascend/test_dynamic_broadcast_gradient_args.py
+++ b/tests/st/ops/ascend/test_dynamic_broadcast_gradient_args.py
@@ -0,0 +1,44 @@
 # Copyright 2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 import numpy as np

 import mindspore.nn as nn
 import mindspore.context as context

 from mindspore.ops.operations import _inner_ops

 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")


 class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.args = _inner_ops.BroadcastGradientArgs()

    def construct(self, s0, s1):
        return self.args(s0, s1)


 def test_net():
    shape0 = (4, 2, 1)
    shape1 = (2, 7)
    net = Net()
    r0, r1 = net(shape0, shape1)
    print(r0, r1)
    r0_expected = [2]
    r1_expected = [0]

    assert np.array_equal(r0_expected, r0.asnumpy())
    assert np.array_equal(r1_expected, r1.asnumpy())