|
|
|
@@ -870,12 +870,31 @@ bool AnfRuntimeAlgorithm::IsIndependentNode(const CNodePtr &node) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) { |
|
|
|
static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, std::vector<size_t> *device_shape) { |
|
|
|
if (!max_shape.empty()) { |
|
|
|
std::transform(max_shape.begin(), max_shape.end(), device_shape->begin(), IntToSize); |
|
|
|
} else { |
|
|
|
const size_t kDefaultValueForDynamicDim = 16; |
|
|
|
auto tmp_shape = *device_shape; |
|
|
|
auto ConvertNegOneToDefalut = [](size_t size) { |
|
|
|
return static_cast<int64_t>(size) < 0 ? kDefaultValueForDynamicDim : size; |
|
|
|
}; |
|
|
|
std::transform(tmp_shape.begin(), tmp_shape.end(), device_shape->begin(), ConvertNegOneToDefalut); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// This function gets the input device shape adaptively for both the dynamic-shape and static-shape cases. |
// When the shape is dynamic, it first takes the shape value from max_shape. If max_shape is empty, it |
// just returns a default shape value to avoid calculation errors during kernel init. |
// Why do we do this? Because in the dynamic-shape case the input shape is unknown when the `init` |
// function executes for the very first time, but we still need some plausible shape to make |
// sure `init` executes correctly. |
|
|
|
std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) { |
|
|
|
auto device_shape = GetInputDeviceShape(anf_node, index); |
|
|
|
// Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse. |
|
|
|
if (AnfUtils::IsShapeDynamic(device_shape)) { |
|
|
|
auto max_shape = common::AnfAlgo::GetInputMaxShape(anf_node, index); |
|
|
|
std::transform(max_shape.begin(), max_shape.end(), device_shape.begin(), IntToSize); |
|
|
|
GetMaxOrDefaultShape(max_shape, &device_shape); |
|
|
|
auto format = GetInputFormat(anf_node, index); |
|
|
|
auto dtype = GetInputDeviceDataType(anf_node, index); |
|
|
|
(void)trans::TransShapeToDevice(device_shape, format, anf_node, index, dtype, false); |
|
|
|
@@ -883,12 +902,13 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const An |
|
|
|
return device_shape; |
|
|
|
} |
|
|
|
|
|
|
|
std::vector<size_t> AnfRuntimeAlgorithm::GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) { |
|
|
|
// The same as GetInputDeviceShapeAdaptively |
|
|
|
std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) { |
|
|
|
auto device_shape = GetOutputDeviceShape(anf_node, index); |
|
|
|
// Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse. |
|
|
|
if (AnfUtils::IsShapeDynamic(device_shape)) { |
|
|
|
auto max_shape = common::AnfAlgo::GetOutputMaxShape(anf_node, index); |
|
|
|
std::transform(max_shape.begin(), max_shape.end(), device_shape.begin(), IntToSize); |
|
|
|
GetMaxOrDefaultShape(max_shape, &device_shape); |
|
|
|
auto format = GetOutputFormat(anf_node, index); |
|
|
|
auto dtype = GetOutputDeviceDataType(anf_node, index); |
|
|
|
(void)trans::TransShapeToDevice(device_shape, format, anf_node, index, dtype); |
|
|
|
|