Browse Source

return default shape when max_shape is empty

feature/build-system-rewrite
ckey_Dou 4 years ago
parent
commit
e9679ca0bd
40 changed files with 103 additions and 80 deletions
  1. +24
    -4
      mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc
  2. +2
    -2
      mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h
  3. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h
  4. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h
  5. +2
    -2
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h
  6. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/depthtospace_gpu_kernel.h
  7. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h
  8. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h
  9. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/matrix_diag_part_gpu_kernel.h
  10. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_gpu_kernel.h
  11. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h
  12. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h
  13. +2
    -2
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h
  14. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/squeeze_gpu_kernel.h
  15. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h
  16. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h
  17. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h
  18. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h
  19. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h
  20. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h
  21. +4
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h
  22. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h
  23. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h
  24. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h
  25. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/math/logical_not_gpu_kernel.h
  26. +4
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h
  27. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h
  28. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h
  29. +2
    -2
      mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h
  30. +2
    -2
      mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h
  31. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h
  32. +2
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h
  33. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h
  34. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h
  35. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h
  36. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h
  37. +4
    -4
      mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h
  38. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcastto_gpu_kernel.h
  39. +3
    -3
      mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h
  40. +4
    -4
      mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc

+ 24
- 4
mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc View File

@@ -870,12 +870,31 @@ bool AnfRuntimeAlgorithm::IsIndependentNode(const CNodePtr &node) {
return true;
}

std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) {
// Fill `device_shape` from `max_shape` when a max shape is known; otherwise replace
// any dynamic (-1, stored as a huge size_t) dimension already present in
// `device_shape` with a default size so that kernel Init can still compute
// workable buffer sizes before the real shape is known.
// NOTE(review): assumes every element of a non-empty `max_shape` is non-negative —
// confirm with GetInputMaxShape/GetOutputMaxShape callers.
static inline void GetMaxOrDefaultShape(const std::vector<int64_t> &max_shape, std::vector<size_t> *device_shape) {
  if (!max_shape.empty()) {
    // Resize before writing: transforming through begin() without resizing is an
    // out-of-bounds write whenever max_shape has more dims than device_shape.
    device_shape->resize(max_shape.size());
    std::transform(max_shape.begin(), max_shape.end(), device_shape->begin(), IntToSize);
  } else {
    const size_t kDefaultValueForDynamicDim = 16;
    auto ConvertNegOneToDefault = [](size_t size) {
      // A dynamic dim is -1 reinterpreted as size_t, so it shows as negative here.
      return static_cast<int64_t>(size) < 0 ? kDefaultValueForDynamicDim : size;
    };
    // Element-wise conversion is safe in place — no temporary copy of the shape needed.
    std::transform(device_shape->begin(), device_shape->end(), device_shape->begin(), ConvertNegOneToDefault);
  }
}

// This function gets the input device shape adaptively for both the dynamic-shape and static-shape cases.
// When the shape is dynamic, it first gets the shape value from max_shape. If max_shape is empty, it
// just returns a default shape value to avoid calculation errors in the init of kernels.
// Why do we do this? Because in the dynamic shape case, the input shape is unknown when the `init`
// function executes for the very first time, but we still need some helpful shape to make
// sure the `init` executes correctly.
std::vector<size_t> AnfRuntimeAlgorithm::GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
auto device_shape = GetInputDeviceShape(anf_node, index);
// Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
if (AnfUtils::IsShapeDynamic(device_shape)) {
auto max_shape = common::AnfAlgo::GetInputMaxShape(anf_node, index);
std::transform(max_shape.begin(), max_shape.end(), device_shape.begin(), IntToSize);
GetMaxOrDefaultShape(max_shape, &device_shape);
auto format = GetInputFormat(anf_node, index);
auto dtype = GetInputDeviceDataType(anf_node, index);
(void)trans::TransShapeToDevice(device_shape, format, anf_node, index, dtype, false);
@@ -883,12 +902,13 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const An
return device_shape;
}

std::vector<size_t> AnfRuntimeAlgorithm::GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) {
// The same to GetInputDeviceShapeAdaptively
std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index) {
auto device_shape = GetOutputDeviceShape(anf_node, index);
// Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
if (AnfUtils::IsShapeDynamic(device_shape)) {
auto max_shape = common::AnfAlgo::GetOutputMaxShape(anf_node, index);
std::transform(max_shape.begin(), max_shape.end(), device_shape.begin(), IntToSize);
GetMaxOrDefaultShape(max_shape, &device_shape);
auto format = GetOutputFormat(anf_node, index);
auto dtype = GetOutputDeviceDataType(anf_node, index);
(void)trans::TransShapeToDevice(device_shape, format, anf_node, index, dtype);


+ 2
- 2
mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.h View File

@@ -178,8 +178,8 @@ class AnfRuntimeAlgorithm {
static std::vector<KernelGraphPtr> GetCallSwitchKernelGraph(const CNodePtr &cnode);
static bool IsIndependentNode(const CNodePtr &node);
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
static std::vector<size_t> GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
static std::vector<size_t> GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
static std::vector<size_t> GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
static std::vector<size_t> GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
static AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph);
static void InsertMakeTupleForOutput(const NotNull<KernelGraphPtr> &root_graph);
// Save inputs/outputs/workspace address in kernel_mod.


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/array_reduce_gpu_kernel.h View File

@@ -94,7 +94,7 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
int input_dim_length = SizeToInt(AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0).size());
int input_dim_length = SizeToInt(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0).size());

auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
@@ -121,8 +121,8 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
}
keep_dims_ = GetAttr<bool>(kernel_node, "keep_dims");

auto inputA_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto outputC_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto inputA_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto outputC_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ =
CHECK_SHAPE_NULL(inputA_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(outputC_shape, kernel_name_, "output");
if (is_null_input_) {


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h View File

@@ -105,7 +105,7 @@ class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod {
}

// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
if (input_shape.size() != SHAPE_SIZE) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
<< input_shape.size();


+ 2
- 2
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h View File

@@ -75,7 +75,7 @@ class ConcatV2FwdGpuKernelMod : public NativeGpuKernelMod {
if (!CheckParam(kernel_node)) {
return false;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
int dims = SizeToInt(input_shape.size());
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < -dims || axis_ >= dims) {
@@ -95,7 +95,7 @@ class ConcatV2FwdGpuKernelMod : public NativeGpuKernelMod {
int current_dim = 0;
for (int i = 0; i < input_num_; i++) {
size_t input_size = 1;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
for (size_t j = 0; j < input_shape.size(); j++) {
input_size *= input_shape[j];
}


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/depthtospace_gpu_kernel.h View File

@@ -67,7 +67,7 @@ class DepthToSpaceFwdKernelMod : public NativeGpuKernelMod {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/embedding_lookup_gpu_kernel.h View File

@@ -66,9 +66,9 @@ class EmbeddingLookupKernelMod : public NativeGpuKernelMod {
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2 or 3, but got " << input_num;
}
input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
input_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "input_indices") ||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/gatherv2_gpu_kernel.h View File

@@ -73,9 +73,9 @@ class GatherV2FwdGpuKernelMod : public NativeGpuKernelMod {
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2 or 3, but got " << input_num;
}
input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
input_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes_, kernel_name, "input") ||
CHECK_SHAPE_NULL(indices_shapes_, kernel_name, "indices") ||
CHECK_SHAPE_NULL(output_shapes_, kernel_name, "output");


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/matrix_diag_part_gpu_kernel.h View File

@@ -70,7 +70,7 @@ class MatrixDiagPartGpuKernelMod : public NativeGpuKernelMod {
}

void PostExecute() override {
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node_.lock(), 0);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node_.lock(), 0);
output_shape[shapes_.size() - kDim1] = max_diag_len_;
// If the out shape m' * n', the m' dimension is 1, then remove this dimension
output_shape[shapes_.size() - kDim2] = num_diags_;


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/slice_gpu_kernel.h View File

@@ -106,8 +106,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
(void)CheckParam(kernel_node);

auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto out_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto out_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(out_shape, kernel_name_, "output");
if (is_null_input_) {
@@ -126,7 +126,7 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
std::vector<size_t> dynamic_attr_indexs = {kBeginIndex_, kSizeIndex_};
for (size_t index : dynamic_attr_indexs) {
input_size = sizeof(T);
for (size_t x : AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, index)) {
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetobatch_gpu_kernel.h View File

@@ -107,7 +107,7 @@ class SpaceToBatchGpuKernelMod : public NativeGpuKernelMod {
}

// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
if (input_shape.size() != SHAPE_SIZE) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be equal to " << SHAPE_SIZE
<< ", but got " << input_shape.size();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/spacetodepth_gpu_kernel.h View File

@@ -68,7 +68,7 @@ class SpaceToDepthFwdKernelMod : public NativeGpuKernelMod {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 2, but got " << output_num;
}
// check input_shape
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 2
- 2
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/split_gpu_kernel.h View File

@@ -54,7 +54,7 @@ class SplitFwdGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
@@ -96,7 +96,7 @@ class SplitFwdGpuKernelMod : public NativeGpuKernelMod {

for (int i = 0; i < output_num_; i++) {
size_t output_size = 1;
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, i);
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/squeeze_gpu_kernel.h View File

@@ -52,7 +52,7 @@ class SqueezeGpuKernelMod : public NativeGpuKernelMod {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto axis = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/transpose_gpu_kernel.h View File

@@ -92,7 +92,7 @@ class TransposeFwdGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h View File

@@ -48,7 +48,7 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
std::vector<size_t> shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_max_gpu_kernel.h View File

@@ -52,9 +52,9 @@ class UnsortedSegmentMaxGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_min_gpu_kernel.h View File

@@ -46,9 +46,9 @@ class UnsortedSegmentMinGpuKernelMod : public NativeGpuKernelMod {

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.h View File

@@ -50,9 +50,9 @@ class UnsortedSegmentSumGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto ids_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/arrays/zeroslike_gpu_kernel.h View File

@@ -51,7 +51,7 @@ class ZerosLikeGpuKernelMod : public NativeGpuKernelMod {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;

std::vector<size_t> input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
std::vector<size_t> input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 4
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_complex_gpu_kernel.h View File

@@ -56,9 +56,10 @@ class BroadcastComplexOpGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);

auto shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
need_broadcast_ = common::AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/math/broadcast_gpu_kernel.h View File

@@ -69,9 +69,9 @@ class BroadcastOpGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape2, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape3, kernel_name_, "output");


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/math/identity_gpu_kernel.h View File

@@ -57,7 +57,7 @@ class IdentityGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/math/linspace.h View File

@@ -56,9 +56,9 @@ class LinSpaceGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto value_count = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto input_2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto value_count = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_1, kernel_name, "start") ||
CHECK_SHAPE_NULL(input_2, kernel_name, "stop") ||
CHECK_SHAPE_NULL(value_count, kernel_name, "output");


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/math/logical_not_gpu_kernel.h View File

@@ -48,7 +48,7 @@ class LogicalNotGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 4
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h View File

@@ -53,9 +53,10 @@ class SquaredDifferenceOpGpuKernelMod : public NativeGpuKernelMod {

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
auto input_shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape1 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto input_shape2 = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);

is_null_input_ = CHECK_SHAPE_NULL(input_shape1, kernel_name, "input") ||
CHECK_SHAPE_NULL(input_shape2, kernel_name, "input") ||
CHECK_SHAPE_NULL(output_shape, kernel_name, "output");


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h View File

@@ -79,7 +79,7 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 3, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h View File

@@ -130,7 +130,7 @@ class UnaryOpGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 2
- 2
mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_collective_gpu_kernel.h View File

@@ -84,7 +84,7 @@ class NcclCollectiveGpuKernel : public NcclGpuKernelMod {
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
@@ -99,7 +99,7 @@ class NcclCollectiveGpuKernel : public NcclGpuKernelMod {
input_size_ += aligned_size;
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
auto shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, i);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
InitSizeLists();


+ 2
- 2
mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_p2p_gpu_kernel.h View File

@@ -70,7 +70,7 @@ class NcclP2PGpuKernel : public NcclGpuKernelMod {
output_nccl_data_type_ = nccl_dtype(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0));
}
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
@@ -84,7 +84,7 @@ class NcclP2PGpuKernel : public NcclGpuKernelMod {
input_size_ += size;
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
auto shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, i);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/activation_gpu_kernel.h View File

@@ -65,7 +65,7 @@ class ActivationFwdGpuKernelMod : public NativeGpuKernelMod {
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of inputs should be 1, but got " << input_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 2
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_gpu_kernel.h View File

@@ -48,7 +48,8 @@ class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);

is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/flatten_grad_gpu_kernel.h View File

@@ -54,7 +54,7 @@ class FlattenGardBkwGpuKernelMod : public NativeGpuKernelMod {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}

auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_gpu_kernel.h View File

@@ -50,7 +50,7 @@ class ReLUFwdGpuKernelMod : public NativeGpuKernelMod {
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/relu_grad_gpu_kernel.h View File

@@ -54,7 +54,7 @@ class ReluGradFwdGpuKernelMod : public NativeGpuKernelMod {
if (input_num != 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();


+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/other/concat_offset_gpu_kernel.h View File

@@ -64,7 +64,7 @@ class ConcatOffsetGpuKernelMod : public NativeGpuKernelMod {
}
for (size_t i = 0; i < input_num; i++) {
size_t input_size = 1;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
for (size_t j = 0; j < input_shape.size(); j++) {
input_size *= input_shape[j];
}


+ 4
- 4
mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcast_grad_args_gpu_kernel.h View File

@@ -61,10 +61,10 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public NativeGpuKernelMod {
if (input_num != kInputNum) {
MS_LOG(EXCEPTION) << "DynamicBroadcastGradiendArgs needs " << kInputNum << " inputs, but get " << input_num;
}
auto s0_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto s1_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto r0_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto r1_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 1);
auto s0_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto s1_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto r0_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
auto r1_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 1);
if (s0_shape.size() != 1 || s1_shape.size() != 1) {
MS_LOG(EXCEPTION) << "Inputs must be [1-D], but get " << s0_shape.size() << "-D and " << s1_shape.size() << "-D.";
}


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_broadcastto_gpu_kernel.h View File

@@ -58,9 +58,9 @@ class DynamicBroadcastToGpuKernelMod : public NativeGpuKernelMod {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shapes = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto shape_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes) || CHECK_NULL_INPUT(shape_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BroadcastToGpuKernelMod', input or output is null";


+ 3
- 3
mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_reshape_gpu_kernel.h View File

@@ -53,9 +53,9 @@ class DynamicReshapeKernelMod : public NativeGpuKernelMod {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto input_x_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
auto input_x_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto input_shape_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 1);
auto data_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
data_type_size_ = mindspore::kernel::GetDtypeNbyte(TypeIdToString(data_type, true));
shape_size_ = input_shape_shape.size();


+ 4
- 4
mindspore/ccsrc/plugin/device/gpu/kernel/other/dynamic_stitch_gpu_kernel.cc View File

@@ -34,15 +34,15 @@ bool DynamicStitchKernelMod::Init(const CNodePtr &kernel_node) {
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
n_ = input_num / kDivNum2;

auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
auto data_type = AnfAlgo::GetInputDeviceDataType(kernel_node, n_);
// Index type is restricted to int32 by kernel prim.
size_t index_type_size = sizeof(int);
data_type_size_ = GetDtypeNbyte(TypeIdToString(data_type, true));
auto first_data_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, n_);
auto first_data_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, n_);
one_data_ele_num_ = first_data_shape[first_data_shape.size() - 1];
for (size_t i = 0; i < n_; i++) {
auto data_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, n_ + i);
auto data_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, n_ + i);
size_t data_size = std::accumulate(data_shape.begin(), data_shape.end(), 1, std::multiplies<size_t>());
// Data size
input_size_list_.push_back(data_size * data_type_size_);
@@ -60,7 +60,7 @@ bool DynamicStitchKernelMod::Init(const CNodePtr &kernel_node) {
}

void DynamicStitchKernelMod::PostExecute() {
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node_.lock(), 0);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node_.lock(), 0);
output_shape[0] = max_index_ + 1;
auto data_type = AnfAlgo::GetInputDeviceDataType(kernel_node_.lock(), n_);
common::AnfAlgo::SetOutputInferTypeAndShape({data_type}, {output_shape}, kernel_node_.lock().get());


Loading…
Cancel
Save