Browse Source

Optimize the GPU kernel error descriptions for tile, topk, transpose, etc.

tags/v1.6.0
tacyi139 4 years ago
parent
commit
0609a235fd
68 changed files with 500 additions and 462 deletions
  1. +0
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h
  2. +8
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/tile_gpu_kernel.h
  3. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/topk_gpu_kernel.h
  4. +6
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h
  5. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h
  6. +7
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unpack_gpu_kernel.h
  7. +6
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_max_gpu_kernel.h
  8. +6
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_min_gpu_kernel.h
  9. +4
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h
  10. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/zeroslike_gpu_kernel.h
  11. +15
    -16
      mindspore/ccsrc/backend/kernel_compiler/gpu/custom/custom_aot_gpu_kernel.h
  12. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc
  13. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc
  14. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_profiling.cc
  15. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_profiling.h
  16. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h
  17. +5
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h
  18. +4
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h
  19. +6
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_complex_gpu_kernel.h
  20. +16
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h
  21. +12
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h
  22. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cast_all_gpu_kernel.h
  23. +12
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h
  24. +19
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h
  25. +10
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h
  26. +5
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cumprod_gpu_kernel.h
  27. +5
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/cumsum_gpu_kernel.h
  28. +12
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/determinant_triangle_gpu_kernel.h
  29. +14
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h
  30. +12
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h
  31. +4
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h
  32. +9
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h
  33. +4
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/identity_gpu_kernel.h
  34. +8
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/index_add_gpu_kernel.h
  35. +10
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/linspace.h
  36. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/logical_not_gpu_kernel.h
  37. +16
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h
  38. +9
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h
  39. +8
    -5
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h
  40. +6
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/multinomial_gpu_kernel.h
  41. +4
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/nms_with_mask_gpu_kernel.h
  42. +10
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h
  43. +4
    -3
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h
  44. +10
    -5
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.h
  45. +9
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h
  46. +8
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_complex_gpu_kernel.h
  47. +9
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h
  48. +12
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_grad_gpu_kernel.h
  49. +10
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h
  50. +9
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_collective_gpu_kernel.h
  51. +11
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_p2p_gpu_kernel.h
  52. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_recv_gpu_kernel.h
  53. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_send_gpu_kernel.h
  54. +12
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/sync_batch_norm_gpu_kernel.h
  55. +12
    -8
      mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/sync_batch_norm_grad_gpu_kernel.h
  56. +7
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h
  57. +15
    -10
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.h
  58. +13
    -10
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.h
  59. +7
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.h
  60. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/gpu_convert_to_dynamic_shape_gpu_kernel.h
  61. +6
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/other/iou_gpu_kernel.h
  62. +6
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h
  63. +6
    -7
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h
  64. +6
    -9
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h
  65. +7
    -10
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h
  66. +5
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h
  67. +5
    -6
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h
  68. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc

+ 0
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h View File

@@ -96,12 +96,10 @@ class ScatterNdGpuFwdKernel : public GpuKernel {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
return false;
}
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);


+ 8
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/tile_gpu_kernel.h View File

@@ -59,25 +59,26 @@ class TileGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(output_shape_);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape_, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TileGpuKernel', input or output is null";
InitSizeLists();
return true;
}
if (output_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got "
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shape_.size();
}
input_size_ = 1;
@@ -87,8 +88,8 @@ class TileGpuKernel : public GpuKernel {

output_size_ = 1;
if (output_shape_.size() > TILE_MAX_DIMENSION) {
MS_LOG(EXCEPTION) << "Output is " << output_shape_.size() << "-D, but Tile supports up to " << TILE_MAX_DIMENSION
<< "-D.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than "
<< TILE_MAX_DIMENSION << ", but got " << output_shape_.size();
}
shape_size_ = output_shape_.size();
for (size_t i = 0; i < output_shape_.size(); i++) {


+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/topk_gpu_kernel.h View File

@@ -83,12 +83,13 @@ class TopKGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shapes = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ =
CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TopkGpuKernel', input or output is null";
InitSizeLists();
return true;
}


+ 6
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h View File

@@ -74,28 +74,26 @@ class TransposeGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but transpose needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but transpose needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TransposeGpuKernel', input is null";
InitSizeLists();
return true;
}
shape_size_ = input_shape.size();
if (shape_size_ > TRANSPOSE_MAX_DIMENSION) {
MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but transpose supports max " << TRANSPOSE_MAX_DIMENSION
<< "-D inputs.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than "
<< TRANSPOSE_MAX_DIMENSION << ", but got " << shape_size_;
}

input_size_ = 1;


+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h View File

@@ -50,11 +50,11 @@ class UniqueGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
std::vector<size_t> shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UniqueGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 7
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unpack_gpu_kernel.h View File

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_UNPACK_GPU_KERNEL_H

#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -55,10 +56,9 @@ class UnpackGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
axis_ = static_cast<int32_t>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -73,9 +73,8 @@ class UnpackGpuFwdKernel : public GpuKernel {
for (size_t i = 0; i < output_num_; i++) {
size_t _size = 1;
auto _shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(_shape);
is_null_input_ = CHECK_SHAPE_NULL(_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnpackGpuKernel', output is null";
InitSizeLists();
return true;
}
@@ -87,9 +86,8 @@ class UnpackGpuFwdKernel : public GpuKernel {
workspace_size_list_.push_back(sizeof(T *) * output_num_);

auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnpackGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -108,13 +106,11 @@ class UnpackGpuFwdKernel : public GpuKernel {
void InitSizeLists() override {}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "input number is " << input_num << ", but UnpackGpuFwdKernel needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
return true;
}
int axis_;
bool is_null_input_;


+ 6
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_max_gpu_kernel.h View File

@@ -54,14 +54,15 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentMaxGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -73,9 +74,8 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel {
MS_LOG(INFO) << "UnsortedSegmentMax Kernel Input count is 2";
}
if (output_shapes.size() < 1) {
MS_LOG(EXCEPTION)
<< "For UnsortedSegmentMax, output shape incorrect rank. Expect Rank at least rank 1, got Rank: "
<< output_shapes.size() << ".";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shapes.size();
}
num_segments_ = output_shapes[0];
input_size_ = 1;


+ 6
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_min_gpu_kernel.h View File

@@ -49,13 +49,14 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentMinGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -67,9 +68,8 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
MS_LOG(INFO) << "UnsortedSegmentMin Kernel Input count is 2";
}
if (output_shapes.size() < 1) {
MS_LOG(EXCEPTION)
<< "For UnsortedSegmentMin, output shape incorrect rank. Expect Rank at least rank 1, got Rank: "
<< output_shapes.size() << ".";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got "
<< output_shapes.size();
}
num_segments_ = output_shapes[0];
input_size_ = 1;
@@ -96,7 +96,6 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}

void ResetResource() noexcept override {
num_segments_ = 1;
inner_size_ = 1;


+ 4
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h View File

@@ -52,13 +52,15 @@ class UnsortedSegmentSumGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(ids_shapes) || CHECK_NULL_INPUT(output_shapes);
is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") ||
CHECK_SHAPE_NULL(ids_shapes, kernel_name, "segment_ids") ||
CHECK_SHAPE_NULL(output_shapes, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnsortedSegmentSumGpuKernel', input or output is null";
InitSizeLists();
return true;
}


+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/zeroslike_gpu_kernel.h View File

@@ -51,12 +51,12 @@ class ZerosLikeGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;

std::vector<size_t> input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'ZeroslikeGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 15
- 16
mindspore/ccsrc/backend/kernel_compiler/gpu/custom/custom_aot_gpu_kernel.h View File

@@ -55,7 +55,7 @@ class CustomAOTGpuKernel : public GpuKernel {
if (!handle_) {
handle_ = dlopen(file_path_.c_str(), RTLD_LAZY | RTLD_LOCAL);
if (!handle_) {
MS_LOG(ERROR) << "Open Error: " << dlerror();
MS_LOG(ERROR) << "For '" << kernel_name_ << "', open should be successful, but error, " << dlerror();
return false;
}
}
@@ -65,7 +65,7 @@ class CustomAOTGpuKernel : public GpuKernel {
reinterpret_cast<std::add_pointer<int(int, void **, int *, int64_t **, const char **, void *, void *)>::type>(
dlsym(handle_, func_name_.c_str()));
if (auto error_info = dlerror(); error_info != nullptr) {
MS_LOG(ERROR) << error_info;
MS_LOG(ERROR) << "For '" << kernel_name_ << "', error info: " << error_info;
return false;
}
}
@@ -79,7 +79,8 @@ class CustomAOTGpuKernel : public GpuKernel {
ret = aot_func_(nparam, &params[0], &ndims_[0], &shapes_[0], &type_pointer_list_[0], stream_ptr, nullptr);
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "CustomAOT operator failed when running user defined file " << file_path_ << "! "
MS_LOG(ERROR) << "For '" << kernel_name_ << "', operator failed when running user defined file " << file_path_
<< "! "
<< "Error message is " << e.what();
return false;
}
@@ -88,14 +89,15 @@ class CustomAOTGpuKernel : public GpuKernel {
case 0:
break;
case 1:
MS_LOG(ERROR) << "Number of parameters passed to AOT kernel is " << nparam
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of parameters passed to AOT kernel is " << nparam
<< ", inconsistent with what the user wants";
return false;
case 2:
MS_LOG(ERROR) << "Type of parameters passed to AOT kernel is inconsistent with what the user wants";
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', type of parameters passed to AOT kernel is inconsistent with what the user wants";
return false;
default:
MS_LOG(ERROR) << "Error occurred when running AOT kernel, "
MS_LOG(ERROR) << "For '" << kernel_name_ << "', error occurred when running AOT kernel, "
<< "error id is " << ret;
return false;
}
@@ -104,27 +106,25 @@ class CustomAOTGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
const auto &exec_info = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "func_name");
if (auto pos = exec_info.find(":"); pos != std::string::npos) {
auto path = exec_info.substr(0, pos);
auto real_path = FileUtils::GetRealPath(path.c_str());
if (!real_path.has_value()) {
MS_LOG(ERROR) << "Invalid file path, " << path << " does not exist.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the file path should be exist, but got " << path;
}
file_path_ = real_path.value();
func_name_ = exec_info.substr(pos + 1);
} else {
MS_LOG(ERROR) << "Wrong execute info:" << exec_info;
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', Wrong execute info:" << exec_info;
}
num_input_ = AnfAlgo::GetInputTensorNum(kernel_node);
auto input_type_list = AnfAlgo::GetAllInputDeviceTypes(kernel_node);
if (num_input_ != input_type_list.size()) {
MS_LOG(ERROR) << "Input shapes'size is " << num_input_ << ", while input types' size is "
<< input_type_list.size();
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << input_type_list.size()
<< ", but got " << num_input_;
}
for (size_t i = 0; i < num_input_; i++) {
@@ -141,9 +141,8 @@ class CustomAOTGpuKernel : public GpuKernel {
auto output_type_list = AnfAlgo::GetAllOutputDeviceTypes(kernel_node);
if (num_output_ != output_type_list.size()) {
MS_LOG(ERROR) << "Output shapes'size is " << num_output_ << ", while output types' size is "
<< output_type_list.size();
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be " << output_type_list.size()
<< ", but got " << num_output_;
}
for (size_t i = 0; i < num_output_; i++) {


+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc View File

@@ -61,12 +61,13 @@ bool DatasetInitKernel::Launch(const std::vector<AddressPtr> &, const std::vecto
size_t len = total_bytes_ * buffer_q_capacity_;

if (!device::gpu::GPUMemoryAllocator::GetInstance().AllocBufferQueueMem(len, &addr)) {
MS_LOG(EXCEPTION) << "Memory not enough: failed to allocate GPU buffer queue memory[" << len << "].";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memory not enough: failed to allocate GPU buffer queue memory["
<< len << "].";
}

auto status = GpuBufferMgr::GetInstance().Create(0, queue_name_, addr, shapes_, buffer_q_capacity_);
if (status) {
MS_LOG(EXCEPTION) << "Init Dataset Failed. len: " << len << ", status:" << status;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status;
}

return true;


+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc View File

@@ -121,7 +121,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
#ifdef ENABLE_DUMP_IR
mindspore::RDR::TriggerAll();
#endif
MS_LOG(EXCEPTION) << "Get data timeout";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', get data timeout";
}
}
#ifndef ENABLE_SECURITY
@@ -130,7 +130,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
}
#endif
MS_LOG(ERROR) << "Get data failed, errcode " << ret;
MS_LOG(ERROR) << "For '" << kernel_name_ << "', get data failed, errcode " << ret;
return false;
}
return true;
@@ -141,7 +141,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v
if (handle_ == HandleMgr::INVALID_HANDLE) {
handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
if (handle_ == HandleMgr::INVALID_HANDLE) {
MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed";
}
}

@@ -151,7 +151,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v
return false;
}
if (total_bytes_ != len) {
MS_LOG(ERROR) << "Dataset front error. read: " << len << ", expect: " << total_bytes_ << ", ";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', dataset front error, read: " << len << ", expect: " << total_bytes_;
return false;
}



+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_profiling.cc View File

@@ -26,7 +26,7 @@

namespace mindspore {
namespace kernel {
GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path) {}
GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path), kernel_name_("GetNextProfiling") {}

void GetNextProfiling::GetDeviceId() {
auto context_ptr = MsContext::GetInstance();
@@ -44,13 +44,13 @@ void GetNextProfiling::Init() {
void GetNextProfiling::SaveProfilingData() {
std::ofstream handle(file_name_, std::ios::trunc);
if (!handle.is_open()) {
MS_LOG(ERROR) << "Open get-next profiling file failed.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', open get-next profiling file failed.";
return;
}
for (uint32_t index = 0; index < queue_size_.size(); index++) {
if (index > time_stamp_.size() - 1) {
handle.close();
MS_LOG(EXCEPTION) << "index exceeds time_stamp_ size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', index exceeds time_stamp_ size.";
}
handle << Name() << " " << time_stamp_[index].first << " " << time_stamp_[index].second << " " << queue_size_[index]
<< std::endl;
@@ -62,7 +62,7 @@ void GetNextProfiling::SaveProfilingData() {

void GetNextProfiling::ChangeFileMode() {
if (chmod(common::SafeCStr(file_name_), S_IRUSR | S_IWUSR) == -1) {
MS_LOG(ERROR) << "Modify file:" << file_name_ << " to rw fail.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', modify file:" << file_name_ << " to rw fail.";
return;
}
}


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_profiling.h View File

@@ -44,6 +44,7 @@ class GetNextProfiling : public ProfilingOp {
std::vector<std::pair<uint64_t, uint64_t>> time_stamp_; // First value of std::pair is the start time stamp,
// Second value of std::pair is the stop time stamp
std::string device_id_;
std::string kernel_name_;
};
} // namespace kernel
} // namespace mindspore


+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h View File

@@ -90,6 +90,7 @@ class PrintGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
if (AnfAlgo::HasNodeAttr("string_pos", kernel_node)) {
@@ -105,9 +106,8 @@ class PrintGpuKernel : public GpuKernel {
input_flag_ = SetInputFlag(&string_pos_, input_tensor_num);
for (size_t i = 0; i < input_tensor_num; i++) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'PrintGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -183,7 +183,7 @@ class PrintGpuKernel : public GpuKernel {
input_device_data->push_back(GetDeviceAddress<double>(inputs, i));
break;
default:
MS_LOG(EXCEPTION) << "TypeId: " << type_id << " is not supported in Print.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the typeid cannot be " << type_id;
}
}
}
@@ -200,7 +200,7 @@ class PrintGpuKernel : public GpuKernel {
}
for (size_t i = 0; i < string_pos->size(); i++) {
if ((*string_pos)[i] < 0) {
MS_LOG(EXCEPTION) << "string_pos cannot be a negative value";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', string_pos cannot be a negative value";
}
auto index = IntToSize((*string_pos)[i]);
res[index] = -1;


+ 5
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h View File

@@ -65,22 +65,21 @@ class AddNGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
num_input_ = GetAttr<int64_t>(kernel_node, "n");
if (num_input_ != input_num) {
MS_LOG(ERROR) << "Input number is " << num_input_ << " in attr, but got " << input_num << "input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << num_input_ << ", but got "
<< input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "AddNGpuFwdKernel input is null";
InitSizeLists();
return true;
}


+ 4
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h View File

@@ -49,20 +49,18 @@ class AssignAddGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but cudnnAddTensor needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "AssignAddGpuFwdKernel input is null";
InitSizeLists();
return true;
}


+ 6
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_complex_gpu_kernel.h View File

@@ -57,13 +57,15 @@ class BroadcastComplexOpGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << shape1.size();
}

lhs_shape_.resize(MAX_DIMS, 1);
@@ -128,7 +130,9 @@ class BroadcastComplexOpGpuKernel : public GpuKernel {
return;
}

MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: RealDiv, Mul, Sub, Add, Div or Complex currently, but got "
<< kernel_name;
}

BroadcastOpType op_type_;


+ 16
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h View File

@@ -70,20 +70,22 @@ class BroadcastOpGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3);
is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape2, kernel_name_, "input") ||
CHECK_SHAPE_NULL(shape3, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BroadcastGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than: " << MAX_DIMS << ", actual size is "
<< shape1.size();
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
<< ", but got " << shape1.size();
}

lhs_shape_.resize(MAX_DIMS, 1);
@@ -94,7 +96,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
if (i < MAX_DIMS) {
output_shape_[i] = shape3[i];
} else {
MS_LOG(EXCEPTION) << "Output index: " << i << " should be less than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of output should be less than " << MAX_DIMS
<< ", but got " << i;
}
}
output_num_ *= shape3[i];
@@ -106,7 +109,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
lhs_shape_[j + lhs_offset] = shape1[j];
} else {
auto index = j + lhs_offset;
MS_LOG(EXCEPTION) << "Invalid input1 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input1_num_ *= shape1[j];
@@ -118,7 +122,8 @@ class BroadcastOpGpuKernel : public GpuKernel {
rhs_shape_[k + rhs_offset] = shape2[k];
} else {
auto index = k + rhs_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got "
<< index;
}
}
input2_num_ *= shape2[k];
@@ -201,7 +206,10 @@ class BroadcastOpGpuKernel : public GpuKernel {
return;
}

MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: Maximum, Minimum, Pow, RealDiv, Mul, Sub, Add, Div, DivNoNan, "
"Mod, FloorDiv, AbsGrad, FloorMod, Atan2, TruncateDiv or TruncateMod currently, but got "
<< kernel_name;
}

BroadcastOpType op_type_;


+ 12
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h View File

@@ -69,20 +69,23 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape2 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape3 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3);
is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input_1") ||
CHECK_SHAPE_NULL(shape2, kernel_name_, "input_2") ||
CHECK_SHAPE_NULL(shape3, kernel_name_, "input_3");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BroadcastGradGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > kMaxShapeSize) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << kMaxShapeSize;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << shape1.size();
}

for (size_t i = 0; i < shape3.size(); i++) {
@@ -98,8 +101,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
x1_shape_[i + x1_offset] = shape1[i];
} else {
auto index = i + x1_offset;
MS_LOG(EXCEPTION) << "For 'BroadcastOpGrad', the dimension of input cannot be greater than " << kMaxShapeSize
<< ", but got " << (index + 1);
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << (index + 1);
}
}
input1_num_ *= shape1[i];
@@ -111,7 +114,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {
x2_shape_[i + x2_offset] = shape2[i];
} else {
auto index = i + x2_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than "
<< kMaxShapeSize << ", but got " << (index + 1);
}
}
input2_num_ *= shape2[i];
@@ -162,7 +166,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel {

auto iter = kBroadcastTypeMap.find(kernel_name);
if (iter == kBroadcastTypeMap.end()) {
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< ", only support these types: MaximumGrad or MinimumGrad currently, but got " << kernel_name;
} else {
op_type_ = iter->second;
}


+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cast_all_gpu_kernel.h View File

@@ -66,14 +66,14 @@ class CastAllGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
num_input_ = GetAttr<size_t>(kernel_node, "n");
size_ = std::make_unique<size_t[]>(num_input_);
for (size_t i = 0; i < num_input_; i++) {
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CastAllGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 12
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
@@ -62,6 +63,7 @@ class CholeskyGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower));
split_dim_ = static_cast<int>(GetAttr<int64_t>(kernel_node, kSplitDim));
@@ -77,11 +79,11 @@ class CholeskyGpuKernel : public GpuKernel {

auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex);

is_null_input_ = CHECK_NULL_INPUT(in_shape);
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(EXCEPTION) << "For 'CholeskyGpuKernel', input is null";
InitSizeLists();
return true;
}

if (split_dim_ == 0) {
return InitNoSplitDim(in_shape);
}
@@ -103,11 +105,11 @@ class CholeskyGpuKernel : public GpuKernel {
cho_row_ = in_shape.at(kDim1);
cho_col_ = in_shape.at(kDim2);
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
return false;
}
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}
// set matrix row or col to be lead dimension
@@ -121,13 +123,14 @@ class CholeskyGpuKernel : public GpuKernel {

bool InitSplitDim(const std::vector<size_t> &in_shape) {
if (in_shape.size() != kCholeskyNormalShape) {
MS_LOG(ERROR) << "Cholesky Split Matrix Need Input Rank as 2.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input should be " << kCholeskyNormalShape
<< ", but got " << in_shape.size();
return false;
}
cho_row_ = in_shape.at(kDim0);
cho_col_ = in_shape.at(kDim1);
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky Split Matrix Need Square Matrix as Input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}

@@ -206,7 +209,7 @@ class CholeskyGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_),
"cusolver cholesky batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
size_t output_elements = outputs.at(kDim0)->size / unit_size_;
// copy results from written input's matrix to output's matrix by up or lower flag.
@@ -243,7 +246,7 @@ class CholeskyGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_),
"cusolver cholesky batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}

TriangleMatrixCopy(d_batch_input_addr, output_addr, uplo_, outputs[0]->size / sizeof(T), ldb_, m_,


+ 19
- 13
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/triangle_matrix_copy_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@@ -41,7 +42,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
public:
using pointer = T *;

CholeskySolveGpuKernel() = default;
CholeskySolveGpuKernel() : is_null_input_(false) {}
~CholeskySolveGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@@ -49,6 +50,9 @@ class CholeskySolveGpuKernel : public GpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
auto input_a_addr = GetDeviceAddress<T>(inputs, kDim0);
@@ -81,7 +85,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
d_b_array_addr, ldb_, d_info_array_addr, batch_),
"cusolver cholesky solve batched Fail");
} else {
MS_LOG(EXCEPTION) << "cholesky solve do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
size_t output_elements = outputs.at(kDim0)->size / unit_size_;
// copy results from written input's matrix to output's matrix.
@@ -90,6 +94,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower));
// gpu input is col major default, so need to change row major.
@@ -103,10 +108,14 @@ class CholeskySolveGpuKernel : public GpuKernel {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
auto in_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim0);
auto in_b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim1);
if (CHECK_NULL_INPUT(in_a_shape) || CHECK_NULL_INPUT(in_b_shape)) {
MS_LOG(EXCEPTION) << "For 'CholeskySolveGpuKernel', input is null";
is_null_input_ =
CHECK_SHAPE_NULL(in_a_shape, kernel_name_, "input_a") || CHECK_SHAPE_NULL(in_b_shape, kernel_name_, "input_b");
if (is_null_input_) {
InitSizeLists();
return true;
}
return InitDim(in_a_shape, in_b_shape);
(void)InitDim(in_a_shape, in_b_shape);
return true;
}

protected:
@@ -127,7 +136,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
}

private:
bool InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) {
void InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) {
if (in_a_shape.size() == kCholeskyDefaultShape) {
batch_ = 1;
cho_row_ = in_a_shape.at(kDim0);
@@ -141,17 +150,14 @@ class CholeskySolveGpuKernel : public GpuKernel {
cho_row_ = in_a_shape.at(kDim1);
cho_col_ = in_a_shape.at(kDim2);
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
}
if (cho_row_ != cho_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
size_t b_row = in_b_shape.size() == kCholeskyBatchedShape ? in_b_shape.at(kDim1) : in_b_shape.at(kDim0);
if (cho_row_ != b_row) {
MS_LOG(ERROR) << "Cholesky right hand matrix is not equal to left matrix.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', right hand matrix should be equal to left matrix";
}
m_ = SizeToInt(in_a_shape.at(kDim1));
lda_ = m_;
@@ -159,7 +165,6 @@ class CholeskySolveGpuKernel : public GpuKernel {
h_a_array_.resize(batch_);
h_b_array_.resize(batch_);
InitSizeLists();
return true;
}
size_t cho_row_{0};
size_t cho_col_{0};
@@ -177,6 +182,7 @@ class CholeskySolveGpuKernel : public GpuKernel {
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore


+ 10
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh"
@@ -67,13 +68,13 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(in_shape);
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CholeskyTrsmSolveGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -84,12 +85,10 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
}
} else {
if (in_shape.size() != 2) {
MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Input Rank as 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2";
}
if (in_shape[0] != in_shape[1]) {
MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Square Matrix as Input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
InitDimOthers(kernel_node, in_shape);
}
@@ -201,17 +200,19 @@ class CholeskyTrsmGpuKernel : public GpuKernel {
if (in_shape.size() == 2) {
batch_ = 1;
if (in_shape[0] != in_shape[1]) {
MS_LOG(ERROR) << "CholeskyTrsm shape0: " << in_shape[0] << ", is not equal to shape1: " << in_shape[1];
MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape0 should be equal to " << in_shape[1] << ", but got "
<< in_shape[0];
return false;
}
} else if (in_shape.size() == 3) {
batch_ = SizeToInt(in_shape[0]);
if (in_shape[1] != in_shape[2]) {
MS_LOG(ERROR) << "CholeskyTrsm shape1: " << in_shape[1] << ", is not equal to shape2: " << in_shape[2];
MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape1 should be equal to " << in_shape[2] << ", but got "
<< in_shape[1];
return false;
}
} else {
MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3";
return false;
}



+ 5
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cumprod_gpu_kernel.h View File

@@ -55,15 +55,15 @@ class CumProdGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumProdGpuKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape_);
is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null.";
InitSizeLists();
return true;
}
@@ -72,7 +72,8 @@ class CumProdGpuKernel : public GpuKernel {
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());
if (axis_ >= input_dim_length) {
MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length
<< ", but got " << axis_;
}
while (axis_ < 0) {
axis_ += input_dim_length;


+ 5
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/math/cumsum_gpu_kernel.h View File

@@ -55,15 +55,15 @@ class CumSumGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumSumGpuKernel needs 1.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
input_size_0_ = sizeof(T);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape_);
is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CumSumGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -72,7 +72,8 @@ class CumSumGpuKernel : public GpuKernel {
reverse_ = GetAttr<bool>(kernel_node, "reverse");
int input_dim_length = SizeToInt(shape_.size());
if (axis_ >= input_dim_length) {
MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length
<< ", but got " << axis_;
}
while (axis_ < 0) {
axis_ += input_dim_length;


+ 12
- 13
mindspore/ccsrc/backend/kernel_compiler/gpu/math/determinant_triangle_gpu_kernel.h View File

@@ -47,7 +47,8 @@ class DetTriangleGpuKernel : public GpuKernel {

if (!CheckTriangle(input_addr, fill_mode_, matrix_n_, outputs[0]->size / sizeof(T),
reinterpret_cast<cudaStream_t>(stream_ptr))) {
MS_LOG(ERROR) << "The elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_;
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', the elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_;
return false;
}
DetTriangle(input_addr, output_addr, matrix_n_, outputs[0]->size / sizeof(T),
@@ -56,21 +57,20 @@ class DetTriangleGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but DetTriangle needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but DetTriangle needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'DeterminantTriangleGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -79,8 +79,8 @@ class DetTriangleGpuKernel : public GpuKernel {
}

if (input_shape.size() < 2) {
MS_LOG(ERROR) << "The input should have rank at least 2.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< input_shape.size();
}

matrix_n_ = input_shape[input_shape.size() - 1];
@@ -89,12 +89,11 @@ class DetTriangleGpuKernel : public GpuKernel {
output_size_ *= output_shape[i];
}
if (matrix_n_ == 0 || output_size_ != input_size_ / matrix_n_ / matrix_n_) {
MS_LOG(ERROR) << "The output shape is wrong.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of output should be "
<< (input_size_ / matrix_n_ / matrix_n_) << ", but got " << output_size_;
}
if (input_shape[input_shape.size() - 2] != input_shape[input_shape.size() - 1]) {
MS_LOG(ERROR) << "The matrix should be in shape of square.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
}
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);


+ 14
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h View File

@@ -21,6 +21,7 @@
#include <cusolverDn.h>
#include <cuda_runtime.h>
#include <vector>
#include <string>
#include <complex>
#include <algorithm>
#include <type_traits>
@@ -52,13 +53,14 @@ struct Complex_traits<Complex<T>> {
template <typename T>
class EighcGpuKernel : public GpuKernel {
public:
EighcGpuKernel() = default;
EighcGpuKernel() : is_null_input_(false) {}
~EighcGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -70,13 +72,14 @@ class EighcGpuKernel : public GpuKernel {
jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
}
cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
bool is_null_input = CHECK_NULL_INPUT(A_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (A_shape.size() != kShape2dDims || A_shape[1] != A_shape[1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X "
<< A_shape[1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix, but got ["
<< A_shape[0] << " X " << A_shape[1] << "]";
}
m_ = A_shape[0];
InitSizeLists();
@@ -85,6 +88,9 @@ class EighcGpuKernel : public GpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cublasSetStream failed");
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
@@ -136,7 +142,7 @@ class EighcGpuKernel : public GpuKernel {
}
d_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(sizeof(T) * lwork);
if (!d_work) {
MS_LOG(EXCEPTION) << "GPU memory alloca failed.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', GPU memory alloca failed.";
}
if constexpr (std::is_same_v<T, Complex<float>>) {
cusolverDnCheevd(cusolver_handle_, jobz_, uplo_, m_, reinterpret_cast<cuComplex *>(w_v_addr), lda_, w_w_addr,
@@ -198,6 +204,7 @@ class EighcGpuKernel : public GpuKernel {
std::vector<size_t> output_size_list_{};
std::vector<size_t> workspace_size_list_{};
using D = typename Complex_traits<T>::value_type;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore


+ 12
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h View File

@@ -39,13 +39,14 @@ constexpr char LOWER[] = "lower";
template <typename T>
class EighGpuKernel : public GpuKernel {
public:
EighGpuKernel() = default;
EighGpuKernel() : is_null_input_(false) {}
~EighGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
compute_eigen_vectors_ = static_cast<bool>(GetAttr<bool>(kernel_node, C_EIEH_VECTOR));
@@ -56,13 +57,14 @@ class EighGpuKernel : public GpuKernel {
jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
}
cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
bool is_null_input = CHECK_NULL_INPUT(A_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (A_shape.size() != kShape2dDims || A_shape[0] != A_shape[1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X "
<< A_shape[1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got ["
<< A_shape[0] << " X " << A_shape[1] << "]";
}
m_ = A_shape[0];
InitSizeLists();
@@ -71,6 +73,9 @@ class EighGpuKernel : public GpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
// matrix A, input or output(eigenvector)
@@ -152,6 +157,7 @@ class EighGpuKernel : public GpuKernel {
cusolverEigMode_t jobz_ = CUSOLVER_EIG_MODE_NOVECTOR;
bool compute_eigen_vectors_{false};
bool lower_{true};
bool is_null_input_;
std::vector<T *> h_array_{};
std::vector<size_t> input_size_list_{};
std::vector<size_t> output_size_list_{};


+ 4
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h View File

@@ -49,23 +49,21 @@ class EqualCountGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but equalcount needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but equalcount needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}

output_size_ = sizeof(T);
input_size_ = sizeof(T);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'EqualcountGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 9
- 13
mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h View File

@@ -77,13 +77,11 @@ class FloatStatusGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
if (!CheckParam(kernel_node)) {
return false;
}
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
(void)CheckParam(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'FloatStatusGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -91,10 +89,10 @@ class FloatStatusGpuKernel : public GpuKernel {
for (size_t x : shape) {
input_size_ = input_size_ * x;
}
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kOpTypeMap.find(kernel_name);
if (iter == kOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "FloatStatus kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: FloatStatus, IsInf, IsNan, IsFinite "
<< "currently, but got " << kernel_name;
}
kernel_name_ = iter->second;

@@ -114,18 +112,16 @@ class FloatStatusGpuKernel : public GpuKernel {
}

private:
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but FloatStatusGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but FloatStatusGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
return true;
}

std::vector<size_t> input_size_list_;


+ 4
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/identity_gpu_kernel.h View File

@@ -52,20 +52,18 @@ class IdentityGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but identity needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but identity needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "IdentityGpuKernel input is null";
InitSizeLists();
return true;
}


+ 8
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/index_add_gpu_kernel.h View File

@@ -62,18 +62,20 @@ class IndexAddGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but index add needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
std::vector<size_t> dst_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> src_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(dst_shape) || CHECK_NULL_INPUT(index_shape) || CHECK_NULL_INPUT(src_shape);
is_null_input_ = CHECK_SHAPE_NULL(dst_shape, kernel_name, "x") ||
CHECK_SHAPE_NULL(index_shape, kernel_name, "indices") ||
CHECK_SHAPE_NULL(src_shape, kernel_name, "y");

if (is_null_input_) {
MS_LOG(WARNING) << "For 'IndexAddGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -91,8 +93,8 @@ class IndexAddGpuKernel : public GpuKernel {
inner_size_ *= src_shape[i];
}
if (axis < 0 || axis >= SizeToInt(src_shape.size()) || axis >= SizeToInt(dst_shape.size())) {
MS_LOG(EXCEPTION) << "Init axis size failed, actual src axis size is " << src_axis_size_
<< ", actual dst axis size is " << dst_axis_size_;
// 'axis' is an index, not a size, and the enclosing check also rejects negative values,
// so report the full valid range against the ranks of both 'y' (src) and 'x' (dst).
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the 'axis' should be in range [0, "
                  << SizeToInt(src_shape.size()) << ") for 'y' and [0, " << SizeToInt(dst_shape.size())
                  << ") for 'x', but got " << axis;
}
src_axis_size_ = src_shape[axis];
dst_axis_size_ = dst_shape[axis];


+ 10
- 13
mindspore/ccsrc/backend/kernel_compiler/gpu/math/linspace.h View File

@@ -49,37 +49,34 @@ class LinSpaceGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but DynamicLinSpace needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but DynamicLinSpace needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto value_count = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_1) || CHECK_NULL_INPUT(input_2) || CHECK_NULL_INPUT(value_count);
is_null_input_ = CHECK_SHAPE_NULL(input_1, kernel_name, "start") ||
CHECK_SHAPE_NULL(input_2, kernel_name, "stop") ||
CHECK_SHAPE_NULL(value_count, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'LinspaceGpuKernel', input is null";
InitSizeLists();
return true;
}
// error checking input data
if ((input_1.size() != 0) || (input_2.size() != 0)) {
MS_LOG(ERROR) << "For LinShape "
<< "both start and end must be 0-D Tensors. Got " << input_1.size() << " and " << input_2.size()
<< ".";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', both start and end should be 0-D Tensors, but got dimension "
<< "of start: " << input_1.size() << " and dimension of end: " << input_2.size();
}

if (value_count.size() != 1) {
MS_LOG(ERROR) << "For LinShape, output shape incorrect rank. Expect Rank: 1, got Rank: " << value_count.size()
<< ".";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output should be 1, but got "
<< value_count.size();
}
value_count_ = value_count[0];
InitSizeLists();


+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/gpu/math/logical_not_gpu_kernel.h View File

@@ -49,11 +49,11 @@ class LogicalNotGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'LogicalGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 16
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <algorithm>
#include <type_traits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@@ -39,7 +40,7 @@ constexpr size_t kLuNormalShape = 2;
template <typename T>
class LUGpuKernel : public GpuKernel {
public:
LUGpuKernel() = default;
LUGpuKernel() : is_null_input_(false) {}
~LUGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@@ -47,6 +48,9 @@ class LUGpuKernel : public GpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cusolverDnSetStream failed");
auto input_addr = GetDeviceAddress<T>(inputs, kDim0);
@@ -65,7 +69,7 @@ class LUGpuKernel : public GpuKernel {
"cusolver query lu work size fail");

if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) {
MS_LOG(EXCEPTION) << "cusolver malloc work size fail";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail";
}

CHECK_CUSOLVER_RET_WITH_EXCEPT(
@@ -79,7 +83,7 @@ class LUGpuKernel : public GpuKernel {
// 5. malloc device working space of getrf

if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) {
MS_LOG(EXCEPTION) << "cusolver malloc work size fail";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail";
}

// 6. solve to lu factorization according to cuSolver api, outputs have been written to input's matrix.
@@ -87,7 +91,7 @@ class LUGpuKernel : public GpuKernel {
kernel_node_, cusolverDnDgetrf(handle_, m_, m_, input_addr, lda_, d_work_, piv_output_addr, info_output_addr),
"cusolver lu fail");
} else {
MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now.";
}
// 7. copy results from written input's matrix to output's matrix.
// if (cudaMemcpy(output_addr, input_addr, lda_ * m_ * unit_size_, cudaMemcpyDeviceToDevice) != cudaSuccess) {
@@ -101,14 +105,16 @@ class LUGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
// 1. get CuSolver Dense matrix handler
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle();
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
// 2. check input shape not null
bool is_null_input = CHECK_NULL_INPUT(in_shape);
if (is_null_input) {
MS_LOG(EXCEPTION) << "For 'PureCholeskyGpuKernel', input is null";
is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
// 3. calculate input size
if (!InitInputSize(in_shape)) {
@@ -126,11 +132,11 @@ class LUGpuKernel : public GpuKernel {
lu_row_ = in_shape.at(kDim0);
lu_col_ = in_shape.at(kDim1);
} else {
MS_LOG(ERROR) << "Input Only support Rank 1 OR 2";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 1 or 2";
return false;
}
if (lu_row_ != lu_col_) {
MS_LOG(ERROR) << "Cholesky need square matrix as input.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix";
return false;
}
// set matrix row or col to be lead dimension
@@ -170,6 +176,7 @@ class LUGpuKernel : public GpuKernel {
std::vector<size_t> input_size_list_{};
std::vector<size_t> output_size_list_{};
std::vector<size_t> workspace_size_list_{};
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore


+ 9
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h View File

@@ -91,13 +91,14 @@ class MatMulGpuKernel : public GpuKernel {
"cublasGemmStridedBatchedEx failed. Possible reasons: the GPU is occupied by other processes.");
}
} catch (const std::exception &e) {
MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << " when invoke cublas "
<< (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx");
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', encountered an exception: " << e.what() << " when invoke "
<< "cublas " << (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx");
}
return true;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
@@ -105,7 +106,7 @@ class MatMulGpuKernel : public GpuKernel {
dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)));
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
if (dtype_a_ != dtype_b_ || dtype_a_ != dtype_c_) {
MS_LOG(EXCEPTION) << "input and output types are not the same in " << node_name;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input and output types are not the same in " << node_name;
}
if (dtype_a_ == CUDA_R_16F && dtype_b_ == CUDA_R_16F && dtype_c_ == CUDA_R_16F) {
MS_LOG(INFO) << "input and output type is float16, allow to use Tensor Core operations if possible";
@@ -113,15 +114,16 @@ class MatMulGpuKernel : public GpuKernel {
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto input1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape) || CHECK_NULL_INPUT(input1_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input1_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MatmulGpuKernel', input or output is null";
InitSizeLists();
return true;
}
auto dims = output_shape.size();
if (dims < 2) {
MS_LOG(EXCEPTION) << "Output dims " << dims << " not support.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than 2, but got "
<< dims;
}

m_ = output_shape[dims - 2];
@@ -139,7 +141,7 @@ class MatMulGpuKernel : public GpuKernel {
} else if (!transpose && input1_shape.size() > (dims - 1)) {
k_ = input1_shape[dims - 1];
} else {
MS_LOG(EXCEPTION) << "Init k_ via input1_shape failed.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init k_ via input1_shape failed.";
}

transpose = GetAttr<bool>(kernel_node, "transpose_x2");


+ 8
- 5
mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <type_traits>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -90,28 +91,30 @@ class MatrixInverseGpuKernel : public GpuKernel {
reinterpret_cast<double **>(inv_batch_addr), len, info_addr, batchsize),
"cublas trsm batched Fail");
} else {
MS_LOG(EXCEPTION) << "The data type entered must be float or double.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type entered must be float or double.";
}

return true;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MatrixInverseGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() < 2) {
MS_LOG(EXCEPTION) << "The dim entered needs to be greater than 2, but " << input_shape.size() << " was taken";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< input_shape.size();
}
size_t last_index = input_shape.size() - 1;
if (input_shape[last_index] != input_shape[last_index - 1]) {
MS_LOG(EXCEPTION) << "The last two dimensions of the input matrix should be equal!";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the last two dimensions of the input matrix should be equal, "
<< "but got one: " << input_shape[last_index] << ", another: " << input_shape[last_index - 1];
}
size_ = input_shape[last_index];
for (size_t i = 0; i < last_index - 1; i++) {


+ 6
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/math/multinomial_gpu_kernel.h View File

@@ -59,7 +59,7 @@ class MultinomialGpuKernel : public GpuKernel {
T *probs_addr = GetDeviceAddress<T>(inputs, 0);
int64_t *num_sample_addr = GetDeviceAddress<int64_t>(inputs, 1);
if (distributions_ == 0) {
MS_LOG(ERROR) << "Divide by zero. the distributions_ is 0.";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', divide by zero. the distributions_ is 0.";
return false;
}
@@ -83,22 +83,20 @@ class MultinomialGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but multinomial needs 2 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but multinomial needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_0, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'MultinomialGpuKernel', input or output is null";
InitSizeLists();
return true;
}


+ 4
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/math/nms_with_mask_gpu_kernel.h View File

@@ -65,24 +65,22 @@ class NMSWithMaskGpuFwdKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
iou_value_ = GetAttr<float>(kernel_node, "iou_threshold");

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but NMSWithMask needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but NMSWithMask needs 3 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NMSWithMaskGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 10
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h View File

@@ -120,7 +120,7 @@ class RandomOpGpuKernel : public GpuKernel {
inputs[2]->size / sizeof(T), output_addr, outputs[0]->size / sizeof(T),
reinterpret_cast<cudaStream_t>(stream_ptr));
if (!ret) {
MS_LOG(ERROR) << "For UniformInt op, `minval` should be strictly less than `maxval`";
MS_LOG(ERROR) << "For '" << kernel_name_ << "', `minval` should be strictly less than `maxval`";
return false;
}
break;
@@ -148,7 +148,8 @@ class RandomOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: StandardNormal, CudnnUniformReal, "
                  << "UniformInt, UniformReal currently, but got " << random_op_type_;
}
}
return true;
@@ -158,29 +159,27 @@ class RandomOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kRandomOpTypeMap.find(kernel_name);
if (iter == kRandomOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these types: StandardNormal, CudnnUniformReal, "
                  << "UniformInt, UniformReal currently, but got " << kernel_name;
} else {
random_op_type_ = iter->second;
}
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if ((random_op_type_ == RANDOM_OP_NORMAL || random_op_type_ == RANDOM_OP_UNIFORM_REAL) && input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
if (random_op_type_ == RANDOM_OP_UNIFORM_INT && input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 3 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but random op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape_0, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'RandomOpGpuKernel', input or output is null";
InitSizeLists();
return true;
}


+ 4
- 3
mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h View File

@@ -51,11 +51,12 @@ class SquareSumAllGpuFwdKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(ERROR) << "SquareSumAllGpuFwdKernel input is null";
return false;
InitSizeLists();
return true;
}
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];


+ 10
- 5
mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.h View File

@@ -56,18 +56,21 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
auto input_shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape1) || CHECK_NULL_INPUT(input_shape2) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape1, kernel_name, "input") ||
CHECK_SHAPE_NULL(input_shape2, kernel_name, "input") ||
CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'SquaredDifferenceGpuKernel', input or output is null";
InitSizeLists();
return true;
}
need_broadcast_ = IsBroadcast(input_shape1, input_shape2);
if (need_broadcast_ && output_shape.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << MAX_DIMS
<< ", but got " << output_shape.size();
}

lhs_shape_.resize(MAX_DIMS, 1);
@@ -86,7 +89,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
lhs_shape_[j + lhs_offset] = input_shape1[j];
} else {
auto index = j + lhs_offset;
MS_LOG(EXCEPTION) << "Invalid input1 index: " << index;
// Report the offending index once and name the operand it belongs to (the first input is
// the one being copied into lhs_shape_ here).
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of the first input is invalid: " << index;
}
}
input1_num_ *= input_shape1[j];
@@ -98,7 +102,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel {
rhs_shape_[k + rhs_offset] = input_shape2[k];
} else {
auto index = k + rhs_offset;
MS_LOG(EXCEPTION) << "Invalid input2 index: " << index;
// Report the offending index once and name the operand it belongs to (the second input is
// the one being copied into rhs_shape_ here).
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of the second input is invalid: " << index;
}
}
input2_num_ *= input_shape2[k];


+ 9
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h View File

@@ -118,28 +118,30 @@ class TrsmGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(A_shape) || CHECK_NULL_INPUT(b_shape);
is_null_input_ =
CHECK_SHAPE_NULL(A_shape, kernel_name, "input_A") || CHECK_SHAPE_NULL(b_shape, kernel_name, "input_b");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'TrsmGpuKernel', input is null";
InitSizeLists();
return true;
}

if (A_shape[kDim0] != A_shape[kDim1]) {
MS_LOG(EXCEPTION) << "wrong array shape, A should be a squre matrix, but got [" << A_shape[kDim0] << " X "
<< A_shape[kDim1] << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got ["
<< A_shape[kDim0] << " X " << A_shape[kDim1] << "]";
}
m_ = A_shape[kDim0];

if (b_shape.size() != kAVectorxDimNum && b_shape.size() != kAMatrixDimNum) {
MS_LOG(EXCEPTION) << "wrong array shape, b should be 1D or 2D, but got [" << b_shape.size() << "] dimensions";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 1 or 2, but got "
<< b_shape.size();
}
if (b_shape[kDim0] != m_) {
MS_LOG(EXCEPTION) << "wrong array shape, b should match the shape of A, excepted [" << m_ << "] but got ["
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be [" << m_ << "], but got ["
<< b_shape[kDim0] << "]";
}
if (b_shape.size() == kAVectorxDimNum || (b_shape.size() == kAMatrixDimNum && b_shape[kDim1] == 1)) {
@@ -158,7 +160,7 @@ class TrsmGpuKernel : public GpuKernel {
} else if (trans == "T") {
trans_ = CUBLAS_OP_N;
} else {
MS_LOG(EXCEPTION) << "trans should be in [N, T], but got [" << trans << "]";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', trans should be in [N, T], but got [" << trans << "]";
}

bool lower = AnfAlgo::GetNodeAttr<bool>(kernel_node, "lower");


+ 8
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_complex_gpu_kernel.h View File

@@ -64,29 +64,28 @@ class UnaryOpComplexGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: Real, Imag, Conj currently, "
                  << "but got " << unary_op_type_;
}
}

return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
GetOpType(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output.";
return false;
// The enclosing check requires exactly one output, so the message must state 1 (not 3).
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "UnaryOpGpuKernel input is null";
InitSizeLists();
return true;
}
@@ -123,8 +122,8 @@ class UnaryOpComplexGpuKernel : public GpuKernel {
unary_op_type_ = iter->second;
return;
}
MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: Real, Imag, Conj currently, but got "
                  << kernel_name;
}

private:


+ 9
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h View File

@@ -184,7 +184,9 @@ class UnaryOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
                  << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
                  << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_;
}
}
return true;
@@ -193,23 +195,22 @@ class UnaryOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kUnaryOpTypeMap.find(kernel_name);
if (iter == kUnaryOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Unary operation " << kernel_name << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
                  << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
                  << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name;
}
unary_op_type_ = iter->second;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnaryOpGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 12
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_grad_gpu_kernel.h View File

@@ -112,7 +112,9 @@ class UnaryGradOpGpuKernel : public GpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Unary grad operation " << unary_grad_op_type_ << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: SqrtGrad, RsqrtGrad, AsinGrad, "
                  << "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got "
                  << unary_grad_op_type_;
}
}
return true;
@@ -121,23 +123,22 @@ class UnaryGradOpGpuKernel : public GpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kUnaryGradOpTypeMap.find(kernel_name);
if (iter == kUnaryGradOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Unary grad operation " << kernel_name << " is not supported.";
// Quote the operator name on both sides so it is clearly delimited in the log message.
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these types: SqrtGrad, RsqrtGrad, AsinGrad, "
                  << "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got "
                  << kernel_name;
}
unary_grad_op_type_ = iter->second;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but unary grad op needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but unary grad op needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UnaryOpGradGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -145,9 +146,8 @@ class UnaryGradOpGpuKernel : public GpuKernel {
input_size_ *= input_shape[i];
}
auto dx_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
is_null_input_ = CHECK_NULL_INPUT(dx_shape);
is_null_input_ = CHECK_SHAPE_NULL(dx_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "UnaryGradOpGpuKernel input 1 is null";
InitSizeLists();
return true;
}
@@ -155,7 +155,8 @@ class UnaryGradOpGpuKernel : public GpuKernel {
dx_size_ *= dx_shape[i];
}
if (input_size_ != dx_size_) {
MS_LOG(WARNING) << "UnaryGradOpGpuKernel inputs should be same, but got " << input_size_ << " and " << dx_size_;
MS_LOG(WARNING) << "For '" << kernel_name << "', both inputs should be equal, but got " << input_size_ << " and "
<< dx_size_;
InitSizeLists();
return true;
}


+ 10
- 13
mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h View File

@@ -19,6 +19,7 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
@@ -98,7 +99,8 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
CUDA_R_32F, algo_),
"cublasSgemm Call Fail");
} catch (const std::exception &e) {
MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << "when invoke cubals cublasGemmStridedBatchedEx";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", encountered an exception: " << e.what()
<< " when invoke cubals cublasGemmStridedBatchedEx";
}

auto r_input_addr = workspace1_addr;
@@ -147,9 +149,7 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
// Initializes the kernel from `kernel_node`: stores the node, obtains the cuBLAS handle from
// GPUDeviceManager::GetInstance(), runs SetProperty() on the node, then calls InitSizeLists().
// Returns true once the size lists have been initialized.
// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so the
// `if (!SetProperty(...)) { return false; }` form and the `(void)SetProperty(...)` form are presumably
// the before/after variants of a single call, not two sequential calls -- confirm against the header.
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle();
if (!SetProperty(kernel_node)) {
return false;
}
(void)SetProperty(kernel_node);
// NOTE(review): SetProperty()'s null-input branch also calls InitSizeLists() before returning, so on
// that path InitSizeLists() would run a second time here -- verify that a repeated call is harmless.
InitSizeLists();
return true;
}
@@ -188,22 +188,20 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
}

private:
bool SetProperty(const CNodePtr &kernel_node) {
void SetProperty(const CNodePtr &kernel_node) {
auto matrix_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto matrix_g_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ =
CHECK_NULL_INPUT(matrix_a_shape) || CHECK_NULL_INPUT(gradient_shape) || CHECK_NULL_INPUT(matrix_g_shape);
is_null_input_ = CHECK_SHAPE_NULL(matrix_a_shape, kernel_name_, "matrix_a") ||
CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient") ||
CHECK_SHAPE_NULL(matrix_g_shape, kernel_name_, "matrix_g");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'UpdateThorGradientGpuKernel', input is null";
InitSizeLists();
return true;
return;
}

split_dim = LongToSize(GetAttr<int64_t>(kernel_node, "split_dim"));
if (split_dim == 0) {
MS_LOG(ERROR) << "Divide by zero, split_dim can not be zero.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", divide by zero, split_dim cannot be 0, but got " << split_dim;
}
gradient_size.batch_h = gradient_shape[0] / split_dim;
gradient_size.batch_w = gradient_shape[1] / split_dim;
@@ -244,7 +242,6 @@ class UpdateThorGradientGpuKernel : public GpuKernel {
gradient_size.ori_w = gradient_shape[1];
gradient_size.ori_h = gradient_shape[0];
gradient_size.dtype = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1)));
return true;
}

size_t split_dim;


+ 9
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_collective_gpu_kernel.h View File

@@ -71,13 +71,15 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllReduce, AllGather, Broadcast, "
<< "ReduceScatter currently, but got " << nccl_kernel_type_;
}
}
return true;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
nccl_data_type_ = nccl_dtype(AnfAlgo::GetInputDeviceDataType(kernel_node, 0));
@@ -87,9 +89,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -103,9 +104,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', output is null";
InitSizeLists();
return true;
}
@@ -201,7 +201,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kNcclTypeMap.find(kernel_name);
if (iter == kNcclTypeMap.end()) {
MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllReduce, AllGather, Broadcast, "
<< "ReduceScatter currently, but got " << kernel_name;
} else {
nccl_kernel_type_ = iter->second;
}
@@ -220,7 +221,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel {
} else if (type == "prod") {
nccl_reduce_type_ = ncclProd;
} else {
MS_LOG(EXCEPTION) << "Nccl reduce type " << type << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: sum, max, min, prod currently, "
<< "but got " << type;
}
}



+ 11
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_p2p_gpu_kernel.h View File

@@ -52,13 +52,15 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
break;
}
default: {
MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllToAllv, NeighborExchange "
<< "currently, but got " << nccl_kernel_type_;
}
}
return true;
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
InferCommType(kernel_node);
@@ -73,9 +75,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < input_num; ++i) {
auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', input shape is null ";
InitSizeLists();
return true;
}
@@ -88,9 +89,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
}
for (size_t i = 0; i < output_num; ++i) {
auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', output shape is null";
InitSizeLists();
return true;
}
@@ -157,10 +157,12 @@ class NcclP2PGpuKernel : public NcclGpuKernel {

// send_rank_id and recv rank_id size needs to be equal to input_list size
if (send_rank_ids.size() != input_size_list_.size()) {
MS_LOG(ERROR) << "Trying to use AlltoAllv, but send_rank_ids vector size not equals to input_list size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", trying to use AlltoAllv, the size of send_rank_ids vector "
<< "should be " << input_size_list_.size() << ", but got " << send_rank_ids.size();
}
if (recv_rank_ids.size() != output_size_list_.size()) {
MS_LOG(ERROR) << "Trying to use AlltoAllv, but recv_rank_ids vector size not equals to output_list size.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", trying to use AlltoAllv, the size of recv_rank_ids vector "
<< "should be " << output_size_list_.size() << ", but got " << recv_rank_ids.size();
}

// This implementation refers to NVIDIA NCCL 2.11 doc.
@@ -182,7 +184,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kNcclTypeMap.find(kernel_name);
if (iter == kNcclTypeMap.end()) {
MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: AllToAllv, NeighborExchange "
<< "currently, but got " << kernel_name;
} else {
nccl_kernel_type_ = iter->second;
}


+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_recv_gpu_kernel.h View File

@@ -46,21 +46,20 @@ class NcclRecvGpuKernel : public NcclGpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but NCCL receive needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
src_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "src_rank"));
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
nccl_data_type_ = nccl_dtype(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0));

auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclRecvGpuKernel', output is null";
InitSizeLists();
return true;
}


+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_send_gpu_kernel.h View File

@@ -46,12 +46,12 @@ class NcclSendGpuKernel : public NcclGpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but NCCL send needs 1 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}

dest_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dest_rank"));
@@ -60,9 +60,8 @@ class NcclSendGpuKernel : public NcclGpuKernel {
MS_LOG(INFO) << "NcclSend dest rank is " << dest_rank_ << ", group name is " << group_name_;

auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'NcclSendGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 12
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/sync_batch_norm_gpu_kernel.h View File

@@ -41,6 +41,9 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *x = GetDeviceAddress<T>(inputs, 0);
S *scale = GetDeviceAddress<S>(inputs, 1);
S *bias = GetDeviceAddress<S>(inputs, 2);
@@ -78,6 +81,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
@@ -86,24 +90,22 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNorm needs 5 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 5) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNorm needs 5 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 5, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (CHECK_NULL_INPUT(input_shape)) {
MS_LOG(WARNING) << "SyncBatchNorm input is null";
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
auto input_shape_dims = input_shape.size();
if (input_shape_dims != 4 && input_shape_dims != 2) {
MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size()
<< ", SyncBatchNormGpuKernel input should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got "
<< input_shape_dims;
}
input_size_ = 1;
for (auto dim : input_shape) {
@@ -175,6 +177,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
is_null_input_ = false;
}

protected:
@@ -233,6 +236,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel {
string group_name_;
int root_;
cudaStream_t comm_stream_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore


+ 12
- 8
mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/sync_batch_norm_grad_gpu_kernel.h View File

@@ -41,6 +41,9 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *dy = GetDeviceAddress<T>(inputs, 0);
T *x_input = GetDeviceAddress<T>(inputs, 1);
S *scale = GetDeviceAddress<S>(inputs, 2);
@@ -65,6 +68,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank);
if (root_rank) {
root_ = static_cast<int>(GetValue<int64_t>(root_rank));
@@ -73,24 +77,22 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNormGrad needs 5 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNormGrad needs 5 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (CHECK_NULL_INPUT(input_shape)) {
MS_LOG(WARNING) << "SyncBatchNormGrad input is null";
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
auto input_shape_dims = input_shape.size();
if (input_shape_dims != 4 && input_shape_dims != 2) {
MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size()
<< ", SyncBatchNormGpuGrad input should be 2D or 4D";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got "
<< input_shape_dims;
}
input_size_ = 1;
for (auto dim : input_shape) {
@@ -155,6 +157,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
is_null_input_ = false;
}

protected:
@@ -201,6 +204,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel {
string group_name_;
int root_;
cudaStream_t comm_stream_;
bool is_null_input_;
};
} // namespace kernel
} // namespace mindspore


+ 7
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h View File

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"

@@ -52,15 +53,13 @@ class AssignGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
(void)CheckParam(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(shape);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'AssignGpuKernel', input is null";
InitSizeLists();
return true;
}
@@ -80,19 +79,16 @@ class AssignGpuKernel : public GpuKernel {
}

private:
// Validates the tensor counts of `kernel_node`: exactly 2 inputs and exactly 1 output are required.
// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped. Both the
// `bool` and the `void` signature appear, and each check shows a log-ERROR-and-`return false` form next
// to an MS_LOG(EXCEPTION) form; these are presumably the before/after variants of the same function --
// confirm against the real header before relying on either return type.
bool CheckParam(const CNodePtr &kernel_node) {
void CheckParam(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
// Input-count check: the kernel expects two input tensors.
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but AssignGpuKernel needs 2 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
// Output-count check: the kernel expects a single output tensor.
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but AssignGpuKernel needs 1 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
return true;
}

std::vector<size_t> input_size_list_;


+ 15
- 10
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.h View File

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
@@ -46,7 +47,8 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
T *bboxes_addr = GetDeviceAddress<T>(outputs, 0);
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(ERROR) << "Rois box size must equal with deltas box size -" << inputs[1]->size << ", but got"
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', rois box size must equal with deltas box size: " << inputs[1]->size << ", but got "
<< inputs[0]->size;
return false;
}
@@ -54,7 +56,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@@ -65,11 +67,11 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxDecode needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
rois_size_ = sizeof(T);
deltas_size_ = sizeof(T);
@@ -78,9 +80,10 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") ||
CHECK_SHAPE_NULL(labels_shape, kernel_name_, "deltas") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BoundingBoxDecodeGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -111,7 +114,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
means_.emplace_back(mean);
}
} else {
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid.";
}
auto stds = prim->GetAttr("stds");
@@ -124,7 +127,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
stds_.emplace_back(std);
}
} else {
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid.";
}
std::vector<int64_t> max_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "max_shape");
@@ -133,11 +136,13 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel {
wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip");
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the both size of means or stds cannot be less than 4, but got"
<< " the size of means: " << means_.size() << ", the size of stds: " << stds_.size();
}
if (max_shape_.size() < 2) {
MS_LOG(EXCEPTION) << "The size of max_shape is less than 2.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of max_shape cannot be less than 2, but got "
<< max_shape_.size();
}
return true;


+ 13
- 10
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.h View File

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -28,7 +29,6 @@ template <typename T>
class BoundingBoxEncodeGpuKernel : public GpuKernel {
public:
BoundingBoxEncodeGpuKernel() : anchor_size_(0), groundtruth_size_(0), deltas_size_(0), is_null_input_(false) {}
~BoundingBoxEncodeGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@@ -45,7 +45,8 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
T *deltas_addr = GetDeviceAddress<T>(outputs, 0);
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(ERROR) << "Anchor box size must equal with groundtruth box size -" << inputs[1]->size << ", but got"
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', anchor box size must equal with groundtruth box size: " << inputs[1]->size << ", but got "
<< inputs[0]->size;
return false;
}
@@ -53,7 +54,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@@ -64,11 +65,11 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxEncode needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
anchor_size_ = sizeof(T);
groundtruth_size_ = sizeof(T);
@@ -77,9 +78,10 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape);
is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") ||
CHECK_SHAPE_NULL(labels_shape, kernel_name_, "groundtruth_box") ||
CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BoundingBoxEncodeGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -110,7 +112,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
means_.emplace_back(mean);
}
} else {
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid.";
}
auto stds = prim->GetAttr("stds");
MS_EXCEPTION_IF_NULL(stds);
@@ -122,11 +124,12 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel {
stds_.emplace_back(std);
}
} else {
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid.";
}
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the both size of means or stds cannot be less than 4, but got"
<< " the size of means: " << means_.size() << ", the size of stds: " << stds_.size();
}
return true;


+ 7
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.h View File

@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_CHECK_VALID_GPU_KERNEL_H
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
@@ -28,7 +29,6 @@ template <typename T, typename S>
class CheckValidGpuKernel : public GpuKernel {
public:
CheckValidGpuKernel() : anchor_boxes_size_(0), img_metas_size_(0), valid_size_(0), is_null_input_(false) {}
~CheckValidGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@@ -48,7 +48,7 @@ class CheckValidGpuKernel : public GpuKernel {
const size_t coordinate = 4;
const size_t block_size = inputs[0]->size / sizeof(T);
if ((block_size % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@@ -58,11 +58,11 @@ class CheckValidGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but CheckValid needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
anchor_boxes_size_ = sizeof(T);
img_metas_size_ = sizeof(T);
@@ -71,10 +71,10 @@ class CheckValidGpuKernel : public GpuKernel {
auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto img_metas_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto valid_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(img_metas_shape) || CHECK_NULL_INPUT(valid_shape);
is_null_input_ = CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "bboxes") ||
CHECK_SHAPE_NULL(img_metas_shape, kernel_name_, "img_metas") ||
CHECK_SHAPE_NULL(valid_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CheckValidGpuKernel', input or output is null";
InitSizeLists();
return true;
}


+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/other/gpu_convert_to_dynamic_shape_gpu_kernel.h View File

@@ -62,18 +62,17 @@ class GpuConvertToDynamicShapeGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_count != 1) {
MS_LOG(ERROR) << input_count << "inputs were provided, but GpuConvertToDynamicShapeGpuKernel expects 1.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count;
}
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape_);
is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'GpuConvertToDynamicShapeGpuKernel', input is null";
InitSizeLists();
return true;
}


+ 6
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/other/iou_gpu_kernel.h View File

@@ -29,7 +29,6 @@ template <typename T>
class IOUGpuKernel : public GpuKernel {
public:
IOUGpuKernel() : gt_boxes_size_(0), anchor_boxes_size_(0), iou_size_(0), mode_(0), is_null_input_(false) {}
~IOUGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@@ -50,7 +49,7 @@ class IOUGpuKernel : public GpuKernel {
const size_t block_size_0 = inputs[0]->size / sizeof(T);
const size_t block_size_1 = inputs[1]->size / sizeof(T);
if ((block_size_0 % coordinate) != 0 || (block_size_1 % coordinate) != 0) {
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
MS_LOG(ERROR) << "For '" << kernel_name_ << ", the size of the box should be a multiple of 4.";
return false;
}
@@ -65,8 +64,7 @@ class IOUGpuKernel : public GpuKernel {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but IOU needs 2 inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
}
gt_boxes_size_ = sizeof(T);
anchor_boxes_size_ = sizeof(T);
@@ -75,10 +73,10 @@ class IOUGpuKernel : public GpuKernel {
auto gt_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto iou_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_NULL_INPUT(gt_boxes_shape) || CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(iou_shape);
is_null_input_ = CHECK_SHAPE_NULL(gt_boxes_shape, kernel_name_, "anchor_boxes") ||
CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "gt_boxes") ||
CHECK_SHAPE_NULL(iou_shape, kernel_name_, "output");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'IOUGpuKernel', input or output is null";
InitSizeLists();
return true;
}
@@ -103,8 +101,7 @@ class IOUGpuKernel : public GpuKernel {
} else if (mode == "iof") {
mode_ = 1;
} else {
MS_LOG(ERROR) << "Mode only support 'iou' or 'iof'.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', mode only support 'iou' or 'iof'.";
}
return true;


+ 6
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h View File

@@ -66,26 +66,25 @@ class BatchNormFold2GpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GpuKernel needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "BatchNormFold2GpuKernel input is null";
InitSizeLists();
return true;
}

if (input_shape.size() != 4) {
MS_LOG(ERROR) << "BatchNormFold2GpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];


+ 6
- 7
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h View File

@@ -94,26 +94,25 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GradGpuKernel needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "BatchNormFold2GradGpuKernel input is null";
InitSizeLists();
return true;
}

if (input_shape.size() != 4) {
MS_LOG(ERROR) << "BatchNormFold2GradGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];


+ 6
- 9
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h View File

@@ -99,18 +99,17 @@ class BatchNormFoldGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(ERROR) << "Input number is " << input_num << " but BatchNormFold GpuKernel OP needs 4 input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 4) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFold GpuKernel OP needs 4 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 4, but got " << output_num;
}

auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
@@ -122,16 +121,14 @@ class BatchNormFoldGpuKernel : public GpuKernel {
freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn")));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormFoldGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "Input shape is " << input_shape.size()
<< ", but BatchNormFold GpuKernel OP needs 4DTensor input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
CheckTensorSize({input_shape});
batch_ = input_shape[0];


+ 7
- 10
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h View File

@@ -78,18 +78,17 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != INPUT_NUM) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but BatchNormFoldGrad GpuKernel OP needs " << INPUT_NUM
<< " inputs.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got "
<< input_num;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFoldGrad GpuKernel OP needs 4 output.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}

auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
@@ -99,16 +98,14 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn")));

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'BatchNormFoldGradGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "Input shape is " << input_shape.size()
<< ", but BatchNormFoldGrad GpuKernel OP needs 4DTensor input.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_ = input_shape[0];
channel_ = input_shape[1];


+ 5
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h View File

@@ -49,24 +49,23 @@ class CorrectionMulGpuKernel : public GpuKernel {
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGpuKernel needs 3.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CorrectionMulGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "CorrectionMulGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];


+ 5
- 6
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h View File

@@ -55,24 +55,23 @@ class CorrectionMulGradGpuKernel : public GpuKernel {
}

bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
InitResource();

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 4) {
MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGradGpuKernel needs 4.";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num;
}

auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
MS_LOG(WARNING) << "For 'CorrectionMulGradGpuKernel', input is null";
InitSizeLists();
return true;
}
if (input_shape.size() != 4) {
MS_LOG(ERROR) << "CorrectionMulGradGpuKernel input shape needs (N,C,H,W).";
return false;
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
batch_size_ = input_shape[0];
channel_ = input_shape[1];


+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perchannel_gpu_kernel.cc View File

@@ -45,17 +45,16 @@ const std::vector<size_t> &FakeLearnedScaleQuantPerChannelGpuKernel::GetWorkspac
}
bool FakeLearnedScaleQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num
<< ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 3 Input.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num
<< ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 1 output.";
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")));


Loading…
Cancel
Save