| @@ -96,12 +96,10 @@ class ScatterNdGpuFwdKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| return false; | |||
| } | |||
| input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| @@ -59,25 +59,26 @@ class TileGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(output_shape_); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape_, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape_, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TileGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (output_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got " | |||
| << output_shape_.size(); | |||
| } | |||
| input_size_ = 1; | |||
| @@ -87,8 +88,8 @@ class TileGpuKernel : public GpuKernel { | |||
| output_size_ = 1; | |||
| if (output_shape_.size() > TILE_MAX_DIMENSION) { | |||
| MS_LOG(EXCEPTION) << "Output is " << output_shape_.size() << "-D, but Tile supports up to " << TILE_MAX_DIMENSION | |||
| << "-D."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " | |||
| << TILE_MAX_DIMENSION << ", but got " << output_shape_.size(); | |||
| } | |||
| shape_size_ = output_shape_.size(); | |||
| for (size_t i = 0; i < output_shape_.size(); i++) { | |||
| @@ -83,12 +83,13 @@ class TopKGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| auto input_shapes = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shapes = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(output_shapes); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TopkGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -74,28 +74,26 @@ class TransposeGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but transpose needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but transpose needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TransposeGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ > TRANSPOSE_MAX_DIMENSION) { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but transpose supports max " << TRANSPOSE_MAX_DIMENSION | |||
| << "-D inputs."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " | |||
| << TRANSPOSE_MAX_DIMENSION << ", but got " << shape_size_; | |||
| } | |||
| input_size_ = 1; | |||
| @@ -50,11 +50,11 @@ class UniqueGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| std::vector<size_t> shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UniqueGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_UNPACK_GPU_KERNEL_H | |||
| #include <vector> | |||
| #include <string> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -55,10 +56,9 @@ class UnpackGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| (void)CheckParam(kernel_node); | |||
| axis_ = static_cast<int32_t>(GetAttr<int64_t>(kernel_node, "axis")); | |||
| if (axis_ < 0) { | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| @@ -73,9 +73,8 @@ class UnpackGpuFwdKernel : public GpuKernel { | |||
| for (size_t i = 0; i < output_num_; i++) { | |||
| size_t _size = 1; | |||
| auto _shape = AnfAlgo::GetOutputDeviceShape(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnpackGpuKernel', output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -87,9 +86,8 @@ class UnpackGpuFwdKernel : public GpuKernel { | |||
| workspace_size_list_.push_back(sizeof(T *) * output_num_); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnpackGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -108,13 +106,11 @@ class UnpackGpuFwdKernel : public GpuKernel { | |||
| void InitSizeLists() override {} | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "input number is " << input_num << ", but UnpackGpuFwdKernel needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| return true; | |||
| } | |||
| int axis_; | |||
| bool is_null_input_; | |||
| @@ -54,14 +54,15 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || | |||
| CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") || | |||
| CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnsortedSegmentMaxGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -73,9 +74,8 @@ class UnsortedSegmentMaxGpuKernel : public GpuKernel { | |||
| MS_LOG(INFO) << "UnsortedSegmentMax Kernel Input count is 2"; | |||
| } | |||
| if (output_shapes.size() < 1) { | |||
| MS_LOG(EXCEPTION) | |||
| << "For UnsortedSegmentMax, output shape incorrect rank. Expect Rank at least rank 1, got Rank: " | |||
| << output_shapes.size() << "."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got " | |||
| << output_shapes.size(); | |||
| } | |||
| num_segments_ = output_shapes[0]; | |||
| input_size_ = 1; | |||
| @@ -49,13 +49,14 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto segment_ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(segment_ids_shapes) || CHECK_NULL_INPUT(output_shapes); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || | |||
| CHECK_SHAPE_NULL(segment_ids_shapes, kernel_name, "segment_ids") || | |||
| CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnsortedSegmentMinGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -67,9 +68,8 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel { | |||
| MS_LOG(INFO) << "UnsortedSegmentMin Kernel Input count is 2"; | |||
| } | |||
| if (output_shapes.size() < 1) { | |||
| MS_LOG(EXCEPTION) | |||
| << "For UnsortedSegmentMin, output shape incorrect rank. Expect Rank at least rank 1, got Rank: " | |||
| << output_shapes.size() << "."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 1, but got " | |||
| << output_shapes.size(); | |||
| } | |||
| num_segments_ = output_shapes[0]; | |||
| input_size_ = 1; | |||
| @@ -96,7 +96,6 @@ class UnsortedSegmentMinGpuKernel : public GpuKernel { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| void ResetResource() noexcept override { | |||
| num_segments_ = 1; | |||
| inner_size_ = 1; | |||
| @@ -52,13 +52,15 @@ class UnsortedSegmentSumGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| auto input_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto ids_shapes = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto output_shapes = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shapes) || CHECK_NULL_INPUT(ids_shapes) || CHECK_NULL_INPUT(output_shapes); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shapes, kernel_name, "input") || | |||
| CHECK_SHAPE_NULL(ids_shapes, kernel_name, "segment_ids") || | |||
| CHECK_SHAPE_NULL(output_shapes, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnsortedSegmentSumGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,12 +51,12 @@ class ZerosLikeGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| std::vector<size_t> input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ZeroslikeGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -55,7 +55,7 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| if (!handle_) { | |||
| handle_ = dlopen(file_path_.c_str(), RTLD_LAZY | RTLD_LOCAL); | |||
| if (!handle_) { | |||
| MS_LOG(ERROR) << "Open Error: " << dlerror(); | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', open should be successful, but error, " << dlerror(); | |||
| return false; | |||
| } | |||
| } | |||
| @@ -65,7 +65,7 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| reinterpret_cast<std::add_pointer<int(int, void **, int *, int64_t **, const char **, void *, void *)>::type>( | |||
| dlsym(handle_, func_name_.c_str())); | |||
| if (auto error_info = dlerror(); error_info != nullptr) { | |||
| MS_LOG(ERROR) << error_info; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', error info: " << error_info; | |||
| return false; | |||
| } | |||
| } | |||
| @@ -79,7 +79,8 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| ret = aot_func_(nparam, ¶ms[0], &ndims_[0], &shapes_[0], &type_pointer_list_[0], stream_ptr, nullptr); | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "CustomAOT operator failed when running user defined file " << file_path_ << "! " | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', operator failed when running user defined file " << file_path_ | |||
| << "! " | |||
| << "Error message is " << e.what(); | |||
| return false; | |||
| } | |||
| @@ -88,14 +89,15 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| case 0: | |||
| break; | |||
| case 1: | |||
| MS_LOG(ERROR) << "Number of parameters passed to AOT kernel is " << nparam | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the number of parameters passed to AOT kernel is " << nparam | |||
| << ", inconsistent with what the user wants"; | |||
| return false; | |||
| case 2: | |||
| MS_LOG(ERROR) << "Type of parameters passed to AOT kernel is inconsistent with what the user wants"; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ | |||
| << "', type of parameters passed to AOT kernel is inconsistent with what the user wants"; | |||
| return false; | |||
| default: | |||
| MS_LOG(ERROR) << "Error occurred when running AOT kernel, " | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', error occurred when running AOT kernel, " | |||
| << "error id is " << ret; | |||
| return false; | |||
| } | |||
| @@ -104,27 +106,25 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| const auto &exec_info = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "func_name"); | |||
| if (auto pos = exec_info.find(":"); pos != std::string::npos) { | |||
| auto path = exec_info.substr(0, pos); | |||
| auto real_path = FileUtils::GetRealPath(path.c_str()); | |||
| if (!real_path.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file path, " << path << " does not exist."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the file path should be exist, but got " << path; | |||
| } | |||
| file_path_ = real_path.value(); | |||
| func_name_ = exec_info.substr(pos + 1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Wrong execute info:" << exec_info; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', Wrong execute info:" << exec_info; | |||
| } | |||
| num_input_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| auto input_type_list = AnfAlgo::GetAllInputDeviceTypes(kernel_node); | |||
| if (num_input_ != input_type_list.size()) { | |||
| MS_LOG(ERROR) << "Input shapes'size is " << num_input_ << ", while input types' size is " | |||
| << input_type_list.size(); | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << input_type_list.size() | |||
| << ", but got " << num_input_; | |||
| } | |||
| for (size_t i = 0; i < num_input_; i++) { | |||
| @@ -141,9 +141,8 @@ class CustomAOTGpuKernel : public GpuKernel { | |||
| auto output_type_list = AnfAlgo::GetAllOutputDeviceTypes(kernel_node); | |||
| if (num_output_ != output_type_list.size()) { | |||
| MS_LOG(ERROR) << "Output shapes'size is " << num_output_ << ", while output types' size is " | |||
| << output_type_list.size(); | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be " << output_type_list.size() | |||
| << ", but got " << num_output_; | |||
| } | |||
| for (size_t i = 0; i < num_output_; i++) { | |||
| @@ -61,12 +61,13 @@ bool DatasetInitKernel::Launch(const std::vector<AddressPtr> &, const std::vecto | |||
| size_t len = total_bytes_ * buffer_q_capacity_; | |||
| if (!device::gpu::GPUMemoryAllocator::GetInstance().AllocBufferQueueMem(len, &addr)) { | |||
| MS_LOG(EXCEPTION) << "Memory not enough: failed to allocate GPU buffer queue memory[" << len << "]."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memory not enough: failed to allocate GPU buffer queue memory[" | |||
| << len << "]."; | |||
| } | |||
| auto status = GpuBufferMgr::GetInstance().Create(0, queue_name_, addr, shapes_, buffer_q_capacity_); | |||
| if (status) { | |||
| MS_LOG(EXCEPTION) << "Init Dataset Failed. len: " << len << ", status:" << status; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status; | |||
| } | |||
| return true; | |||
| @@ -121,7 +121,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) { | |||
| #ifdef ENABLE_DUMP_IR | |||
| mindspore::RDR::TriggerAll(); | |||
| #endif | |||
| MS_LOG(EXCEPTION) << "Get data timeout"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', get data timeout"; | |||
| } | |||
| } | |||
| #ifndef ENABLE_SECURITY | |||
| @@ -130,7 +130,7 @@ bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) { | |||
| profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp); | |||
| } | |||
| #endif | |||
| MS_LOG(ERROR) << "Get data failed, errcode " << ret; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', get data failed, errcode " << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -141,7 +141,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v | |||
| if (handle_ == HandleMgr::INVALID_HANDLE) { | |||
| handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_); | |||
| if (handle_ == HandleMgr::INVALID_HANDLE) { | |||
| MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed"; | |||
| } | |||
| } | |||
| @@ -151,7 +151,7 @@ bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::v | |||
| return false; | |||
| } | |||
| if (total_bytes_ != len) { | |||
| MS_LOG(ERROR) << "Dataset front error. read: " << len << ", expect: " << total_bytes_ << ", "; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', dataset front error, read: " << len << ", expect: " << total_bytes_; | |||
| return false; | |||
| } | |||
| @@ -26,7 +26,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path) {} | |||
| GetNextProfiling::GetNextProfiling(const std::string &path) : profiling_path_(path), kernel_name_("GetNextProfiling") {} | |||
| void GetNextProfiling::GetDeviceId() { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| @@ -44,13 +44,13 @@ void GetNextProfiling::Init() { | |||
| void GetNextProfiling::SaveProfilingData() { | |||
| std::ofstream handle(file_name_, std::ios::trunc); | |||
| if (!handle.is_open()) { | |||
| MS_LOG(ERROR) << "Open get-next profiling file failed."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', open get-next profiling file failed."; | |||
| return; | |||
| } | |||
| for (uint32_t index = 0; index < queue_size_.size(); index++) { | |||
| if (index > time_stamp_.size() - 1) { | |||
| handle.close(); | |||
| MS_LOG(EXCEPTION) << "index exceeds time_stamp_ size."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', index exceeds time_stamp_ size."; | |||
| } | |||
| handle << Name() << " " << time_stamp_[index].first << " " << time_stamp_[index].second << " " << queue_size_[index] | |||
| << std::endl; | |||
| @@ -62,7 +62,7 @@ void GetNextProfiling::SaveProfilingData() { | |||
| void GetNextProfiling::ChangeFileMode() { | |||
| if (chmod(common::SafeCStr(file_name_), S_IRUSR | S_IWUSR) == -1) { | |||
| MS_LOG(ERROR) << "Modify file:" << file_name_ << " to rw fail."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', modify file:" << file_name_ << " to rw fail."; | |||
| return; | |||
| } | |||
| } | |||
| @@ -44,6 +44,7 @@ class GetNextProfiling : public ProfilingOp { | |||
| std::vector<std::pair<uint64_t, uint64_t>> time_stamp_; // First value of std::pair is the start time stamp, | |||
| // Second value of std::pair is the stop time stamp | |||
| std::string device_id_; | |||
| std::string kernel_name_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -90,6 +90,7 @@ class PrintGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| if (AnfAlgo::HasNodeAttr("string_pos", kernel_node)) { | |||
| @@ -105,9 +106,8 @@ class PrintGpuKernel : public GpuKernel { | |||
| input_flag_ = SetInputFlag(&string_pos_, input_tensor_num); | |||
| for (size_t i = 0; i < input_tensor_num; i++) { | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'PrintGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -183,7 +183,7 @@ class PrintGpuKernel : public GpuKernel { | |||
| input_device_data->push_back(GetDeviceAddress<double>(inputs, i)); | |||
| break; | |||
| default: | |||
| MS_LOG(EXCEPTION) << "TypeId: " << type_id << " is not supported in Print."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the typeid cannot be " << type_id; | |||
| } | |||
| } | |||
| } | |||
| @@ -200,7 +200,7 @@ class PrintGpuKernel : public GpuKernel { | |||
| } | |||
| for (size_t i = 0; i < string_pos->size(); i++) { | |||
| if ((*string_pos)[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "string_pos cannot be a negative value"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', string_pos cannot be a negative value"; | |||
| } | |||
| auto index = IntToSize((*string_pos)[i]); | |||
| res[index] = -1; | |||
| @@ -65,22 +65,21 @@ class AddNGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| num_input_ = GetAttr<int64_t>(kernel_node, "n"); | |||
| if (num_input_ != input_num) { | |||
| MS_LOG(ERROR) << "Input number is " << num_input_ << " in attr, but got " << input_num << "input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << num_input_ << ", but got " | |||
| << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "AddNGpuFwdKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -49,20 +49,18 @@ class AssignAddGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but cudnnAddTensor needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "AssignAddGpuFwdKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -57,13 +57,15 @@ class BroadcastComplexOpGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| GetOpType(kernel_node); | |||
| auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2); | |||
| if (need_broadcast_ && shape1.size() > MAX_DIMS) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS | |||
| << ", but got " << shape1.size(); | |||
| } | |||
| lhs_shape_.resize(MAX_DIMS, 1); | |||
| @@ -128,7 +130,9 @@ class BroadcastComplexOpGpuKernel : public GpuKernel { | |||
| return; | |||
| } | |||
| MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << ", only support these types: RealDiv, Mul, Sub, Add, Div or Complex currently, but got " | |||
| << kernel_name; | |||
| } | |||
| BroadcastOpType op_type_; | |||
| @@ -70,20 +70,22 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| GetOpType(kernel_node); | |||
| auto shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto shape3 = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input") || | |||
| CHECK_SHAPE_NULL(shape2, kernel_name_, "input") || | |||
| CHECK_SHAPE_NULL(shape3, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BroadcastGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2); | |||
| if (need_broadcast_ && shape1.size() > MAX_DIMS) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than: " << MAX_DIMS << ", actual size is " | |||
| << shape1.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS | |||
| << ", but got " << shape1.size(); | |||
| } | |||
| lhs_shape_.resize(MAX_DIMS, 1); | |||
| @@ -94,7 +96,8 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||
| if (i < MAX_DIMS) { | |||
| output_shape_[i] = shape3[i]; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Output index: " << i << " should be less than " << MAX_DIMS; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of output should be less than " << MAX_DIMS | |||
| << ", but got " << i; | |||
| } | |||
| } | |||
| output_num_ *= shape3[i]; | |||
| @@ -106,7 +109,8 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||
| lhs_shape_[j + lhs_offset] = shape1[j]; | |||
| } else { | |||
| auto index = j + lhs_offset; | |||
| MS_LOG(EXCEPTION) << "Invalid input1 index: " << index; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got " | |||
| << index; | |||
| } | |||
| } | |||
| input1_num_ *= shape1[j]; | |||
| @@ -118,7 +122,8 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||
| rhs_shape_[k + rhs_offset] = shape2[k]; | |||
| } else { | |||
| auto index = k + rhs_offset; | |||
| MS_LOG(EXCEPTION) << "Invalid input2 index: " << index; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the index of input cannot be " << index << ", but got " | |||
| << index; | |||
| } | |||
| } | |||
| input2_num_ *= shape2[k]; | |||
| @@ -201,7 +206,10 @@ class BroadcastOpGpuKernel : public GpuKernel { | |||
| return; | |||
| } | |||
| MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << ", only support these types: Maximum, Minimum, Pow, RealDiv, Mul, Sub, Add, Div, DivNoNan, " | |||
| "Mod, FloorDiv, AbsGrad, FloorMod, Atan2, TruncateDiv or TruncateMod currently, but got " | |||
| << kernel_name; | |||
| } | |||
| BroadcastOpType op_type_; | |||
| @@ -69,20 +69,23 @@ class BroadcastOpGradGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| GetOpType(kernel_node); | |||
| auto shape1 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape2 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape3 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape1) || CHECK_NULL_INPUT(shape2) || CHECK_NULL_INPUT(shape3); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape1, kernel_name_, "input_1") || | |||
| CHECK_SHAPE_NULL(shape2, kernel_name_, "input_2") || | |||
| CHECK_SHAPE_NULL(shape3, kernel_name_, "input_3"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BroadcastGradGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| need_broadcast_ = AnfAlgo::IsTensorBroadcast(shape1, shape2); | |||
| if (need_broadcast_ && shape1.size() > kMaxShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << kMaxShapeSize; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " | |||
| << kMaxShapeSize << ", but got " << shape1.size(); | |||
| } | |||
| for (size_t i = 0; i < shape3.size(); i++) { | |||
| @@ -98,8 +101,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel { | |||
| x1_shape_[i + x1_offset] = shape1[i]; | |||
| } else { | |||
| auto index = i + x1_offset; | |||
| MS_LOG(EXCEPTION) << "For 'BroadcastOpGrad', the dimension of input cannot be greater than " << kMaxShapeSize | |||
| << ", but got " << (index + 1); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " | |||
| << kMaxShapeSize << ", but got " << (index + 1); | |||
| } | |||
| } | |||
| input1_num_ *= shape1[i]; | |||
| @@ -111,7 +114,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel { | |||
| x2_shape_[i + x2_offset] = shape2[i]; | |||
| } else { | |||
| auto index = i + x2_offset; | |||
| MS_LOG(EXCEPTION) << "Invalid input2 index: " << index; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " | |||
| << kMaxShapeSize << ", but got " << (index + 1); | |||
| } | |||
| } | |||
| input2_num_ *= shape2[i]; | |||
| @@ -162,7 +166,8 @@ class BroadcastOpGradGpuKernel : public GpuKernel { | |||
| auto iter = kBroadcastTypeMap.find(kernel_name); | |||
| if (iter == kBroadcastTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
|                         << "', only support these types: MaximumGrad or MinimumGrad currently, but got " << kernel_name; | |||
| } else { | |||
| op_type_ = iter->second; | |||
| } | |||
| @@ -66,14 +66,14 @@ class CastAllGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| num_input_ = GetAttr<size_t>(kernel_node, "n"); | |||
| size_ = std::make_unique<size_t[]>(num_input_); | |||
| for (size_t i = 0; i < num_input_; i++) { | |||
| auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CastAllGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh" | |||
| @@ -62,6 +63,7 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower)); | |||
| split_dim_ = static_cast<int>(GetAttr<int64_t>(kernel_node, kSplitDim)); | |||
| @@ -77,11 +79,11 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex); | |||
| is_null_input_ = CHECK_NULL_INPUT(in_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(EXCEPTION) << "For 'CholeskyGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (split_dim_ == 0) { | |||
| return InitNoSplitDim(in_shape); | |||
| } | |||
| @@ -103,11 +105,11 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| cho_row_ = in_shape.at(kDim1); | |||
| cho_col_ = in_shape.at(kDim2); | |||
| } else { | |||
| MS_LOG(ERROR) << "Input Only support Rank 2 OR 3"; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3"; | |||
| return false; | |||
| } | |||
| if (cho_row_ != cho_col_) { | |||
| MS_LOG(ERROR) << "Cholesky need square matrix as input."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| return false; | |||
| } | |||
| // set matrix row or col to be lead dimension | |||
| @@ -121,13 +123,14 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| bool InitSplitDim(const std::vector<size_t> &in_shape) { | |||
| if (in_shape.size() != kCholeskyNormalShape) { | |||
| MS_LOG(ERROR) << "Cholesky Split Matrix Need Input Rank as 2."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input should be " << kCholeskyNormalShape | |||
| << ", but got " << in_shape.size(); | |||
| return false; | |||
| } | |||
| cho_row_ = in_shape.at(kDim0); | |||
| cho_col_ = in_shape.at(kDim1); | |||
| if (cho_row_ != cho_col_) { | |||
| MS_LOG(ERROR) << "Cholesky Split Matrix Need Square Matrix as Input."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| return false; | |||
| } | |||
| @@ -206,7 +209,7 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_), | |||
| "cusolver cholesky batched Fail"); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now."; | |||
| } | |||
| size_t output_elements = outputs.at(kDim0)->size / unit_size_; | |||
| // copy results from written input's matrix to output's matrix by up or lower flag. | |||
| @@ -243,7 +246,7 @@ class CholeskyGpuKernel : public GpuKernel { | |||
| kernel_node_, cusolverDnDpotrfBatched(handle_, uplo_, m_, d_array_addr, lda_, d_info_array_addr, batch_), | |||
| "cusolver cholesky batched Fail"); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now."; | |||
| } | |||
| TriangleMatrixCopy(d_batch_input_addr, output_addr, uplo_, outputs[0]->size / sizeof(T), ldb_, m_, | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/triangle_matrix_copy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| @@ -41,7 +42,7 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| public: | |||
| using pointer = T *; | |||
| CholeskySolveGpuKernel() = default; | |||
| CholeskySolveGpuKernel() : is_null_input_(false) {} | |||
| ~CholeskySolveGpuKernel() = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| @@ -49,6 +50,9 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cusolverDnSetStream failed"); | |||
| auto input_a_addr = GetDeviceAddress<T>(inputs, kDim0); | |||
| @@ -81,7 +85,7 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| d_b_array_addr, ldb_, d_info_array_addr, batch_), | |||
| "cusolver cholesky solve batched Fail"); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "cholesky solve do not support other data type but only float or double, right now."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now."; | |||
| } | |||
| size_t output_elements = outputs.at(kDim0)->size / unit_size_; | |||
| // copy results from written input's matrix to output's matrix. | |||
| @@ -90,6 +94,7 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| lower_ = static_cast<bool>(GetAttr<bool>(kernel_node, kLower)); | |||
| // gpu input is col major default, so need to change row major. | |||
| @@ -103,10 +108,14 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); | |||
| auto in_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim0); | |||
| auto in_b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kDim1); | |||
| if (CHECK_NULL_INPUT(in_a_shape) || CHECK_NULL_INPUT(in_b_shape)) { | |||
| MS_LOG(EXCEPTION) << "For 'CholeskySolveGpuKernel', input is null"; | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(in_a_shape, kernel_name_, "input_a") || CHECK_SHAPE_NULL(in_b_shape, kernel_name_, "input_b"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| return InitDim(in_a_shape, in_b_shape); | |||
| (void)InitDim(in_a_shape, in_b_shape); | |||
| return true; | |||
| } | |||
| protected: | |||
| @@ -127,7 +136,7 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) { | |||
| void InitDim(const std::vector<size_t> &in_a_shape, const std::vector<size_t> &in_b_shape) { | |||
| if (in_a_shape.size() == kCholeskyDefaultShape) { | |||
| batch_ = 1; | |||
| cho_row_ = in_a_shape.at(kDim0); | |||
| @@ -141,17 +150,14 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| cho_row_ = in_a_shape.at(kDim1); | |||
| cho_col_ = in_a_shape.at(kDim2); | |||
| } else { | |||
| MS_LOG(ERROR) << "Input Only support Rank 2 OR 3"; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3"; | |||
| } | |||
| if (cho_row_ != cho_col_) { | |||
| MS_LOG(ERROR) << "Cholesky need square matrix as input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| } | |||
| size_t b_row = in_b_shape.size() == kCholeskyBatchedShape ? in_b_shape.at(kDim1) : in_b_shape.at(kDim0); | |||
| if (cho_row_ != b_row) { | |||
| MS_LOG(ERROR) << "Cholesky right hand matrix is not equal to left matrix."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', right hand matrix should be equal to left matrix"; | |||
| } | |||
| m_ = SizeToInt(in_a_shape.at(kDim1)); | |||
| lda_ = m_; | |||
| @@ -159,7 +165,6 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| h_a_array_.resize(batch_); | |||
| h_b_array_.resize(batch_); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| size_t cho_row_{0}; | |||
| size_t cho_col_{0}; | |||
| @@ -177,6 +182,7 @@ class CholeskySolveGpuKernel : public GpuKernel { | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/eye_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/matrix_split_impl.cuh" | |||
| @@ -67,13 +68,13 @@ class CholeskyTrsmGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); | |||
| blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(in_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CholeskyTrsmSolveGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -84,12 +85,10 @@ class CholeskyTrsmGpuKernel : public GpuKernel { | |||
| } | |||
| } else { | |||
| if (in_shape.size() != 2) { | |||
| MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Input Rank as 2."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input only should be 2"; | |||
| } | |||
| if (in_shape[0] != in_shape[1]) { | |||
| MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Square Matrix as Input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| } | |||
| InitDimOthers(kernel_node, in_shape); | |||
| } | |||
| @@ -201,17 +200,19 @@ class CholeskyTrsmGpuKernel : public GpuKernel { | |||
| if (in_shape.size() == 2) { | |||
| batch_ = 1; | |||
| if (in_shape[0] != in_shape[1]) { | |||
| MS_LOG(ERROR) << "CholeskyTrsm shape0: " << in_shape[0] << ", is not equal to shape1: " << in_shape[1]; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape0 should be equal to " << in_shape[1] << ", but got " | |||
| << in_shape[0]; | |||
| return false; | |||
| } | |||
| } else if (in_shape.size() == 3) { | |||
| batch_ = SizeToInt(in_shape[0]); | |||
| if (in_shape[1] != in_shape[2]) { | |||
| MS_LOG(ERROR) << "CholeskyTrsm shape1: " << in_shape[1] << ", is not equal to shape2: " << in_shape[2]; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', shape1 should be equal to " << in_shape[2] << ", but got " | |||
| << in_shape[1]; | |||
| return false; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Input Only support Rank 2 OR 3"; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 2 or 3"; | |||
| return false; | |||
| } | |||
| @@ -55,15 +55,15 @@ class CumProdGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumProdGpuKernel needs 1."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| input_size_0_ = sizeof(T); | |||
| shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape_); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -72,7 +72,8 @@ class CumProdGpuKernel : public GpuKernel { | |||
| reverse_ = GetAttr<bool>(kernel_node, "reverse"); | |||
| int input_dim_length = SizeToInt(shape_.size()); | |||
| if (axis_ >= input_dim_length) { | |||
| MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length | |||
| << ", but got " << axis_; | |||
| } | |||
| while (axis_ < 0) { | |||
| axis_ += input_dim_length; | |||
| @@ -55,15 +55,15 @@ class CumSumGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumSumGpuKernel needs 1."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| input_size_0_ = sizeof(T); | |||
| shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape_); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape_, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CumSumGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -72,7 +72,8 @@ class CumSumGpuKernel : public GpuKernel { | |||
| reverse_ = GetAttr<bool>(kernel_node, "reverse"); | |||
| int input_dim_length = SizeToInt(shape_.size()); | |||
| if (axis_ >= input_dim_length) { | |||
| MS_LOG(EXCEPTION) << "Axis is: " << axis_ << " out of bounds."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'axis' should be less than " << input_dim_length | |||
| << ", but got " << axis_; | |||
| } | |||
| while (axis_ < 0) { | |||
| axis_ += input_dim_length; | |||
| @@ -47,7 +47,8 @@ class DetTriangleGpuKernel : public GpuKernel { | |||
| if (!CheckTriangle(input_addr, fill_mode_, matrix_n_, outputs[0]->size / sizeof(T), | |||
| reinterpret_cast<cudaStream_t>(stream_ptr))) { | |||
| MS_LOG(ERROR) << "The elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ | |||
| << "', the elements in the upper half of the matrix should be all 0, fill mode is: " << fill_mode_; | |||
| return false; | |||
| } | |||
| DetTriangle(input_addr, output_addr, matrix_n_, outputs[0]->size / sizeof(T), | |||
| @@ -56,21 +57,20 @@ class DetTriangleGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but DetTriangle needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but DetTriangle needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'DeterminantTriangleGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -79,8 +79,8 @@ class DetTriangleGpuKernel : public GpuKernel { | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(ERROR) << "The input should have rank at least 2."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| matrix_n_ = input_shape[input_shape.size() - 1]; | |||
| @@ -89,12 +89,11 @@ class DetTriangleGpuKernel : public GpuKernel { | |||
| output_size_ *= output_shape[i]; | |||
| } | |||
| if (matrix_n_ == 0 || output_size_ != input_size_ / matrix_n_ / matrix_n_) { | |||
| MS_LOG(ERROR) << "The output shape is wrong."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of output should be " | |||
| << (input_size_ / matrix_n_ / matrix_n_) << ", but got " << output_size_; | |||
| } | |||
| if (input_shape[input_shape.size() - 2] != input_shape[input_shape.size() - 1]) { | |||
| MS_LOG(ERROR) << "The matrix should be in shape of square."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| } | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| @@ -21,6 +21,7 @@ | |||
| #include <cusolverDn.h> | |||
| #include <cuda_runtime.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <complex> | |||
| #include <algorithm> | |||
| #include <type_traits> | |||
| @@ -52,13 +53,14 @@ struct Complex_traits<Complex<T>> { | |||
| template <typename T> | |||
| class EighcGpuKernel : public GpuKernel { | |||
| public: | |||
| EighcGpuKernel() = default; | |||
| EighcGpuKernel() : is_null_input_(false) {} | |||
| ~EighcGpuKernel() = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| @@ -70,13 +72,14 @@ class EighcGpuKernel : public GpuKernel { | |||
| jobz_ = CUSOLVER_EIG_MODE_NOVECTOR; | |||
| } | |||
| cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); | |||
| bool is_null_input = CHECK_NULL_INPUT(A_shape); | |||
| if (is_null_input) { | |||
| MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null"; | |||
| is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
|     if (A_shape.size() != kShape2dDims || A_shape[0] != A_shape[1]) { | |||
| MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X " | |||
| << A_shape[1] << "]"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of input should be square matrix, but got [" | |||
| << A_shape[0] << " X " << A_shape[1] << "]"; | |||
| } | |||
| m_ = A_shape[0]; | |||
| InitSizeLists(); | |||
| @@ -85,6 +88,9 @@ class EighcGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cublasSetStream failed"); | |||
| CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| @@ -136,7 +142,7 @@ class EighcGpuKernel : public GpuKernel { | |||
| } | |||
| d_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(sizeof(T) * lwork); | |||
| if (!d_work) { | |||
| MS_LOG(EXCEPTION) << "GPU memory alloca failed."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', GPU memory alloca failed."; | |||
| } | |||
| if constexpr (std::is_same_v<T, Complex<float>>) { | |||
| cusolverDnCheevd(cusolver_handle_, jobz_, uplo_, m_, reinterpret_cast<cuComplex *>(w_v_addr), lda_, w_w_addr, | |||
| @@ -198,6 +204,7 @@ class EighcGpuKernel : public GpuKernel { | |||
| std::vector<size_t> output_size_list_{}; | |||
| std::vector<size_t> workspace_size_list_{}; | |||
| using D = typename Complex_traits<T>::value_type; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -39,13 +39,14 @@ constexpr char LOWER[] = "lower"; | |||
| template <typename T> | |||
| class EighGpuKernel : public GpuKernel { | |||
| public: | |||
| EighGpuKernel() = default; | |||
| EighGpuKernel() : is_null_input_(false) {} | |||
| ~EighGpuKernel() = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| compute_eigen_vectors_ = static_cast<bool>(GetAttr<bool>(kernel_node, C_EIEH_VECTOR)); | |||
| @@ -56,13 +57,14 @@ class EighGpuKernel : public GpuKernel { | |||
| jobz_ = CUSOLVER_EIG_MODE_NOVECTOR; | |||
| } | |||
| cusolver_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); | |||
| bool is_null_input = CHECK_NULL_INPUT(A_shape); | |||
| if (is_null_input) { | |||
| MS_LOG(EXCEPTION) << "For 'EighValue GpuKernel', input is null"; | |||
| is_null_input_ = CHECK_SHAPE_NULL(A_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (A_shape.size() != kShape2dDims || A_shape[0] != A_shape[1]) { | |||
| MS_LOG(EXCEPTION) << "wrong array shape, A should be a square matrix, but got [" << A_shape[0] << " X " | |||
| << A_shape[1] << "]"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got [" | |||
| << A_shape[0] << " X " << A_shape[1] << "]"; | |||
| } | |||
| m_ = A_shape[0]; | |||
| InitSizeLists(); | |||
| @@ -71,6 +73,9 @@ class EighGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cusolverDnSetStream failed"); | |||
| // matrix A, input or output(eigenvector) | |||
| @@ -152,6 +157,7 @@ class EighGpuKernel : public GpuKernel { | |||
| cusolverEigMode_t jobz_ = CUSOLVER_EIG_MODE_NOVECTOR; | |||
| bool compute_eigen_vectors_{false}; | |||
| bool lower_{true}; | |||
| bool is_null_input_; | |||
| std::vector<T *> h_array_{}; | |||
| std::vector<size_t> input_size_list_{}; | |||
| std::vector<size_t> output_size_list_{}; | |||
| @@ -49,23 +49,21 @@ class EqualCountGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but equalcount needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but equalcount needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| output_size_ = sizeof(T); | |||
| input_size_ = sizeof(T); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'EqualcountGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -77,13 +77,11 @@ class FloatStatusGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| (void)CheckParam(kernel_node); | |||
| auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'FloatStatusGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -91,10 +89,10 @@ class FloatStatusGpuKernel : public GpuKernel { | |||
| for (size_t x : shape) { | |||
| input_size_ = input_size_ * x; | |||
| } | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kOpTypeMap.find(kernel_name); | |||
| if (iter == kOpTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "FloatStatus kernel " << kernel_name << " is not supported."; | |||
|       MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these types: FloatStatus, IsInf, IsNan, IsFinite " | |||
| << "currently, but got " << kernel_name; | |||
| } | |||
| kernel_name_ = iter->second; | |||
| @@ -114,18 +112,16 @@ class FloatStatusGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but FloatStatusGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but FloatStatusGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| return true; | |||
| } | |||
| std::vector<size_t> input_size_list_; | |||
| @@ -52,20 +52,18 @@ class IdentityGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but identity needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but identity needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "IdentityGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -62,18 +62,20 @@ class IndexAddGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but index add needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| is_null_input_ = CHECK_NULL_INPUT(dst_shape) || CHECK_NULL_INPUT(index_shape) || CHECK_NULL_INPUT(src_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(dst_shape, kernel_name, "x") || | |||
| CHECK_SHAPE_NULL(index_shape, kernel_name, "indices") || | |||
| CHECK_SHAPE_NULL(src_shape, kernel_name, "y"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'IndexAddGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -91,8 +93,8 @@ class IndexAddGpuKernel : public GpuKernel { | |||
| inner_size_ *= src_shape[i]; | |||
| } | |||
| if (axis < 0 || axis >= SizeToInt(src_shape.size()) || axis >= SizeToInt(dst_shape.size())) { | |||
| MS_LOG(EXCEPTION) << "Init axis size failed, actual src axis size is " << src_axis_size_ | |||
| << ", actual dst axis size is " << dst_axis_size_; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of 'axis' cannot be greater than or equal to " | |||
| << SizeToInt(src_shape.size()) << " or " << SizeToInt(dst_shape.size()) << ", but got " << axis; | |||
| } | |||
| src_axis_size_ = src_shape[axis]; | |||
| dst_axis_size_ = dst_shape[axis]; | |||
| @@ -49,37 +49,34 @@ class LinSpaceGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but DynamicLinSpace needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but DynamicLinSpace needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto input_2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto value_count = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_1) || CHECK_NULL_INPUT(input_2) || CHECK_NULL_INPUT(value_count); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_1, kernel_name, "start") || | |||
| CHECK_SHAPE_NULL(input_2, kernel_name, "stop") || | |||
| CHECK_SHAPE_NULL(value_count, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LinspaceGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| // error checking input data | |||
| if ((input_1.size() != 0) || (input_2.size() != 0)) { | |||
| MS_LOG(ERROR) << "For LinShape " | |||
| << "both start and end must be 0-D Tensors. Got " << input_1.size() << " and " << input_2.size() | |||
| << "."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', both start and end should be 0-D Tensors, but got dimension " | |||
| << "of start: " << input_1.size() << " and dimension of end: " << input_2.size(); | |||
| } | |||
| if (value_count.size() != 1) { | |||
| MS_LOG(ERROR) << "For LinShape, output shape incorrect rank. Expect Rank: 1, got Rank: " << value_count.size() | |||
| << "."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output should be 1, but got " | |||
| << value_count.size(); | |||
| } | |||
| value_count_ = value_count[0]; | |||
| InitSizeLists(); | |||
| @@ -49,11 +49,11 @@ class LogicalNotGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LogicalGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include <type_traits> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| @@ -39,7 +40,7 @@ constexpr size_t kLuNormalShape = 2; | |||
| template <typename T> | |||
| class LUGpuKernel : public GpuKernel { | |||
| public: | |||
| LUGpuKernel() = default; | |||
| LUGpuKernel() : is_null_input_(false) {} | |||
| ~LUGpuKernel() = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| @@ -47,6 +48,9 @@ class LUGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cusolverDnSetStream failed"); | |||
| auto input_addr = GetDeviceAddress<T>(inputs, kDim0); | |||
| @@ -65,7 +69,7 @@ class LUGpuKernel : public GpuKernel { | |||
| "cusolver query lu work size fail"); | |||
| if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) { | |||
| MS_LOG(EXCEPTION) << "cusolver malloc work size fail"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail"; | |||
| } | |||
| CHECK_CUSOLVER_RET_WITH_EXCEPT( | |||
| @@ -79,7 +83,7 @@ class LUGpuKernel : public GpuKernel { | |||
| // 5. malloc device working space of getrf | |||
| if (cudaMalloc(reinterpret_cast<void **>(&d_work_), unit_size_ * lwork_) != cudaSuccess) { | |||
| MS_LOG(EXCEPTION) << "cusolver malloc work size fail"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cusolver malloc work size fail"; | |||
| } | |||
| // 6. solve to lu factorization according to cuSolver api, outputs have been written to input's matrix. | |||
| @@ -87,7 +91,7 @@ class LUGpuKernel : public GpuKernel { | |||
| kernel_node_, cusolverDnDgetrf(handle_, m_, m_, input_addr, lda_, d_work_, piv_output_addr, info_output_addr), | |||
| "cusolver lu fail"); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "cholesky factorization do not support other data type but only float or double, right now."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type only should be float or double, right now."; | |||
| } | |||
| // 7. copy results from written input's matrix to output's matrix. | |||
| // if (cudaMemcpy(output_addr, input_addr, lda_ * m_ * unit_size_, cudaMemcpyDeviceToDevice) != cudaSuccess) { | |||
| @@ -101,14 +105,16 @@ class LUGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| // 1. get CuSolver Dense matrix handler | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCusolverDnHandle(); | |||
| auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| // 2. check input shape not null | |||
| bool is_null_input = CHECK_NULL_INPUT(in_shape); | |||
| if (is_null_input) { | |||
| MS_LOG(EXCEPTION) << "For 'PureCholeskyGpuKernel', input is null"; | |||
| is_null_input_ = CHECK_SHAPE_NULL(in_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| // 3. calculate input size | |||
| if (!InitInputSize(in_shape)) { | |||
| @@ -126,11 +132,11 @@ class LUGpuKernel : public GpuKernel { | |||
| lu_row_ = in_shape.at(kDim0); | |||
| lu_col_ = in_shape.at(kDim1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Input Only support Rank 1 OR 2"; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input only should be 1 or 2"; | |||
| return false; | |||
| } | |||
| if (lu_row_ != lu_col_) { | |||
| MS_LOG(ERROR) << "Cholesky need square matrix as input."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of input should be square matrix"; | |||
| return false; | |||
| } | |||
| // set matrix row or col to be lead dimension | |||
| @@ -170,6 +176,7 @@ class LUGpuKernel : public GpuKernel { | |||
| std::vector<size_t> input_size_list_{}; | |||
| std::vector<size_t> output_size_list_{}; | |||
| std::vector<size_t> workspace_size_list_{}; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -91,13 +91,14 @@ class MatMulGpuKernel : public GpuKernel { | |||
| "cublasGemmStridedBatchedEx failed. Possible reasons: the GPU is occupied by other processes."); | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << " when invoke cublas " | |||
| << (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx"); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', encountered an exception: " << e.what() << " when invoke " | |||
| << "cublas " << (batch_ == 1 ? "cublasGemmEx" : "cublasGemmStridedBatchedEx"); | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| @@ -105,7 +106,7 @@ class MatMulGpuKernel : public GpuKernel { | |||
| dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0))); | |||
| auto node_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (dtype_a_ != dtype_b_ || dtype_a_ != dtype_c_) { | |||
| MS_LOG(EXCEPTION) << "input and output types are not the same in " << node_name; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input and output types are not the same in " << node_name; | |||
| } | |||
| if (dtype_a_ == CUDA_R_16F && dtype_b_ == CUDA_R_16F && dtype_c_ == CUDA_R_16F) { | |||
| MS_LOG(INFO) << "input and output type is float16, allow to use Tensor Core operations if possible"; | |||
| @@ -113,15 +114,16 @@ class MatMulGpuKernel : public GpuKernel { | |||
| } | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| auto input1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(output_shape) || CHECK_NULL_INPUT(input1_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input1_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MatmulGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| auto dims = output_shape.size(); | |||
| if (dims < 2) { | |||
| MS_LOG(EXCEPTION) << "Output dims " << dims << " not support."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than 2, but got " | |||
| << dims; | |||
| } | |||
| m_ = output_shape[dims - 2]; | |||
| @@ -139,7 +141,7 @@ class MatMulGpuKernel : public GpuKernel { | |||
| } else if (!transpose && input1_shape.size() > (dims - 1)) { | |||
| k_ = input1_shape[dims - 1]; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Init k_ via input1_shape failed."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init k_ via input1_shape failed."; | |||
| } | |||
| transpose = GetAttr<bool>(kernel_node, "transpose_x2"); | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <type_traits> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -90,28 +91,30 @@ class MatrixInverseGpuKernel : public GpuKernel { | |||
| reinterpret_cast<double **>(inv_batch_addr), len, info_addr, batchsize), | |||
| "cublas trsm batched Fail"); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "The data type entered must be float or double."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the data type entered must be float or double."; | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MatrixInverseGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "The dim entered needs to be greater than 2, but " << input_shape.size() << " was taken"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| size_t last_index = input_shape.size() - 1; | |||
| if (input_shape[last_index] != input_shape[last_index - 1]) { | |||
| MS_LOG(EXCEPTION) << "The last two dimensions of the input matrix should be equal!"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the last two dimensions of the input matrix should be equal, " | |||
| << "but got one: " << input_shape[last_index] << ", another: " << input_shape[last_index - 1]; | |||
| } | |||
| size_ = input_shape[last_index]; | |||
| for (size_t i = 0; i < last_index - 1; i++) { | |||
| @@ -59,7 +59,7 @@ class MultinomialGpuKernel : public GpuKernel { | |||
| T *probs_addr = GetDeviceAddress<T>(inputs, 0); | |||
| int64_t *num_sample_addr = GetDeviceAddress<int64_t>(inputs, 1); | |||
| if (distributions_ == 0) { | |||
| MS_LOG(ERROR) << "Divide by zero. the distributions_ is 0."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', divide by zero. the distributions_ is 0."; | |||
| return false; | |||
| } | |||
| @@ -83,22 +83,20 @@ class MultinomialGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but multinomial needs 2 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but multinomial needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape_0, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MultinomialGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -65,24 +65,22 @@ class NMSWithMaskGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| iou_value_ = GetAttr<float>(kernel_node, "iou_threshold"); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but NMSWithMask needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 3) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but NMSWithMask needs 3 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NMSWithMaskGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -120,7 +120,7 @@ class RandomOpGpuKernel : public GpuKernel { | |||
| inputs[2]->size / sizeof(T), output_addr, outputs[0]->size / sizeof(T), | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "For UniformInt op, `minval` should be strictly less than `maxval`"; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', `minval` should be strictly less than `maxval`"; | |||
| return false; | |||
| } | |||
| break; | |||
| @@ -148,7 +148,8 @@ class RandomOpGpuKernel : public GpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: StandardNormal, CudnnUniformReal, " | |||
| << "UniformInt, UniformReal currently, but got " << random_op_type_; | |||
| } | |||
| } | |||
| return true; | |||
| @@ -158,29 +159,27 @@ class RandomOpGpuKernel : public GpuKernel { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kRandomOpTypeMap.find(kernel_name); | |||
| if (iter == kRandomOpTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: StandardNormal, CudnnUniformReal, " | |||
| << "UniformInt, UniformReal currently, but got " << kernel_name; | |||
| } else { | |||
| random_op_type_ = iter->second; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if ((random_op_type_ == RANDOM_OP_NORMAL || random_op_type_ == RANDOM_OP_UNIFORM_REAL) && input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| if (random_op_type_ == RANDOM_OP_UNIFORM_INT && input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but random op needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_0) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape_0, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'RandomOpGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,11 +51,12 @@ class SquareSumAllGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(ERROR) << "SquareSumAllGpuFwdKernel input is null"; | |||
| return false; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| for (size_t i = 0; i < input_shape.size(); i++) { | |||
| input_size_ *= input_shape[i]; | |||
| @@ -56,18 +56,21 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape1 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| auto input_shape2 = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape1) || CHECK_NULL_INPUT(input_shape2) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape1, kernel_name, "input") || | |||
| CHECK_SHAPE_NULL(input_shape2, kernel_name, "input") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SquaredDifferenceGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| need_broadcast_ = IsBroadcast(input_shape1, input_shape2); | |||
| if (need_broadcast_ && output_shape.size() > MAX_DIMS) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than " << MAX_DIMS; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << MAX_DIMS | |||
| << ", but got " << output_shape.size(); | |||
| } | |||
| lhs_shape_.resize(MAX_DIMS, 1); | |||
| @@ -86,7 +89,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel { | |||
| lhs_shape_[j + lhs_offset] = input_shape1[j]; | |||
| } else { | |||
| auto index = j + lhs_offset; | |||
| MS_LOG(EXCEPTION) << "Invalid input1 index: " << index; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of input cannot be " << index << ", but got " | |||
| << index; | |||
| } | |||
| } | |||
| input1_num_ *= input_shape1[j]; | |||
| @@ -98,7 +102,8 @@ class SquaredDifferenceOpGpuKernel : public GpuKernel { | |||
| rhs_shape_[k + rhs_offset] = input_shape2[k]; | |||
| } else { | |||
| auto index = k + rhs_offset; | |||
| MS_LOG(EXCEPTION) << "Invalid input2 index: " << index; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the index of input cannot be " << index << ", but got " | |||
| << index; | |||
| } | |||
| } | |||
| input2_num_ *= input_shape2[k]; | |||
| @@ -118,28 +118,30 @@ class TrsmGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| blas_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| auto A_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto b_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(A_shape) || CHECK_NULL_INPUT(b_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(A_shape, kernel_name, "input_A") || CHECK_SHAPE_NULL(b_shape, kernel_name, "input_b"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TrsmGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (A_shape[kDim0] != A_shape[kDim1]) { | |||
| MS_LOG(EXCEPTION) << "wrong array shape, A should be a squre matrix, but got [" << A_shape[kDim0] << " X " | |||
| << A_shape[kDim1] << "]"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be square matrix, but got [" | |||
| << A_shape[kDim0] << " X " << A_shape[kDim1] << "]"; | |||
| } | |||
| m_ = A_shape[kDim0]; | |||
| if (b_shape.size() != kAVectorxDimNum && b_shape.size() != kAMatrixDimNum) { | |||
| MS_LOG(EXCEPTION) << "wrong array shape, b should be 1D or 2D, but got [" << b_shape.size() << "] dimensions"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 1 or 2, but got " | |||
| << b_shape.size(); | |||
| } | |||
| if (b_shape[kDim0] != m_) { | |||
| MS_LOG(EXCEPTION) << "wrong array shape, b should match the shape of A, excepted [" << m_ << "] but got [" | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the shape of input should be [" << m_ << "], but got [" | |||
| << b_shape[kDim0] << "]"; | |||
| } | |||
| if (b_shape.size() == kAVectorxDimNum || (b_shape.size() == kAMatrixDimNum && b_shape[kDim1] == 1)) { | |||
| @@ -158,7 +160,7 @@ class TrsmGpuKernel : public GpuKernel { | |||
| } else if (trans == "T") { | |||
| trans_ = CUBLAS_OP_N; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "trans should be in [N, T], but got [" << trans << "]"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', trans should be in [N, T], but got [" << trans << "]"; | |||
| } | |||
| bool lower = AnfAlgo::GetNodeAttr<bool>(kernel_node, "lower"); | |||
| @@ -64,29 +64,28 @@ class UnaryOpComplexGpuKernel : public GpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Real, Imag, Conj currently, " | |||
| << "but got " << unary_op_type_; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| GetOpType(kernel_node); | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 3, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "UnaryOpGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -123,8 +122,8 @@ class UnaryOpComplexGpuKernel : public GpuKernel { | |||
| unary_op_type_ = iter->second; | |||
| return; | |||
| } | |||
| MS_LOG(EXCEPTION) << "operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Real, Imag, Conj currently, but got " | |||
| << kernel_name; | |||
| } | |||
| private: | |||
| @@ -184,7 +184,9 @@ class UnaryOpGpuKernel : public GpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Unary operation " << unary_op_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," | |||
| << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " | |||
| << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_; | |||
| } | |||
| } | |||
| return true; | |||
| @@ -193,23 +195,22 @@ class UnaryOpGpuKernel : public GpuKernel { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kUnaryOpTypeMap.find(kernel_name); | |||
| if (iter == kUnaryOpTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Unary operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," | |||
| << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " | |||
| << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name; | |||
| } | |||
| unary_op_type_ = iter->second; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but unary op needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but unary op needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnaryOpGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -112,7 +112,9 @@ class UnaryGradOpGpuKernel : public GpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Unary grad operation " << unary_grad_op_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: SqrtGrad, RsqrtGrad, AsinGrad, " | |||
| << "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got " | |||
| << unary_grad_op_type_; | |||
| } | |||
| } | |||
| return true; | |||
| @@ -121,23 +123,22 @@ class UnaryGradOpGpuKernel : public GpuKernel { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kUnaryGradOpTypeMap.find(kernel_name); | |||
| if (iter == kUnaryGradOpTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Unary grad operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', only support these types: SqrtGrad, RsqrtGrad, AsinGrad, " | |||
| << "ACosGrad, AtanGrad, AsinhGrad, AcoshGrad, ReciprocalGrad currently, but got " | |||
| << kernel_name; | |||
| } | |||
| unary_grad_op_type_ = iter->second; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but unary grad op needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but unary grad op needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UnaryOpGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -145,9 +146,8 @@ class UnaryGradOpGpuKernel : public GpuKernel { | |||
| input_size_ *= input_shape[i]; | |||
| } | |||
| auto dx_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(dx_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(dx_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "UnaryGradOpGpuKernel input 1 is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -155,7 +155,8 @@ class UnaryGradOpGpuKernel : public GpuKernel { | |||
| dx_size_ *= dx_shape[i]; | |||
| } | |||
| if (input_size_ != dx_size_) { | |||
| MS_LOG(WARNING) << "UnaryGradOpGpuKernel inputs should be same, but got " << input_size_ << " and " << dx_size_; | |||
| MS_LOG(WARNING) << "For '" << kernel_name << "', both inputs should be equal, but got " << input_size_ << " and " | |||
| << dx_size_; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <cublas_v2.h> | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "backend/kernel_compiler/gpu/kernel_constants.h" | |||
| @@ -98,7 +99,8 @@ class UpdateThorGradientGpuKernel : public GpuKernel { | |||
| CUDA_R_32F, algo_), | |||
| "cublasSgemm Call Fail"); | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << "when invoke cubals cublasGemmStridedBatchedEx"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', encountered an exception: " << e.what() | |||
| << " when invoking cublas cublasGemmStridedBatchedEx" | |||
| } | |||
| auto r_input_addr = workspace1_addr; | |||
| @@ -147,9 +149,7 @@ class UpdateThorGradientGpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); | |||
| if (!SetProperty(kernel_node)) { | |||
| return false; | |||
| } | |||
| (void)SetProperty(kernel_node); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -188,22 +188,20 @@ class UpdateThorGradientGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool SetProperty(const CNodePtr &kernel_node) { | |||
| void SetProperty(const CNodePtr &kernel_node) { | |||
| auto matrix_a_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto matrix_g_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(matrix_a_shape) || CHECK_NULL_INPUT(gradient_shape) || CHECK_NULL_INPUT(matrix_g_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(matrix_a_shape, kernel_name_, "matrix_a") || | |||
| CHECK_SHAPE_NULL(gradient_shape, kernel_name_, "gradient") || | |||
| CHECK_SHAPE_NULL(matrix_g_shape, kernel_name_, "matrix_g"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'UpdateThorGradientGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| return; | |||
| } | |||
| split_dim = LongToSize(GetAttr<int64_t>(kernel_node, "split_dim")); | |||
| if (split_dim == 0) { | |||
| MS_LOG(ERROR) << "Divide by zero, split_dim can not be zero."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', divide by zero, split_dim cannot be 0, but got " << split_dim; | |||
| } | |||
| gradient_size.batch_h = gradient_shape[0] / split_dim; | |||
| gradient_size.batch_w = gradient_shape[1] / split_dim; | |||
| @@ -244,7 +242,6 @@ class UpdateThorGradientGpuKernel : public GpuKernel { | |||
| gradient_size.ori_w = gradient_shape[1]; | |||
| gradient_size.ori_h = gradient_shape[0]; | |||
| gradient_size.dtype = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1))); | |||
| return true; | |||
| } | |||
| size_t split_dim; | |||
| @@ -71,13 +71,15 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: AllReduce, AllGather, Broadcast, " | |||
| << "ReduceScatter currently, but got " << nccl_kernel_type_; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| nccl_data_type_ = nccl_dtype(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)); | |||
| @@ -87,9 +89,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel { | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -103,9 +104,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel { | |||
| } | |||
| for (size_t i = 0; i < output_num; ++i) { | |||
| auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclCollectiveGpuKernel', output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -201,7 +201,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kNcclTypeMap.find(kernel_name); | |||
| if (iter == kNcclTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: AllReduce, AllGather, Broadcast, " | |||
| << "ReduceScatter currently, but got " << kernel_name; | |||
| } else { | |||
| nccl_kernel_type_ = iter->second; | |||
| } | |||
| @@ -220,7 +221,8 @@ class NcclCollectiveGpuKernel : public NcclGpuKernel { | |||
| } else if (type == "prod") { | |||
| nccl_reduce_type_ = ncclProd; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Nccl reduce type " << type << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: sum, max, min, prod currently, " | |||
| << "but got " << type; | |||
| } | |||
| } | |||
| @@ -52,13 +52,15 @@ class NcclP2PGpuKernel : public NcclGpuKernel { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_LOG(EXCEPTION) << "Kernel type " << nccl_kernel_type_ << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: AllToAllv, NeighborExchange " | |||
| << "currently, but got " << nccl_kernel_type_; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InferCommType(kernel_node); | |||
| @@ -73,9 +75,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel { | |||
| } | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', input shape is null "; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -88,9 +89,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel { | |||
| } | |||
| for (size_t i = 0; i < output_num; ++i) { | |||
| auto shape = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, i); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclP2PGpuKernel', output shape is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -157,10 +157,12 @@ class NcclP2PGpuKernel : public NcclGpuKernel { | |||
| // send_rank_id and recv rank_id size needs to be equal to input_list size | |||
| if (send_rank_ids.size() != input_size_list_.size()) { | |||
| MS_LOG(ERROR) << "Trying to use AlltoAllv, but send_rank_ids vector size not equals to input_list size."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', trying to use AlltoAllv, the size of send_rank_ids vector " | |||
| << "should be " << input_size_list_.size() << ", but got " << send_rank_ids.size(); | |||
| } | |||
| if (recv_rank_ids.size() != output_size_list_.size()) { | |||
| MS_LOG(ERROR) << "Trying to use AlltoAllv, but recv_rank_ids vector size not equals to output_list size."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', trying to use AlltoAllv, the size of recv_rank_ids vector " | |||
| << "should be " << output_size_list_.size() << ", but got " << recv_rank_ids.size(); | |||
| } | |||
| // This implementation refers to NVIDIA NCCL 2.11 doc. | |||
| @@ -182,7 +184,8 @@ class NcclP2PGpuKernel : public NcclGpuKernel { | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kNcclTypeMap.find(kernel_name); | |||
| if (iter == kNcclTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Kernel " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', only support these types: AllToAllv, NeighborExchange " | |||
| << "currently, but got " << kernel_name; | |||
| } else { | |||
| nccl_kernel_type_ = iter->second; | |||
| } | |||
| @@ -46,21 +46,20 @@ class NcclRecvGpuKernel : public NcclGpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but NCCL receive needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| src_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "src_rank")); | |||
| group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup); | |||
| nccl_data_type_ = nccl_dtype(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclRecvGpuKernel', output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -46,12 +46,12 @@ class NcclSendGpuKernel : public NcclGpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but NCCL send needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| dest_rank_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "dest_rank")); | |||
| @@ -60,9 +60,8 @@ class NcclSendGpuKernel : public NcclGpuKernel { | |||
| MS_LOG(INFO) << "NcclSend dest rank is " << dest_rank_ << ", group name is " << group_name_; | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NcclSendGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -41,6 +41,9 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *x = GetDeviceAddress<T>(inputs, 0); | |||
| S *scale = GetDeviceAddress<S>(inputs, 1); | |||
| S *bias = GetDeviceAddress<S>(inputs, 2); | |||
| @@ -78,6 +81,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank); | |||
| if (root_rank) { | |||
| root_ = static_cast<int>(GetValue<int64_t>(root_rank)); | |||
| @@ -86,24 +90,22 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel { | |||
| group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 5) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNorm needs 5 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 5) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNorm needs 5 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 5, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (CHECK_NULL_INPUT(input_shape)) { | |||
| MS_LOG(WARNING) << "SyncBatchNorm input is null"; | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| auto input_shape_dims = input_shape.size(); | |||
| if (input_shape_dims != 4 && input_shape_dims != 2) { | |||
| MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size() | |||
| << ", SyncBatchNormGpuKernel input should be 2D or 4D"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got " | |||
| << input_shape_dims; | |||
| } | |||
| input_size_ = 1; | |||
| for (auto dim : input_shape) { | |||
| @@ -175,6 +177,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel { | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| is_null_input_ = false; | |||
| } | |||
| protected: | |||
| @@ -233,6 +236,7 @@ class SyncBatchNormGpuKernel : public NcclGpuKernel { | |||
| string group_name_; | |||
| int root_; | |||
| cudaStream_t comm_stream_; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -41,6 +41,9 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *dy = GetDeviceAddress<T>(inputs, 0); | |||
| T *x_input = GetDeviceAddress<T>(inputs, 1); | |||
| S *scale = GetDeviceAddress<S>(inputs, 2); | |||
| @@ -65,6 +68,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto root_rank = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrRootRank); | |||
| if (root_rank) { | |||
| root_ = static_cast<int>(GetValue<int64_t>(root_rank)); | |||
| @@ -73,24 +77,22 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel { | |||
| group_name_ = GetAttr<std::string>(kernel_node, kAttrGroup); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 5) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but SyncBatchNormGrad needs 5 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 3) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but SyncBatchNormGrad needs 5 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 3, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (CHECK_NULL_INPUT(input_shape)) { | |||
| MS_LOG(WARNING) << "SyncBatchNormGrad input is null"; | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| auto input_shape_dims = input_shape.size(); | |||
| if (input_shape_dims != 4 && input_shape_dims != 2) { | |||
| MS_LOG(EXCEPTION) << "Tensor shape is " << input_shape.size() | |||
| << ", SyncBatchNormGpuGrad input should be 2D or 4D"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input only should be 2 or 4, but got " | |||
| << input_shape_dims; | |||
| } | |||
| input_size_ = 1; | |||
| for (auto dim : input_shape) { | |||
| @@ -155,6 +157,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel { | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| is_null_input_ = false; | |||
| } | |||
| protected: | |||
| @@ -201,6 +204,7 @@ class SyncBatchNormGradGpuKernel : public NcclGpuKernel { | |||
| string group_name_; | |||
| int root_; | |||
| cudaStream_t comm_stream_; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -52,15 +53,13 @@ class AssignGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| (void)CheckParam(kernel_node); | |||
| auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'AssignGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -80,19 +79,16 @@ class AssignGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but AssignGpuKernel needs 2 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but AssignGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| return true; | |||
| } | |||
| std::vector<size_t> input_size_list_; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| @@ -46,7 +47,8 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| T *bboxes_addr = GetDeviceAddress<T>(outputs, 0); | |||
| if (inputs[0]->size != inputs[1]->size) { | |||
| MS_LOG(ERROR) << "Rois box size must equal with deltas box size -" << inputs[1]->size << ", but got" | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ | |||
| << "', rois box size must equal with deltas box size: " << inputs[1]->size << ", but got " | |||
| << inputs[0]->size; | |||
| return false; | |||
| } | |||
| @@ -54,7 +56,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| const size_t coordinate = 4; | |||
| const size_t block_size = inputs[0]->size / sizeof(T); | |||
| if ((block_size % coordinate) != 0) { | |||
| MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of the box should be a multiple of 4."; | |||
| return false; | |||
| } | |||
| @@ -65,11 +67,11 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxDecode needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| rois_size_ = sizeof(T); | |||
| deltas_size_ = sizeof(T); | |||
| @@ -78,9 +80,10 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") || | |||
| CHECK_SHAPE_NULL(labels_shape, kernel_name_, "deltas") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BoundingBoxDecodeGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -111,7 +114,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| means_.emplace_back(mean); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Attribute means type is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid."; | |||
| } | |||
| auto stds = prim->GetAttr("stds"); | |||
| @@ -124,7 +127,7 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| stds_.emplace_back(std); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Attribute stds type is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid."; | |||
| } | |||
| std::vector<int64_t> max_shape_me = GetAttr<std::vector<int64_t>>(kernel_node, "max_shape"); | |||
| @@ -133,11 +136,13 @@ class BoundingBoxDecodeGpuKernel : public GpuKernel { | |||
| wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip"); | |||
| if (means_.size() < coordinate_size || stds_.size() < coordinate_size) { | |||
| MS_LOG(EXCEPTION) << "The size of means or stds is less than 4."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the both size of means or stds cannot be less than 4, but got" | |||
| << " the size of means: " << means_.size() << ", the size of stds: " << stds_.size(); | |||
| } | |||
| if (max_shape_.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "The size of max_shape is less than 2."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of max_shape cannot be less than 2, but got " | |||
| << max_shape_.size(); | |||
| } | |||
| return true; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -28,7 +29,6 @@ template <typename T> | |||
| class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| public: | |||
| BoundingBoxEncodeGpuKernel() : anchor_size_(0), groundtruth_size_(0), deltas_size_(0), is_null_input_(false) {} | |||
| ~BoundingBoxEncodeGpuKernel() override = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| @@ -45,7 +45,8 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| T *deltas_addr = GetDeviceAddress<T>(outputs, 0); | |||
| if (inputs[0]->size != inputs[1]->size) { | |||
| MS_LOG(ERROR) << "Anchor box size must equal with groundtruth box size -" << inputs[1]->size << ", but got" | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ | |||
| << "', anchor box size must equal with groundtruth box size: " << inputs[1]->size << ", but got " | |||
| << inputs[0]->size; | |||
| return false; | |||
| } | |||
| @@ -53,7 +54,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| const size_t coordinate = 4; | |||
| const size_t block_size = inputs[0]->size / sizeof(T); | |||
| if ((block_size % coordinate) != 0) { | |||
| MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of the box should be a multiple of 4."; | |||
| return false; | |||
| } | |||
| @@ -64,11 +65,11 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxEncode needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| anchor_size_ = sizeof(T); | |||
| groundtruth_size_ = sizeof(T); | |||
| @@ -77,9 +78,10 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name_, "anchor_box") || | |||
| CHECK_SHAPE_NULL(labels_shape, kernel_name_, "groundtruth_box") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BoundingBoxEncodeGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -110,7 +112,7 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| means_.emplace_back(mean); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Attribute means type is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute means type is invalid."; | |||
| } | |||
| auto stds = prim->GetAttr("stds"); | |||
| MS_EXCEPTION_IF_NULL(stds); | |||
| @@ -122,11 +124,12 @@ class BoundingBoxEncodeGpuKernel : public GpuKernel { | |||
| stds_.emplace_back(std); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Attribute stds type is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute stds type is invalid."; | |||
| } | |||
| if (means_.size() < coordinate_size || stds_.size() < coordinate_size) { | |||
| MS_LOG(EXCEPTION) << "The size of means or stds is less than 4."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the sizes of means and stds cannot be less than 4, but got" | |||
| << " the size of means: " << means_.size() << ", the size of stds: " << stds_.size(); | |||
| } | |||
| return true; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_CHECK_VALID_GPU_KERNEL_H | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -28,7 +29,6 @@ template <typename T, typename S> | |||
| class CheckValidGpuKernel : public GpuKernel { | |||
| public: | |||
| CheckValidGpuKernel() : anchor_boxes_size_(0), img_metas_size_(0), valid_size_(0), is_null_input_(false) {} | |||
| ~CheckValidGpuKernel() override = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| @@ -48,7 +48,7 @@ class CheckValidGpuKernel : public GpuKernel { | |||
| const size_t coordinate = 4; | |||
| const size_t block_size = inputs[0]->size / sizeof(T); | |||
| if ((block_size % coordinate) != 0) { | |||
| MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of the box should be a multiple of 4."; | |||
| return false; | |||
| } | |||
| @@ -58,11 +58,11 @@ class CheckValidGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but CheckValid needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| anchor_boxes_size_ = sizeof(T); | |||
| img_metas_size_ = sizeof(T); | |||
| @@ -71,10 +71,10 @@ class CheckValidGpuKernel : public GpuKernel { | |||
| auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto img_metas_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto valid_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(img_metas_shape) || CHECK_NULL_INPUT(valid_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "bboxes") || | |||
| CHECK_SHAPE_NULL(img_metas_shape, kernel_name_, "img_metas") || | |||
| CHECK_SHAPE_NULL(valid_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CheckValidGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -62,18 +62,17 @@ class GpuConvertToDynamicShapeGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_count != 1) { | |||
| MS_LOG(ERROR) << input_count << "inputs were provided, but GpuConvertToDynamicShapeGpuKernel expects 1."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_count; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape_, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'GpuConvertToDynamicShapeGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -29,7 +29,6 @@ template <typename T> | |||
| class IOUGpuKernel : public GpuKernel { | |||
| public: | |||
| IOUGpuKernel() : gt_boxes_size_(0), anchor_boxes_size_(0), iou_size_(0), mode_(0), is_null_input_(false) {} | |||
| ~IOUGpuKernel() override = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| @@ -50,7 +49,7 @@ class IOUGpuKernel : public GpuKernel { | |||
| const size_t block_size_0 = inputs[0]->size / sizeof(T); | |||
| const size_t block_size_1 = inputs[1]->size / sizeof(T); | |||
| if ((block_size_0 % coordinate) != 0 || (block_size_1 % coordinate) != 0) { | |||
| MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; | |||
| MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of the box should be a multiple of 4."; | |||
| return false; | |||
| } | |||
| @@ -65,8 +64,7 @@ class IOUGpuKernel : public GpuKernel { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but IOU needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| gt_boxes_size_ = sizeof(T); | |||
| anchor_boxes_size_ = sizeof(T); | |||
| @@ -75,10 +73,10 @@ class IOUGpuKernel : public GpuKernel { | |||
| auto gt_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto anchor_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto iou_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(gt_boxes_shape) || CHECK_NULL_INPUT(anchor_boxes_shape) || CHECK_NULL_INPUT(iou_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(gt_boxes_shape, kernel_name_, "anchor_boxes") || | |||
| CHECK_SHAPE_NULL(anchor_boxes_shape, kernel_name_, "gt_boxes") || | |||
| CHECK_SHAPE_NULL(iou_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'IOUGpuKernel', input or output is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -103,8 +101,7 @@ class IOUGpuKernel : public GpuKernel { | |||
| } else if (mode == "iof") { | |||
| mode_ = 1; | |||
| } else { | |||
| MS_LOG(ERROR) << "Mode only support 'iou' or 'iof'."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', mode only supports 'iou' or 'iof'."; | |||
| } | |||
| return true; | |||
| @@ -66,26 +66,25 @@ class BatchNormFold2GpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GpuKernel needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "BatchNormFold2GpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "BatchNormFold2GpuKernel input shape needs (N,C,H,W)."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| batch_size_ = input_shape[0]; | |||
| channel_ = input_shape[1]; | |||
| @@ -94,26 +94,25 @@ class BatchNormFold2GradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but BatchNormFold2GradGpuKernel needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "BatchNormFold2GradGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "BatchNormFold2GradGpuKernel input shape needs (N,C,H,W)."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| batch_size_ = input_shape[0]; | |||
| channel_ = input_shape[1]; | |||
| @@ -99,18 +99,17 @@ class BatchNormFoldGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 4) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << " but BatchNormFold GpuKernel OP needs 4 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 4) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFold GpuKernel OP needs 4 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 4, but got " << output_num; | |||
| } | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| @@ -122,16 +121,14 @@ class BatchNormFoldGpuKernel : public GpuKernel { | |||
| freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn"))); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BatchNormFoldGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input shape is " << input_shape.size() | |||
| << ", but BatchNormFold GpuKernel OP needs 4DTensor input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| CheckTensorSize({input_shape}); | |||
| batch_ = input_shape[0]; | |||
| @@ -78,18 +78,17 @@ class BatchNormFoldGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but BatchNormFoldGrad GpuKernel OP needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but BatchNormFoldGrad GpuKernel OP needs 4 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| @@ -99,16 +98,14 @@ class BatchNormFoldGradGpuKernel : public GpuKernel { | |||
| freeze_bn_ = static_cast<int>(GetValue<int64_t>(prim->GetAttr("freeze_bn"))); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BatchNormFoldGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input shape is " << input_shape.size() | |||
| << ", but BatchNormFoldGrad GpuKernel OP needs 4DTensor input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| batch_ = input_shape[0]; | |||
| channel_ = input_shape[1]; | |||
| @@ -49,24 +49,23 @@ class CorrectionMulGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGpuKernel needs 3."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CorrectionMulGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "CorrectionMulGpuKernel input shape needs (N,C,H,W)."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| batch_size_ = input_shape[0]; | |||
| channel_ = input_shape[1]; | |||
| @@ -55,24 +55,23 @@ class CorrectionMulGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 4) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but CorrectionMulGradGpuKernel needs 4."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 4, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CorrectionMulGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "CorrectionMulGradGpuKernel input shape needs (N,C,H,W)."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| batch_size_ = input_shape[0]; | |||
| channel_ = input_shape[1]; | |||
| @@ -45,17 +45,16 @@ const std::vector<size_t> &FakeLearnedScaleQuantPerChannelGpuKernel::GetWorkspac | |||
| } | |||
| bool FakeLearnedScaleQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num | |||
| << ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 3 Input."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num | |||
| << ", but FakeLearnedScaleQuantPerChannel GpuKernel OP needs 1 output."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| quant_delay_ = static_cast<int>(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay"))); | |||