| @@ -41,13 +41,17 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| input_width_(0), | |||
| final_height_(0), | |||
| final_width_(0), | |||
| channel_(0) {} | |||
| channel_(0), | |||
| is_null_input_(false) {} | |||
| ~CropAndResizeGpuKernel() override = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| VARIABLE_NOT_USED(workspace); | |||
| T *input_image = GetDeviceAddress<T>(inputs, 0); | |||
| float *input_boxes = GetDeviceAddress<float>(inputs, 1); | |||
| @@ -72,6 +76,18 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| } | |||
| // input image | |||
| auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_image_shape) || CHECK_NULL_INPUT(input_boxes_shape) || | |||
| CHECK_NULL_INPUT(input_box_index_shape) || CHECK_NULL_INPUT(input_crop_size_shape) || | |||
| CHECK_NULL_INPUT(output_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CropAndResizeGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| size_t input_image_shape_len = input_image_shape.size(); | |||
| if (input_image_shape_len != 4) { | |||
| MS_LOG(ERROR) << " image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4 | |||
| @@ -86,7 +102,6 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| input_height_ = input_image_shape[1]; | |||
| input_width_ = input_image_shape[2]; | |||
| // input boxes | |||
| auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| size_t input_boxes_shape_len = input_boxes_shape.size(); | |||
| if (input_boxes_shape_len != 2) { | |||
| MS_LOG(ERROR) << "Boxes is rank" << input_boxes_shape_len << " but CropAndResize supports only rank " << 2 | |||
| @@ -99,7 +114,6 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| } | |||
| input_boxes_size_ *= sizeof(float); | |||
| // input box_index | |||
| auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| size_t input_box_index_shape_len = input_box_index_shape.size(); | |||
| if (input_box_index_shape_len != 1) { | |||
| MS_LOG(ERROR) << "Box_index is rank " << input_box_index_shape_len << " but CropAndResize supports only rank " | |||
| @@ -110,7 +124,6 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| input_box_ind_size_ *= input_box_index_shape[0]; // single dim required | |||
| input_box_ind_size_ *= sizeof(int); | |||
| // input crop_size | |||
| auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| size_t input_crop_size_shape_len = input_crop_size_shape.size(); | |||
| if (input_crop_size_shape_len != 1) { | |||
| MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank " | |||
| @@ -126,8 +139,11 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| input_crop_size_ *= input_crop_size_shape[0]; | |||
| input_crop_size_ *= sizeof(int); | |||
| // output | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| auto output_shape_len = output_shape.size(); | |||
| if (output_shape_len != 4) { | |||
| MS_LOG(ERROR) << "For 'CropAndResize', the rank of output should be 4, but got " << output_shape_len; | |||
| return false; | |||
| } | |||
| output_size_ = 1; | |||
| for (size_t i = 0; i < output_shape_len; i++) { | |||
| output_size_ *= output_shape[i]; | |||
| @@ -175,6 +191,7 @@ class CropAndResizeGpuKernel : public GpuKernel { | |||
| int final_height_; | |||
| int final_width_; | |||
| int channel_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| @@ -1,4 +1,3 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| @@ -37,6 +36,9 @@ class DepthToSpaceFwdKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| // get device buffer ptr | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| T *output = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -71,6 +73,12 @@ class DepthToSpaceFwdKernel : public GpuKernel { | |||
| } | |||
| // check input_shape | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'DepthToSpaceGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ != DEPTHTOSPACE_BUFFER_DIMENSION) { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but DepthToSpace supports 4-D tensor."; | |||
| @@ -111,6 +119,7 @@ class DepthToSpaceFwdKernel : public GpuKernel { | |||
| oc_ = 0; | |||
| oh_ = 0; | |||
| ow_ = 0; | |||
| is_null_input_ = false; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| @@ -140,6 +149,7 @@ class DepthToSpaceFwdKernel : public GpuKernel { | |||
| size_t oc_; | |||
| size_t oh_; | |||
| size_t ow_; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -37,6 +37,9 @@ class EmbeddingLookupKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| VARIABLE_NOT_USED(workspace); | |||
| T *input_addr = GetDeviceAddress<T>(inputs, 0); | |||
| S *indices_addr = GetDeviceAddress<S>(inputs, 1); | |||
| @@ -69,6 +72,16 @@ class EmbeddingLookupKernel : public GpuKernel { | |||
| input_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| indices_shapes_ = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 1); | |||
| output_shapes_ = AnfAlgo::GetOutputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || CHECK_NULL_INPUT(output_shapes_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'EmbeddingLookupGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shapes_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'EmbeddingLookupGpuKernel', the rank of input cannot be less than 1."; | |||
| } | |||
| if (!is_dynamic_shape_) { | |||
| offset_ = GetAttr<int64_t>(kernel_node, "offset"); | |||
| } | |||
| @@ -78,6 +91,7 @@ class EmbeddingLookupKernel : public GpuKernel { | |||
| } | |||
| void ResetResource() noexcept override { | |||
| is_dynamic_shape_ = false; | |||
| is_null_input_ = false; | |||
| input_shapes_.clear(); | |||
| indices_shapes_.clear(); | |||
| output_shapes_.clear(); | |||
| @@ -138,6 +152,7 @@ class EmbeddingLookupKernel : public GpuKernel { | |||
| size_t dims_[3] = {}; | |||
| int64_t offset_; | |||
| bool is_dynamic_shape_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| @@ -40,6 +40,9 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| T *output = GetDeviceAddress<T>(outputs, 0); | |||
| T *t_input = GetDeviceAddress<T>(workspace, 0); | |||
| @@ -92,16 +95,28 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ExtractImagePatches has 1 output."; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ExtractImagePatchesGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| input_size_ = 1; | |||
| for (size_t i = 0; i < input_shape.size(); i++) { | |||
| input_size_ *= input_shape[i]; | |||
| input_shape_.push_back(input_shape[i]); | |||
| } | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| output_size_ = 1; | |||
| for (size_t i = 0; i < output_shape.size(); i++) { | |||
| output_size_ *= output_shape[i]; | |||
| } | |||
| if (input_shape.size() != 4 || output_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of input and output should be 4, " | |||
| << "but got the rank of input: " << input_shape.size() | |||
| << ", the rank of output: " << output_shape.size(); | |||
| } | |||
| // transposed NHWC shape | |||
| t_output_shape_ = {output_shape[0], output_shape[2], output_shape[3], output_shape[1]}; | |||
| @@ -109,6 +124,11 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| auto ksizes = GetAttr<std::vector<int64_t>>(kernel_node, "ksizes"); | |||
| auto strides = GetAttr<std::vector<int64_t>>(kernel_node, "strides"); | |||
| auto rates = GetAttr<std::vector<int64_t>>(kernel_node, "rates"); | |||
| if (ksizes.size() != 4 || strides.size() != 4 || rates.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "For 'ExtractImagePatchesGpuKernel', the rank of ksizes, strides and rates should be 4, " | |||
| << "but got the rank of ksizes: " << ksizes.size() | |||
| << ", the rank of strides: " << strides.size() << ", the rank of rates: " << rates.size(); | |||
| } | |||
| ksize_row_ = ksizes[2]; | |||
| ksize_col_ = ksizes[3]; | |||
| @@ -127,6 +147,9 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| int64_t patch_rows_eff = ksize_row_ + (ksize_row_ - 1) * (rate_row_ - 1); | |||
| int64_t patch_cols_eff = ksize_col_ + (ksize_col_ - 1) * (rate_col_ - 1); | |||
| MS_EXCEPTION_IF_ZERO("stride row", stride_row_); | |||
| MS_EXCEPTION_IF_ZERO("stride col", stride_col_); | |||
| if (padding == "VALID") { | |||
| output_rows_ = std::ceil((input_row_size_ - patch_rows_eff + 1.f) / static_cast<float>(stride_row_)); | |||
| output_cols_ = std::ceil((input_col_size_ - patch_cols_eff + 1.f) / static_cast<float>(stride_col_)); | |||
| @@ -148,6 +171,7 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| row_input_stride_ = input_depth * input_col_size_; | |||
| patch_input_stride_ = input_depth * input_col_size_ * input_row_size_; | |||
| output_depth_ = input_depth; | |||
| MS_EXCEPTION_IF_ZERO("other stride", other_stride_); | |||
| need_batch_ = (output_size_ - 1) / other_stride_; | |||
| InitSizeLists(); | |||
| @@ -177,6 +201,7 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| row_input_stride_ = 1; | |||
| patch_input_stride_ = 1; | |||
| output_depth_ = 1; | |||
| is_null_input_ = false; | |||
| input_shape_.clear(); | |||
| t_output_shape_.clear(); | |||
| input_size_list_.clear(); | |||
| @@ -208,6 +233,7 @@ class ExtractImagePatchesKernel : public GpuKernel { | |||
| int64_t output_rows_; | |||
| int64_t output_cols_; | |||
| bool need_batch_; | |||
| bool is_null_input_; | |||
| int64_t row_stride_; | |||
| int64_t patch_stride_; | |||
| int64_t other_stride_; | |||
| @@ -40,6 +40,9 @@ class InTopKGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *predictions_device = GetDeviceAddress<T>(inputs, 0); | |||
| int32_t *targets_device = GetDeviceAddress<int32_t>(inputs, 1); | |||
| @@ -106,6 +109,16 @@ class InTopKGpuKernel : public GpuKernel { | |||
| } | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (input_shape_.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'InTopKGpuKernel', the rank of input cannot be less than 2, but got " | |||
| << input_shape_.size(); | |||
| } | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'InTopKGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| input_rank_ = input_shape_.size(); | |||
| input_size_ = 1; | |||
| for (size_t i = 0; i < input_rank_; i++) { | |||
| @@ -136,6 +149,7 @@ class InTopKGpuKernel : public GpuKernel { | |||
| input_rank_ = 0; | |||
| outer_size_ = 0; | |||
| inner_size_ = 0; | |||
| is_null_input_ = false; | |||
| top_k_init_ = static_cast<T>(0.); | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| @@ -171,6 +185,7 @@ class InTopKGpuKernel : public GpuKernel { | |||
| // for topk | |||
| size_t outer_size_; | |||
| size_t inner_size_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| @@ -41,6 +41,9 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *ones_device = GetDeviceAddress<T>(workspace, 0); | |||
| CalOnesLike(output_size_, static_cast<T *>(nullptr), ones_device, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| @@ -79,9 +82,14 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| input_size_ = 1; | |||
| input_count_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| for (size_t i = 0; i < input_count_; i++) { | |||
| size_t input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i)[0]; | |||
| input_shapes_.push_back(input_shape); | |||
| input_size_ *= input_shape; | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); | |||
| if (input_shape.size() < 1) { | |||
| MS_LOG(ERROR) << "For 'MeshGridGpuKernel', the rank of input " << i << " cannot be less than 1."; | |||
| return false; | |||
| } | |||
| size_t input_size = input_shape[0]; | |||
| input_shapes_.push_back(input_size); | |||
| input_size_ *= input_size; | |||
| } | |||
| output_size_ = 1; | |||
| @@ -89,6 +97,12 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| // inferred shape swaps output shape for us if needed | |||
| output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(output_shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MeshGridGpuKernel', output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (output_count_ != input_count_) { | |||
| MS_LOG(ERROR) << "output count is " << output_count_ << ", but MeshgridGpuKernel needs " << input_count_ | |||
| @@ -101,7 +115,7 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| } | |||
| // need to pad output shape with ones for broadcast kernel | |||
| for (size_t i = 0; i < output_shape_.size() - MAX_DIMS; i++) { | |||
| for (size_t i = 0; i < MAX_DIMS - output_shape_.size(); i++) { | |||
| output_shape_.push_back(1); | |||
| } | |||
| @@ -118,6 +132,7 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| output_size_ = 0; | |||
| output_count_ = 0; | |||
| swap_indexing_ = true; | |||
| is_null_input_ = false; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| @@ -145,6 +160,7 @@ class MeshgridGpuKernel : public GpuKernel { | |||
| size_t output_size_; | |||
| size_t output_count_; | |||
| bool swap_indexing_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| @@ -36,6 +36,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel { | |||
| input_size_(0), | |||
| batch_dim_(0), | |||
| seq_dim_(0), | |||
| is_null_input_(false), | |||
| seq_len_size_(0), | |||
| total_index_dim_(0), | |||
| output_size_(0), | |||
| @@ -46,6 +47,9 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel { | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| S *seq_len = GetDeviceAddress<S>(inputs, 1); | |||
| size_t *input_shape_ptr = GetDeviceAddress<size_t>(workspace, 0); | |||
| @@ -75,23 +79,23 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel { | |||
| return false; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (CHECK_NULL_INPUT(input_shape_)) { | |||
| MS_LOG(WARNING) << "ReverseSequence input is null"; | |||
| auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(seq_len_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReverseSequenceGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'ReverseSequenceGpuKernel', the rank of input cannot be less than 1, but got " | |||
| << input_shape_.size(); | |||
| } | |||
| input_size_ = 1; | |||
| shape_size_ = input_shape_.size(); // required for calls | |||
| for (size_t i = 0; i < shape_size_; i++) { | |||
| input_size_ *= input_shape_[i]; | |||
| } | |||
| // get seq len shape | |||
| auto seq_len_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| if (CHECK_NULL_INPUT(seq_len_shape)) { | |||
| MS_LOG(WARNING) << "ReverseSequence seq lengths input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| seq_len_size_ = seq_len_shape.size(); | |||
| output_size_ = input_size_; // size does not change | |||
| // Allocate workspace memory to use for storing indices for each thread to compute with | |||
| @@ -116,6 +120,7 @@ class ReverseSequenceGpuFwdKernel : public GpuKernel { | |||
| size_t input_size_; | |||
| int64_t batch_dim_; | |||
| int64_t seq_dim_; | |||
| bool is_null_input_; | |||
| size_t seq_len_size_; | |||
| size_t total_index_dim_; | |||
| size_t output_size_; | |||
| @@ -38,6 +38,9 @@ class ReverseV2GpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_device = GetDeviceAddress<T>(inputs, 0); | |||
| T *output_device = GetDeviceAddress<T>(outputs, 0); | |||
| size_t *input_shape_device = GetDeviceAddress<size_t>(workspace, 0); | |||
| @@ -79,7 +82,16 @@ class ReverseV2GpuKernel : public GpuKernel { | |||
| } | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReverseV2GpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| input_rank_ = input_shape_.size(); | |||
| if (input_rank_ < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of input cannot be less than 1, but got " << input_rank_; | |||
| } | |||
| input_size_ = 1; | |||
| for (size_t i = 0; i < input_rank_; i++) { | |||
| input_size_ *= input_shape_[i]; | |||
| @@ -92,6 +104,9 @@ class ReverseV2GpuKernel : public GpuKernel { | |||
| } | |||
| axis_ = GetAttr<std::vector<int64_t>>(kernel_node, "axis"); | |||
| if (axis_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'ReverseV2GpuKernel', the rank of axis cannot be less than 1, but got " << axis_.size(); | |||
| } | |||
| for (int64_t &dimension : axis_) { | |||
| if (dimension < 0) { | |||
| dimension += input_rank_; | |||
| @@ -106,6 +121,7 @@ class ReverseV2GpuKernel : public GpuKernel { | |||
| void ResetResource() noexcept override { | |||
| input_size_ = 0; | |||
| input_rank_ = 0; | |||
| is_null_input_ = false; | |||
| input_shape_.clear(); | |||
| strides_.clear(); | |||
| axis_.clear(); | |||
| @@ -131,6 +147,7 @@ class ReverseV2GpuKernel : public GpuKernel { | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<int64_t> strides_; | |||
| std::vector<int64_t> axis_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| @@ -42,6 +42,9 @@ class SortGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_device = GetDeviceAddress<T>(inputs, 0); | |||
| T *output_device = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -127,10 +130,17 @@ class SortGpuKernel : public GpuKernel { | |||
| } | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SortGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| input_rank_ = input_shape_.size(); | |||
| if (input_rank_ > TRANSPOSE_MAX_DIMENSION) { | |||
| MS_LOG(ERROR) << "Sort cannot support input that has more than " << TRANSPOSE_MAX_DIMENSION << " dimensions."; | |||
| if (input_rank_ > TRANSPOSE_MAX_DIMENSION || input_rank_ < 1) { | |||
| MS_LOG(ERROR) << "For 'SortGpuKernel', the rank of input cannot be more than " << TRANSPOSE_MAX_DIMENSION | |||
| << " dimensions or less than 1 dimension."; | |||
| return false; | |||
| } | |||
| @@ -145,6 +155,11 @@ class SortGpuKernel : public GpuKernel { | |||
| if (axis_ < 0) { | |||
| axis_ += input_rank_; | |||
| } | |||
| if ((size_t)axis_ >= input_rank_) { | |||
| MS_LOG(ERROR) << "For 'SortGpuKernel', axis should be less than the rank of input, but got axis: " << axis_ | |||
| << " the rank of input: " << input_rank_; | |||
| return false; | |||
| } | |||
| perm_.resize(input_rank_); | |||
| std::iota(perm_.begin(), perm_.end(), 0); | |||
| @@ -172,6 +187,7 @@ class SortGpuKernel : public GpuKernel { | |||
| input_size_ = 0; | |||
| axis_ = 0; | |||
| descending_ = false; | |||
| is_null_input_ = false; | |||
| input_shape_.clear(); | |||
| input_rank_ = 0; | |||
| transposed_shape_.clear(); | |||
| @@ -206,6 +222,7 @@ class SortGpuKernel : public GpuKernel { | |||
| size_t input_size_; | |||
| int64_t axis_; | |||
| bool descending_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_shape_; | |||
| size_t input_rank_; | |||
| @@ -37,6 +37,9 @@ class SpaceToDepthFwdKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| // get device buffer ptr | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| T *output = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -71,6 +74,12 @@ class SpaceToDepthFwdKernel : public GpuKernel { | |||
| } | |||
| // check input_shape | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SpaceToDepthGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ != SPACETODEPTH_BUFFER_DIMENSION) { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but SpaceToDepth supports 4-D tensor."; | |||
| @@ -102,6 +111,7 @@ class SpaceToDepthFwdKernel : public GpuKernel { | |||
| input_size_ = 0; | |||
| output_size_ = 0; | |||
| block_size_ = 0; | |||
| is_null_input_ = false; | |||
| in_ = 0; | |||
| ic_ = 0; | |||
| ih_ = 0; | |||
| @@ -131,6 +141,7 @@ class SpaceToDepthFwdKernel : public GpuKernel { | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| size_t block_size_; | |||
| bool is_null_input_; | |||
| size_t in_; | |||
| size_t ic_; | |||
| size_t ih_; | |||
| @@ -31,11 +31,14 @@ namespace kernel { | |||
| template <typename T> | |||
| class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| public: | |||
| TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0) {} | |||
| TensorCopySlicesGpuKernel() : input_size_(0), update_size_(0), output_size_(0), is_null_input_(false) {} | |||
| ~TensorCopySlicesGpuKernel() {} | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_addr = GetDeviceAddress<T>(inputs, 0); | |||
| T *update_addr = GetDeviceAddress<T>(inputs, 1); | |||
| T *output_addr = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -69,7 +72,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(update_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TensorCopySlicesGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape_.size() > kMaxDims) { | |||
| MS_LOG(ERROR) << "StridedSlice support dims no more than " << kMaxDims << ", but the input shape is " | |||
| << input_shape_.size(); | |||
| @@ -80,6 +89,13 @@ class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| end_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrEnd); | |||
| strides_ = GetAttr<std::vector<int64_t>>(kernel_node, kAttrStrides); | |||
| if (begin_.size() > input_shape_.size()) { | |||
| MS_LOG(ERROR) << "For 'TensorCopySlicesGpuKernel', the rank of begin attr cannot be more than the rank of input, " | |||
| << "but got the rank of begin attr: " << begin_.size() | |||
| << ", the rank of input: " << input_shape_.size(); | |||
| return false; | |||
| } | |||
| FillEmptyDims(kernel_node); | |||
| output_shape_ = input_shape_; | |||
| FillUpdateDim(); | |||
| @@ -100,6 +116,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| auto len = begin_.size(); | |||
| size_t total_input_num = 1; | |||
| for (size_t i = 0; i < len; ++i) { | |||
| MS_EXCEPTION_IF_ZERO("strides_[i]", strides_[i]); | |||
| total_input_num *= ((end_[i] - begin_[i]) / strides_[i]); | |||
| } | |||
| if (total_input_num != total_update_num) { | |||
| @@ -161,6 +178,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| if (begin_[i] <= end_[i] && strides_[i] > 0) { | |||
| update_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1); | |||
| } else if (begin_[i] > end_[i] && strides_[i] < 0) { | |||
| MS_EXCEPTION_IF_ZERO("strides_[i] + 1", strides_[i] + 1); | |||
| update_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1); | |||
| } else { | |||
| update_shape_.push_back(0); | |||
| @@ -185,6 +203,7 @@ class TensorCopySlicesGpuKernel : public GpuKernel { | |||
| size_t update_size_; | |||
| size_t output_size_; | |||
| inline static size_t kMaxDims = 8; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -38,7 +38,8 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| work_shape_(nullptr), | |||
| indices_dim_0_(0), | |||
| indices_dim_1_(0), | |||
| memcpy_flag_(false) {} | |||
| memcpy_flag_(false), | |||
| is_null_input_(false) {} | |||
| ~TensorScatterAddGpuFwdKernel() { | |||
| if (indices_stride_ != nullptr) { | |||
| device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_)); | |||
| @@ -54,6 +55,9 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| VARIABLE_NOT_USED(workspace); | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| S *indices = GetDeviceAddress<S>(inputs, 1); | |||
| @@ -77,10 +81,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| const size_t update_size = update_size_ / sizeof(T); | |||
| const size_t output_size = output_size_ / sizeof(T); | |||
| CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, | |||
| cudaMemcpyAsync(&output[0], &input[0], input_size_, cudaMemcpyDeviceToDevice, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cudaMemcpyAsync output failed"); | |||
| CHECK_CUDA_RET_WITH_EXCEPT( | |||
| kernel_node_, | |||
| cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), | |||
| "cudaMemcpyAsync output failed"); | |||
| TensorScatterAdd(input, indices, update, output, block_size_, update_size, output_size, indices_dim_0_, | |||
| indices_dim_1_, indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| @@ -106,6 +110,13 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(update_shapes_) || CHECK_NULL_INPUT(indices_shapes_) || | |||
| CHECK_NULL_INPUT(input_shapes_) || CHECK_NULL_INPUT(output_shapes_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'TensorScatterAddGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| std::vector<size_t> shape_me = input_shapes_; | |||
| (void)std::transform(shape_me.begin(), shape_me.end(), std::back_inserter(vec_work_shape_), | |||
| @@ -126,6 +137,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len; | |||
| } | |||
| work_shape_ = static_cast<S *>(work_shape_work); | |||
| if (vec_work_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of vec work cannot be less than 1, but got " | |||
| << vec_work_shape_.size(); | |||
| } | |||
| InitSizeLists(); | |||
| @@ -160,6 +175,10 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| output_size_ *= output_shapes_[i]; | |||
| } | |||
| if (indices_shapes_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', the rank of indices cannot be less than 1, but got " | |||
| << indices_shapes_.size(); | |||
| } | |||
| // calculate indices dim 0/1 | |||
| indices_dim_0_ = indices_shapes_[0]; | |||
| indices_dim_1_ = indices_shapes_[indices_shapes_.size() - 1]; | |||
| @@ -169,6 +188,11 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| block_size_ *= output_shapes_[i]; | |||
| } | |||
| if (indices_dim_1_ < 1 || indices_dim_1_ > output_shapes_.size()) { | |||
| MS_LOG(EXCEPTION) << "For 'TensorScatterAddGpuKernel', indices_shapes[-1] cannot be less than 1 and greater than " | |||
| << "the rank of output_shapes, but got indices_shapes[-1]: " << indices_dim_1_ | |||
| << ", rank of output_shapes: " << output_shapes_.size(); | |||
| } | |||
| // calculate indices_stride | |||
| vec_indices_stride_.resize(indices_dim_1_, 0); | |||
| vec_indices_stride_[indices_dim_1_ - 1] = block_size_; | |||
| @@ -201,6 +225,7 @@ class TensorScatterAddGpuFwdKernel : public GpuKernel { | |||
| size_t indices_dim_0_; | |||
| size_t indices_dim_1_; | |||
| bool memcpy_flag_; | |||
| bool is_null_input_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -63,12 +63,10 @@ class TileGpuKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Tile needs 1 input."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Tile has 1 output."; | |||
| return false; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| @@ -78,6 +76,10 @@ class TileGpuKernel : public GpuKernel { | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (output_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'TileGpuKernel', the rank of output cannot be less than 1, but got " | |||
| << output_shape_.size(); | |||
| } | |||
| input_size_ = 1; | |||
| for (size_t i = 0; i < input_shape_.size(); i++) { | |||
| input_size_ *= input_shape_[i]; | |||
| @@ -140,6 +140,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string & | |||
| } | |||
| bool flag = true; | |||
| auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize(); | |||
| if (kernel_info->GetInputNum() > 0) { | |||
| MS_EXCEPTION_IF_ZERO("attr size", attr_size); | |||
| } | |||
| // data type matching check of all input parameters of kernel | |||
| for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) { | |||
| GpuKernelFactory::CheckSM(kernel_info, input_index); | |||
| @@ -153,6 +156,9 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string & | |||
| continue; | |||
| } | |||
| attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize(); | |||
| if (kernel_info->GetOutputNum() > 0) { | |||
| MS_EXCEPTION_IF_ZERO("attr size", attr_size); | |||
| } | |||
| // data type matching check of all output parameters of kernel | |||
| for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) { | |||
| if (kernel_info->GetOutputDeviceType(output_index) != | |||
| @@ -28,7 +28,14 @@ constexpr int kMaxDimsSize = 3; | |||
| template <typename T> | |||
| class CumProdGpuKernel : public GpuKernel { | |||
| public: | |||
| CumProdGpuKernel() : exclusive_(false), reverse_(false), axis_(0), input_size_0_(0), stride_(0), stride2_(0) {} | |||
| CumProdGpuKernel() | |||
| : exclusive_(false), | |||
| reverse_(false), | |||
| is_null_input_(false), | |||
| axis_(0), | |||
| input_size_0_(0), | |||
| stride_(0), | |||
| stride2_(0) {} | |||
| ~CumProdGpuKernel() = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| @@ -37,6 +44,9 @@ class CumProdGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_addr = GetDeviceAddress<T>(inputs, 0); | |||
| T *output_addr = GetDeviceAddress<T>(outputs, 0); | |||
| T *ws_addr = GetDeviceAddress<T>(workspace, 0); | |||
| @@ -51,6 +61,12 @@ class CumProdGpuKernel : public GpuKernel { | |||
| } | |||
| input_size_0_ = sizeof(T); | |||
| shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'CumProdGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis")); | |||
| exclusive_ = GetAttr<bool>(kernel_node, "exclusive"); | |||
| reverse_ = GetAttr<bool>(kernel_node, "reverse"); | |||
| @@ -93,6 +109,7 @@ class CumProdGpuKernel : public GpuKernel { | |||
| } | |||
| bool exclusive_; | |||
| bool reverse_; | |||
| bool is_null_input_; | |||
| int axis_; | |||
| size_t input_size_0_; | |||
| size_t stride_; | |||
| @@ -39,6 +39,9 @@ class IdentityGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_addr = GetDeviceAddress<T>(inputs, 0); | |||
| T *output_addr = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -36,6 +36,9 @@ class ApplyGradientDescentKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| VARIABLE_NOT_USED(workspace); | |||
| T *var = GetDeviceAddress<T>(inputs, 0); | |||
| T *alpha = GetDeviceAddress<T>(inputs, 1); | |||
| @@ -50,14 +53,18 @@ class ApplyGradientDescentKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ApplyGradientDescent needs 3 inputs."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ApplyGradientDescent has 1 output."; | |||
| return false; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ApplyGradientDescentGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| input_size_ = 1; | |||
| for (size_t i = 0; i < input_shape.size(); i++) { | |||
| input_size_ *= input_shape[i]; | |||
| @@ -68,6 +75,7 @@ class ApplyGradientDescentKernel : public GpuKernel { | |||
| void ResetResource() noexcept override { | |||
| input_size_ = 1; | |||
| is_null_input_ = false; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| @@ -83,6 +91,7 @@ class ApplyGradientDescentKernel : public GpuKernel { | |||
| private: | |||
| size_t input_size_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| @@ -36,6 +36,9 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *predict = GetDeviceAddress<T>(inputs, 0); | |||
| T *target = GetDeviceAddress<T>(inputs, 1); | |||
| T *weight = GetDeviceAddress<T>(inputs, 2); | |||
| @@ -69,25 +72,42 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 4) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but BCEWithLogitsLoss needs 4 inputs."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but BCEWithLogitsLoss has 1 output."; | |||
| return false; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(weight_shape_) || CHECK_NULL_INPUT(pos_weight_shape_); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'BCEWithLogitsLossGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of input cannot be less than 1, but got " | |||
| << input_shape_.size(); | |||
| } | |||
| if (weight_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of weight cannot be less than 1, but got " | |||
| << weight_shape_.size(); | |||
| } | |||
| if (pos_weight_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'BCEWithLogitsLossGpuKernel', the rank of pos_weight cannot be less than 1, but got " | |||
| << pos_weight_shape_.size(); | |||
| } | |||
| input_size_ = 1; | |||
| if (input_shape_.size() > MAX_LOGITS_DIMENSION) { | |||
| MS_LOG(EXCEPTION) << "Input dimension is " << input_shape_.size() | |||
| << ", but BCEWithLogitsLoss can only support up to " << MAX_LOGITS_DIMENSION << "-D."; | |||
| return false; | |||
| } | |||
| for (size_t i = 0; i < input_shape_.size(); i++) { | |||
| input_size_ *= input_shape_[i]; | |||
| } | |||
| // weight shape | |||
| weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| weight_size_ = 1; | |||
| for (size_t i = 0; i < weight_shape_.size(); i++) { | |||
| weight_size_ *= weight_shape_[i]; | |||
| @@ -95,7 +115,6 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| weight_need_broadcast_ = NeedBroadcast(&weight_shape_, input_shape_); | |||
| // pos_weight shape | |||
| pos_weight_size_ = 1; | |||
| pos_weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| for (size_t i = 0; i < pos_weight_shape_.size(); i++) { | |||
| pos_weight_size_ *= pos_weight_shape_[i]; | |||
| } | |||
| @@ -110,6 +129,7 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| pos_weight_size_ = 1; | |||
| weight_need_broadcast_ = false; | |||
| pos_weight_need_broadcast_ = false; | |||
| is_null_input_ = false; | |||
| input_shape_.clear(); | |||
| weight_shape_.clear(); | |||
| pos_weight_shape_.clear(); | |||
| @@ -137,7 +157,7 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| bool NeedBroadcast(std::vector<size_t> *shape, const std::vector<size_t> &result_shape) { | |||
| // result_shape is larger that shape | |||
| // and shape is able to broadcasted to result_shape | |||
| if (shape->size() != result_shape.size()) { | |||
| if (shape->size() < result_shape.size()) { | |||
| size_t fill_size = result_shape.size() - shape->size(); | |||
| (void)shape->insert(shape->begin(), fill_size, 1); | |||
| return true; | |||
| @@ -155,6 +175,7 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| size_t pos_weight_size_; | |||
| bool weight_need_broadcast_; | |||
| bool pos_weight_need_broadcast_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> weight_shape_; | |||
| std::vector<size_t> pos_weight_shape_; | |||
| @@ -77,7 +77,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| const float beta = 0; | |||
| if (use_pad_) { | |||
| T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1); | |||
| T *padded = GetDeviceAddress<T>(workspace, 1); | |||
| if (data_format_ == kOpFormat_NHWC) { | |||
| CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_, | |||
| old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded, | |||
| @@ -50,7 +50,7 @@ class Conv3dGpuKernel : public GpuKernel { | |||
| const float alpha = 1; | |||
| const float beta = 0; | |||
| if (use_pad_) { | |||
| T *padded_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1); | |||
| T *padded_addr = GetDeviceAddress<T>(workspace, 1); | |||
| CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_, | |||
| old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, | |||
| pad_left_, pad_value_, padded_addr, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| @@ -61,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { | |||
| const float alpha = 1; | |||
| const float beta = 0; | |||
| if (use_pad_) { | |||
| T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1); | |||
| T *padded = GetDeviceAddress<T>(workspace, 1); | |||
| CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_, | |||
| old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| @@ -72,7 +72,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { | |||
| } | |||
| } else { | |||
| if (greater_stride_) { | |||
| T *stride_padded = GetPossiblyNullDeviceAddress<T>(workspace, 1); | |||
| T *stride_padded = GetDeviceAddress<T>(workspace, 1); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| kernel_node_, | |||
| cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr, | |||
| @@ -93,6 +93,23 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool CheckNull(const std::vector<size_t> filter_shape, const std::vector<size_t> input_shape) { | |||
| is_null_input_ = CHECK_NULL_INPUT(filter_shape) || CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'Conv3dTransposeGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| void CheckSize(const size_t value, const size_t expect_value, const string arg_name) { | |||
| if (value != expect_value) { | |||
| MS_LOG(EXCEPTION) << "For 'Conv3dTransposeGpuKernel', the length of " << arg_name << " must be " << expect_value | |||
| << ", but got " << value; | |||
| } | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| @@ -105,10 +122,7 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { | |||
| } | |||
| auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "Conv3dTransposeGpuBkwKernel input is null."; | |||
| InitSizeLists(); | |||
| if (CheckNull(filter_shape, input_shape)) { | |||
| return true; | |||
| } | |||
| std::vector<size_t> output_shape; | |||
| @@ -133,6 +147,8 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { | |||
| pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode"); | |||
| SetStrideAndDilation(kernel_node); | |||
| std::vector<int> stride_pad_list(6, 0); | |||
| (void)CheckSize(filter_shape.size(), 5, "filter_shape"); | |||
| (void)CheckSize(pad_list.size(), 6, "pad_list"); | |||
| if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { // pad_mode_ = same | |||
| UpdatePaddingAndDilation(input_shape, filter_shape, pad_list.data(), stride_pad_list.data()); | |||
| } | |||
| @@ -101,6 +101,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel { | |||
| } | |||
| num_chan_ = n_ * c_; | |||
| MS_EXCEPTION_IF_ZERO("num channel", num_chan_); | |||
| num_per_chan_ = num_count_ / num_chan_; // number of elements per channel | |||
| keep_prob_ = GetAttr<float>(kernel_node, "keep_prob"); | |||
| @@ -51,12 +51,10 @@ class HSigmoidKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoid needs 1 inputs."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoid has 1 output."; | |||
| return false; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| @@ -52,12 +52,10 @@ class HSigmoidGradKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGrad needs 2 inputs."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGrad has 1 output."; | |||
| return false; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| @@ -37,6 +37,9 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_device = GetDeviceAddress<T>(inputs, 0); | |||
| int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 1); // nll_loss only supports int32 target | |||
| S *weight_device = GetDeviceAddress<S>(inputs, 2); | |||
| @@ -44,7 +47,9 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| T *loss_device = GetDeviceAddress<T>(outputs, 0); | |||
| S *total_weight_device = GetDeviceAddress<S>(outputs, 1); | |||
| T *tmp_loss_device = GetPossiblyNullDeviceAddress<T>(workspace, 0); | |||
| T *tmp_loss_device = | |||
| reduction_ != 0 ? GetDeviceAddress<T>(workspace, 0) : GetPossiblyNullDeviceAddress<T>(workspace, 0); | |||
| S *tmp_target_weight_device = GetDeviceAddress<S>(workspace, 1); | |||
| NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device, | |||
| @@ -54,6 +59,16 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NllLossGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'NllLossGpuKernel', the rank of input cannot less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| n_ = static_cast<int>(input_shape[0]); | |||
| c_ = static_cast<int>(input_shape[1]); | |||
| for (size_t i = 0; i < input_shape.size(); i++) { | |||
| @@ -83,6 +98,7 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| input_size_ = 1; | |||
| n_ = 0; | |||
| c_ = 0; | |||
| is_null_input_ = false; | |||
| reduction_ = 1; // default value | |||
| tmp_loss_size_ = 0; | |||
| tmp_target_weight_size_ = 0; // tmp_target_weight (N,) array | |||
| @@ -114,6 +130,7 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| size_t tmp_target_weight_size_; | |||
| int n_; | |||
| int c_; | |||
| bool is_null_input_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| @@ -37,6 +37,9 @@ class NLLLossGradGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input_device = GetDeviceAddress<T>(inputs, 0); | |||
| T *dloss_device = GetDeviceAddress<T>(inputs, 1); | |||
| int32_t *target_device = GetDeviceAddress<int32_t>(inputs, 2); // nll_loss_grad only supports int32 target | |||
| @@ -53,6 +56,16 @@ class NLLLossGradGpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NllLossGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'NllLossGradGpuKernel', the rank of input cannot less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| n_ = static_cast<int>(input_shape[0]); | |||
| c_ = static_cast<int>(input_shape[1]); | |||
| for (size_t i = 0; i < input_shape.size(); i++) { | |||
| @@ -79,6 +92,7 @@ class NLLLossGradGpuKernel : public GpuKernel { | |||
| input_size_ = 1; | |||
| n_ = 0; | |||
| c_ = 0; | |||
| is_null_input_ = false; | |||
| reduction_ = 1; // default value | |||
| num_dloss_ = 1; // default size (scalar) | |||
| input_size_list_.clear(); | |||
| @@ -102,6 +116,7 @@ class NLLLossGradGpuKernel : public GpuKernel { | |||
| int reduction_; | |||
| int n_; | |||
| int c_; | |||
| bool is_null_input_; | |||
| int num_dloss_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| @@ -36,6 +36,9 @@ class ResizeBilinearGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| T *output = GetDeviceAddress<T>(outputs, 0); | |||
| float h_scale = Scaling(input_h_, output_h_, align_corners_); | |||
| @@ -58,8 +61,15 @@ class ResizeBilinearGpuKernel : public GpuKernel { | |||
| } | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input is " << input_shape.size() << "-D, but ResizeBilinear supports only 4-D inputs."; | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ResizeBilinearGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4 || output_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "For 'ResizeBilinear', the rank of input and output must be 4, but got the rank of input: " | |||
| << input_shape.size() << ", the rank of output: " << output_shape.size(); | |||
| return false; | |||
| } | |||
| n_ = SizeToInt(input_shape[0]); | |||
| @@ -83,6 +93,7 @@ class ResizeBilinearGpuKernel : public GpuKernel { | |||
| void ResetResource() noexcept override { | |||
| align_corners_ = false; | |||
| is_null_input_ = false; | |||
| n_ = 0; | |||
| c_ = 0; | |||
| input_h_ = 0; | |||
| @@ -110,6 +121,7 @@ class ResizeBilinearGpuKernel : public GpuKernel { | |||
| } | |||
| bool align_corners_; | |||
| bool is_null_input_; | |||
| int n_; | |||
| int c_; | |||
| int input_h_; | |||
| @@ -36,6 +36,9 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| if (is_null_input_) { | |||
| return true; | |||
| } | |||
| T *dy = GetDeviceAddress<T>(inputs, 0); | |||
| float *interim = GetDeviceAddress<float>(workspace, 0); | |||
| T *dx = GetDeviceAddress<T>(outputs, 0); | |||
| @@ -67,6 +70,12 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dx_shape); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ResizeBilinearGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (dy_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input is " << dy_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs."; | |||
| return false; | |||
| @@ -75,6 +84,10 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| MS_LOG(ERROR) << "Input is " << x_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs."; | |||
| return false; | |||
| } | |||
| if (dx_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "For 'ResizeBilinearGradGpuKernel', the rank of output must be 4, but got " << dx_shape.size(); | |||
| return false; | |||
| } | |||
| n_ = SizeToInt(dy_shape[0]); | |||
| c_ = SizeToInt(dy_shape[1]); | |||
| dy_h_ = SizeToInt(dy_shape[2]); | |||
| @@ -97,6 +110,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| void ResetResource() noexcept override { | |||
| align_corners_ = false; | |||
| is_null_input_ = false; | |||
| n_ = 0; | |||
| c_ = 0; | |||
| dy_h_ = 0; | |||
| @@ -125,6 +139,7 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool align_corners_; | |||
| bool is_null_input_; | |||
| int n_; | |||
| int c_; | |||
| int dy_h_; | |||
| @@ -84,6 +84,10 @@ class RandpermGpuKernel : public GpuKernel { | |||
| } | |||
| max_length_ = static_cast<size_t>(GetAttr<int64_t>(kernel_node, "max_length")); | |||
| if (max_length_ < 1) { | |||
| MS_LOG(ERROR) << "For 'RandpermGpuKernel', the max_length cannot be less than 1, but got " << max_length_; | |||
| return false; | |||
| } | |||
| pad_ = static_cast<T>(GetAttr<int64_t>(kernel_node, "pad")); | |||
| InitSizeLists(); | |||