@@ -216,7 +216,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
     std::vector<size_t> inputA;
     std::vector<size_t> outputC_shape = output_shape;
     const int split_dim = 4;
+    CHECK_TENSOR_SIZE(input_shape);
     if (input_shape.size() <= split_dim) {
       ShapeNdTo4d(input_shape, &inputA);
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -76,6 +76,7 @@ class ActivationGpuFwdKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(input_shape);
     std::vector<size_t> shape;
     double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 6.0 : 0.0;
     if (mode_ == CUDNN_ACTIVATION_ELU) {
@@ -85,7 +86,6 @@ class ActivationGpuFwdKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                 cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, coef),
                                 "cudnnSetActivationDescriptor failed");
     const int split_dim = 4;
     if (input_shape.size() <= split_dim) {
       ShapeNdTo4d(input_shape, &shape);
@@ -84,6 +84,7 @@ class ActivationGradGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(input_shape);
     std::vector<size_t> shape;
     double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 5.999999 : 0.0;
     if (mode_ == CUDNN_ACTIVATION_ELU) coef = 1.0;
@@ -132,6 +132,7 @@ class BatchNormGpuKernel : public GpuKernel {
     if (format_attr == kOpFormat_NHWC) {
       format = kOpFormat_NHWC;
     }
+    CHECK_TENSOR_SIZE(shape);
     SetTensorDescriptor(format, shape);
     InitSizeLists();
     return true;
@@ -254,7 +255,6 @@ class BatchNormGpuKernel : public GpuKernel {
       width = SizeToInt(shape[3]);
       cudnn_format = CUDNN_TENSOR_NCHW;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
       kernel_node_, cudnnSetTensor4dDescriptor(x_desc_, cudnn_format, cudnn_data_type_, batch, channel, height, width),
       "Set x desc failed");
@@ -155,6 +155,7 @@ class BatchNormGradGpuKernel : public GpuKernel {
       format = kOpFormat_NHWC;
     }
     beta_data_diff_ = GetAttrWithDefault(kernel_node, "inplace_algo", std::string("cover")) == "cover" ? 0 : 1;
+    CHECK_TENSOR_SIZE(shape);
     SetTensorDescriptor(format, shape);
     InitSizeLists();
     is_train_ = GetAttr<bool>(kernel_node, "is_training");
@@ -99,6 +99,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(in_shape);
     SetNCHW(in_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
     if (data_format_ == kOpFormat_NHWC) {
       compute_format_ = CUDNN_TENSOR_NHWC;
@@ -118,6 +118,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(in_shape);
     data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
     format_attr_ = GetAttr<std::string>(kernel_node, "format");
     if (format_attr_ == kOpFormat_NHWC) {
@@ -133,6 +133,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
         ShapeNCHW2NHWC(&input_shape);
       }
     }
+    CHECK_TENSOR_SIZE(input_shape);
     SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
     Set4DDesc(dy_shape, input_shape, filter_shape);
@@ -98,6 +98,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(in_shape);
     Set4DDesc(in_shape, filter_shape, output_shape);
     CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, 1),
                                 "cudnnSetConvGroupCount failed");
@@ -134,6 +134,7 @@ class InstanceNormGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(input_shape_);
     SetTensorDescriptor();
     InitSizeLists();
     return true;
@@ -131,6 +131,7 @@ class InstanceNormGradGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(input_shape_);
     beta_data_diff_ = GetAttrWithDefault(kernel_node, "inplace_algo", std::string("cover")) == "cover" ? 0 : 1;
     SetTensorDescriptor();
     InitSizeLists();
@@ -110,6 +110,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(inputA_shape);
     if (inputA_shape.size() > MAX_DIMS) {
       MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";
     }
@@ -262,7 +262,8 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
     std::vector<size_t> inputA;
     std::vector<size_t> outputC_shape = output_shape;
     constexpr int split_dim = 4;
+    CHECK_TENSOR_SIZE(input_shape);
+    CHECK_TENSOR_SIZE(output_shape);
     if (input_shape.size() <= split_dim) {
       ShapeNdTo4d(input_shape, &inputA);
       CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
@@ -93,6 +93,7 @@ class PoolingGpuFwdKernel : public GpuKernel {
       InitSizeLists();
       return true;
     }
+    CHECK_TENSOR_SIZE(input_shape);
     SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
     const int nbDims = 4;
     int dimA[4];
@@ -96,6 +96,7 @@ class PoolingGradGpuKernel : public GpuKernel {
       InitSizeLists();
       return false;
     }
+    CHECK_TENSOR_SIZE(input_shape);
     SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format);
     SetDimA(input_shape, dimA, 4, data_format);
     SetStrideA(input_shape, strideAin, 4, data_format);
@@ -136,6 +136,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
                     << ", but BatchNormFold GpuKernel OP needs 4DTensor input.";
       return false;
     }
+    CHECK_TENSOR_SIZE(input_shape);
     batch_ = input_shape[0];
     channel_ = input_shape[1];
     height_ = input_shape[2];
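
The kernel hunks above all guard their Init paths with CHECK_TENSOR_SIZE, which the common-utils hunks below define as a product-of-dimensions check against the 2 Giga-element ceiling that the patch attributes to cuDNN. As a rough standalone sketch of what that guard amounts to (hypothetical names, a plain C++ exception in place of MS_EXCEPTION, not the MindSpore API):

// Standalone sketch only: reproduces the element-count guard with standard C++.
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

namespace sketch {  // hypothetical namespace, not part of MindSpore
constexpr std::size_t kMaxElements = std::size_t{1} << 31;  // 2 Giga-elements

inline void CheckTensorSize(const std::vector<std::size_t> &shape) {
  std::size_t total = 1;
  for (std::size_t dim : shape) {
    total *= dim;  // assumes the product stays within size_t, as the patch does
  }
  if (total >= kMaxElements) {
    throw std::runtime_error("tensor has " + std::to_string(total) +
                             " elements, exceeding the 2G-element limit");
  }
}
}  // namespace sketch

int main() {
  sketch::CheckTensorSize({32, 3, 224, 224});        // ~4.8M elements: passes
  try {
    sketch::CheckTensorSize({1024, 1024, 1024, 4});  // 2^32 elements: rejected
  } catch (const std::exception &e) {
    std::cout << e.what() << '\n';
  }
  return 0;
}

One caveat worth noting: the product of size_t dimensions is computed without an overflow guard, which is harmless for realistic shapes but would wrap around for pathologically large ones.
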
@@ -189,7 +189,7 @@ namespace gpu {
 #define VARIABLE_NOT_USED(var) \
   { (void)(var); }
-inline bool CheckNullInput(std::vector<size_t> input_shape) {
+inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
   // If input_shape.size() == 0, it means a scalar input; If input_shape.size() != 0 and input_shape contains 0,
   // it means a null input. Just return a null output.
   if (input_shape.size() != 0) {
@@ -201,6 +201,19 @@ inline bool CheckNullInput(std::vector<size_t> input_shape) {
 }
 #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
+// The tensor size is limited to 2G by cudnn.
+inline void CheckTensorSize(const std::vector<size_t> &shape) {
+  size_t total_size = 1;
+  for (auto i : shape) {
+    total_size *= i;
+  }
+  if (total_size >= 2147483648) {
+    MS_EXCEPTION(ValueError) << "The total size of the tensor exceeds the max_limit of 2 Giga-elements, which is "
+                             << total_size << " elements (" << shape << ").";
+  }
+}
+#define CHECK_TENSOR_SIZE(shape) mindspore::device::gpu::CheckTensorSize(shape)
 #define CHECK_CURAND_RET_WITH_EXCEPT(expression, message) \
   { \
     curandStatus_t status = (expression); \