/** * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_ #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_ #include #include #include #include #include #include #include #include "utils/log_adapter.h" #include "utils/trace_base.h" #include "include/curand.h" namespace mindspore { namespace device { namespace gpu { #define CHECK_OP_RET_WITH_EXCEPT(expression, message) \ { \ bool success = (expression); \ if (!success) { \ MS_LOG(EXCEPTION) << "Op Error: " << message << " | Error Number: " << success; \ } \ } #define CHECK_OP_RET_WITH_ERROR(expression, message) \ { \ bool success = (expression); \ if (!success) { \ MS_LOG(ERROR) << "Op Error: " << message << " | Error Number: " << success; \ } \ } #define CHECK_RET_WITH_RETURN_ERROR(expression, message) \ { \ bool success = (expression); \ if (!success) { \ MS_LOG(ERROR) << message; \ return false; \ } \ } #define CHECK_CUDA_RET_WITH_ERROR(node, expression, message) \ { \ cudaError_t status = (expression); \ if (status != cudaSuccess) { \ MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " << cudaGetErrorString(status) \ << trace::DumpSourceLines(node.lock()); \ } \ } #define CHECK_CUDA_RET_WITH_ERROR_NOTRACE(expression, message) \ { \ cudaError_t status = (expression); \ if (status != cudaSuccess) { \ MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \ << cudaGetErrorString(status); \ } \ } #define CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(expression, message) \ { \ cudaError_t status = (expression); \ if (status != cudaSuccess) { \ MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \ << cudaGetErrorString(status); \ return false; \ } \ } #define CHECK_CUDA_RET_WITH_EXCEPT(node, expression, message) \ { \ cudaError_t status = (expression); \ if (status != cudaSuccess) { \ MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \ << cudaGetErrorString(status) << trace::DumpSourceLines(node.lock()); \ } \ } #define CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(expression, message) \ { \ cudaError_t status = (expression); \ if (status != cudaSuccess) { \ MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \ << cudaGetErrorString(status); \ } \ } #define CHECK_CUDNN_RET_WITH_EXCEPT(node, expression, message) \ { \ cudnnStatus_t status = (expression); \ if (status != CUDNN_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \ << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \ } \ } #define CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(expression, message) \ { \ cudnnStatus_t status = (expression); \ if (status != CUDNN_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \ << cudnnGetErrorString(status); \ } \ } #define CHECK_CUDNN_RET_WITH_ERROR_NOTRACE(expression, message) \ { \ cudnnStatus_t status = (expression); \ if (status != CUDNN_STATUS_SUCCESS) { \ MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \ << cudnnGetErrorString(status); \ } \ } #define CHECK_CUDNN_RET_WITH_ERROR(node, expression, message) \ { \ cudnnStatus_t status = (expression); \ if (status != CUDNN_STATUS_SUCCESS) { \ MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \ << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \ } \ } #define CHECK_CUBLAS_RET_WITH_EXCEPT_NOTRACE(expression, message) \ { \ cublasStatus_t status = (expression); \ if (status != CUBLAS_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status << " " \ << mindspore::device::gpu::cuBlasGetErrorString(status); \ } \ } #define CHECK_CUBLAS_RET_WITH_EXCEPT(node, expression, message) \ { \ cublasStatus_t status = (expression); \ if (status != CUBLAS_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status << " " \ << mindspore::device::gpu::cuBlasGetErrorString(status) \ << trace::DumpSourceLines(node.lock()); \ } \ } #define CHECK_CUBLAS_RET_WITH_ERROR(expression, message) \ { \ cublasStatus_t status = (expression); \ if (status != CUBLAS_STATUS_SUCCESS) { \ MS_LOG(ERROR) << "cuBLAS Error: " << message << " | Error Number: " << status << " " \ << mindspore::device::gpu::cuBlasGetErrorString(status); \ } \ } #define CHECK_CUSOLVER_RET_WITH_EXCEPT_NOTRACE(expression, message) \ { \ cusolverStatus_t status = (expression); \ if (status != CUSOLVER_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status; \ } \ } #define CHECK_CUSOLVER_RET_WITH_EXCEPT(node, expression, message) \ { \ cusolverStatus_t status = (expression); \ if (status != CUSOLVER_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status \ << trace::DumpSourceLines(node.lock()); \ ; \ } \ } #define CHECK_CUSOLVER_RET_WITH_ERROR(expression, message) \ { \ cusolverStatus_t status = (expression); \ if (status != CUSOLVER_STATUS_SUCCESS) { \ MS_LOG(ERROR) << "cusolver Error: " << message << " | Error Number: " << status; \ } \ } #define CHECK_NCCL_RET_WITH_EXCEPT(node, expression, message) \ { \ int result = (expression); \ if (result != ncclSuccess) { \ MS_LOG(EXCEPTION) << "NCCL Error: " << message << " | Error Number: " << result \ << trace::DumpSourceLines(node.lock()); \ } \ } #define VARIABLE_NOT_USED(var) \ { (void)(var); } inline bool CheckNullInput(const std::vector &input_shape) { // If input_shape.size() == 0, it means a scalar input; If input_shape.size() != 0 and input_shape contains 0, // it means a null input. Just return a null output. if (input_shape.size() != 0) { if (std::any_of(input_shape.begin(), input_shape.end(), [](size_t i) { return i == 0; })) { return true; } } return false; } #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape) template inline std::string ConvertVectorToString(const std::vector &value) { std::stringstream ss; ss << "("; for (auto it = value.begin(); it != value.end(); it++) { if (it == value.begin()) { ss << *it; } else { ss << ", " << *it; } } ss << ")"; return ss.str(); } #define CONVERT_VECTOR_TO_STRING(value) mindspore::device::gpu::ConvertVectorToString(value) inline bool CheckShapeNull(const std::vector &shape, std::string kernel_name, std::string param_name) { if (CHECK_NULL_INPUT(shape)) { MS_LOG(WARNING) << "For '" << kernel_name << "', the shape of " << param_name << " cannot contain zero, but got " << CONVERT_VECTOR_TO_STRING(shape); return true; } return false; } #define CHECK_SHAPE_NULL(shape, kernel_name, param_name) \ mindspore::device::gpu::CheckShapeNull(shape, kernel_name, param_name) inline const char *CurandGetErrorString(curandStatus_t status) { switch (status) { case CURAND_STATUS_VERSION_MISMATCH: return "Header file and linked library version do not match."; case CURAND_STATUS_NOT_INITIALIZED: return "Generator not initialized."; case CURAND_STATUS_ALLOCATION_FAILED: return "Memory allocation failed."; case CURAND_STATUS_TYPE_ERROR: return "Generator is wrong type."; case CURAND_STATUS_OUT_OF_RANGE: return "Argument out of range."; case CURAND_STATUS_LENGTH_NOT_MULTIPLE: return "Length requested is not a multiple of dimension."; case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: return "GPU does not have double precision required by MRG32k3a."; case CURAND_STATUS_LAUNCH_FAILURE: return "Kernel launch failure."; case CURAND_STATUS_PREEXISTING_FAILURE: return "Preexisting failure on library entry."; case CURAND_STATUS_INITIALIZATION_FAILED: return "Initialization of CUDA failed."; case CURAND_STATUS_ARCH_MISMATCH: return "Architecture mismatch, GPU does not support requested feature."; case CURAND_STATUS_INTERNAL_ERROR: return "Internal library error."; default: return "Unknown the curandStatus."; } } inline const char *cuBlasGetErrorString(cublasStatus_t status) { switch (status) { case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS: The operation completed successfully."; case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED: The cuBLAS library was not initialized."; case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED: Resource allocation failed inside the cuBLAS library. This is usually caused " "by a cudaMalloc() failure. "; case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE: An unsupported value or parameter was passed to the function (a negative " "vector size, for example)."; case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH: The function requires a feature absent from the device architecture; " "usually caused by compute capability lower than 5.0."; case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR: An access to GPU memory space failed, which is usually caused by a failure " "to bind a texture."; case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED: The GPU program failed to execute. This is often caused by a launch " "failure of the kernel on the GPU, which can be caused by multiple reasons."; case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR: An internal cuBLAS operation failed. This error is usually caused by a " "cudaMemcpyAsync() failure. "; case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED: The functionality requested is not supported."; case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR: The functionality requested requires some license and an error was detected " "when trying to check the current licensing. This error can happen if the license is not present or is " "expired or if the environment variable NVIDIA_LICENSE_FILE is not set properly. "; default: return "Unknown cublasStatus."; } } #define CHECK_CURAND_RET_WITH_EXCEPT(expression, message) \ { \ curandStatus_t status = (expression); \ if (status != CURAND_STATUS_SUCCESS) { \ MS_LOG(EXCEPTION) << "CUDA curand Error: " << message << " | curandStatus: " << status << " " \ << mindspore::device::gpu::CurandGetErrorString(status); \ } \ } } // namespace gpu } // namespace device } // namespace mindspore #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_