You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

gpu_common.h 14 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
  17. #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
  18. #include <iostream>
  19. #include <vector>
  20. #include <algorithm>
  21. #include <map>
  22. #include "utils/log_adapter.h"
  23. #include "utils/trace_base.h"
  24. #include "include/curand.h"
  25. namespace mindspore {
  26. namespace device {
  27. namespace gpu {
// Evaluates `expression` as a bool; on failure logs "Op Error: <message>" at
// EXCEPTION severity (MS_LOG(EXCEPTION) aborts the current operation).
// Note: the logged "Error Number" is the boolean result itself, so it always
// prints 0 when the branch is taken.
#define CHECK_OP_RET_WITH_EXCEPT(expression, message) \
  { \
    bool success = (expression); \
    if (!success) { \
      MS_LOG(EXCEPTION) << "Op Error: " << message << " | Error Number: " << success; \
    } \
  }

// Same check as CHECK_OP_RET_WITH_EXCEPT, but logs at ERROR severity and
// lets execution continue.
#define CHECK_OP_RET_WITH_ERROR(expression, message) \
  { \
    bool success = (expression); \
    if (!success) { \
      MS_LOG(ERROR) << "Op Error: " << message << " | Error Number: " << success; \
    } \
  }
// Runs a CUDA API call; on any status other than cudaSuccess logs the numeric
// status plus cudaGetErrorString(), then appends the graph-node source trace.
// `node` is a weak pointer (it is .lock()'ed for trace::DumpSourceLines).
// Logs at ERROR severity; execution continues.
#define CHECK_CUDA_RET_WITH_ERROR(node, expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " << cudaGetErrorString(status) \
                    << trace::DumpSourceLines(node.lock()); \
    } \
  }

// As CHECK_CUDA_RET_WITH_ERROR, but without the source-line trace (for call
// sites that have no graph node at hand).
#define CHECK_CUDA_RET_WITH_ERROR_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                    << cudaGetErrorString(status); \
    } \
  }

// As CHECK_CUDA_RET_WITH_ERROR_NOTRACE, but additionally executes
// `return false;` in the enclosing function on failure — only usable inside
// functions returning bool (or a type constructible from false).
#define CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                    << cudaGetErrorString(status); \
      return false; \
    } \
  }

// Runs a CUDA API call; on failure raises a fatal EXCEPTION log including the
// CUDA error string and the source trace of `node` (a weak pointer).
#define CHECK_CUDA_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                        << cudaGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }

// As CHECK_CUDA_RET_WITH_EXCEPT, but without the source-line trace.
#define CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                        << cudaGetErrorString(status); \
    } \
  }
// Runs a cuDNN call; on any status other than CUDNN_STATUS_SUCCESS raises a
// fatal EXCEPTION log with the numeric status, cudnnGetErrorString(), and the
// source trace of `node` (a weak pointer, .lock()'ed for the dump).
#define CHECK_CUDNN_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                        << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }

// As CHECK_CUDNN_RET_WITH_EXCEPT, but without the source-line trace.
#define CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                        << cudnnGetErrorString(status); \
    } \
  }

// Non-fatal variant: logs at ERROR severity (no trace) and continues.
#define CHECK_CUDNN_RET_WITH_ERROR_NOTRACE(expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                    << cudnnGetErrorString(status); \
    } \
  }

// Non-fatal variant with the `node` source trace appended; continues on error.
#define CHECK_CUDNN_RET_WITH_ERROR(node, expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                    << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }
// Runs a cuBLAS call; on any status other than CUBLAS_STATUS_SUCCESS raises a
// fatal EXCEPTION log with the numeric status (cuBLAS has no error-string
// helper used here, so only the number is reported).
#define CHECK_CUBLAS_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status; \
    } \
  }

// As CHECK_CUBLAS_RET_WITH_EXCEPT_NOTRACE, but appends the source trace of
// `node` (a weak pointer, .lock()'ed for trace::DumpSourceLines).
#define CHECK_CUBLAS_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status \
                        << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Non-fatal variant: logs at ERROR severity and continues.
#define CHECK_CUBLAS_RET_WITH_ERROR(expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuBLAS Error: " << message << " | Error Number: " << status; \
    } \
  }
  137. #define CHECK_CUSOLVER_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  138. { \
  139. cusolverStatus_t status = (expression); \
  140. if (status != CUSOLVER_STATUS_SUCCESS) { \
  141. MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status; \
  142. } \
  143. }
  144. #define CHECK_CUSOLVER_RET_WITH_EXCEPT(node, expression, message) \
  145. { \
  146. cusolverStatus_t status = (expression); \
  147. if (status != CUSOLVER_STATUS_SUCCESS) { \
  148. MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status \
  149. << trace::DumpSourceLines(node.lock()); \
  150. ; \
  151. } \
  152. }
  153. #define CHECK_CUSOLVER_RET_WITH_ERROR(expression, message) \
  154. { \
  155. cusolverStatus_t status = (expression); \
  156. if (status != CUSOLVER_STATUS_SUCCESS) { \
  157. MS_LOG(ERROR) << "cusolver Error: " << message << " | Error Number: " << status; \
  158. } \
  159. }
// Runs a NCCL call; on any result other than ncclSuccess raises a fatal
// EXCEPTION log with the numeric result and the source trace of `node`
// (a weak pointer, .lock()'ed for trace::DumpSourceLines).
// NOTE(review): the result is held as plain int rather than ncclResult_t —
// presumably to avoid requiring an NCCL include in this header; confirm.
#define CHECK_NCCL_RET_WITH_EXCEPT(node, expression, message) \
  { \
    int result = (expression); \
    if (result != ncclSuccess) { \
      MS_LOG(EXCEPTION) << "NCCL Error: " << message << " | Error Number: " << result \
                        << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Silences "unused variable" compiler warnings for `var`.
#define VARIABLE_NOT_USED(var) \
  { (void)(var); }
  170. inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
  171. // If input_shape.size() == 0, it means a scalar input; If input_shape.size() != 0 and input_shape contains 0,
  172. // it means a null input. Just return a null output.
  173. if (input_shape.size() != 0) {
  174. if (std::any_of(input_shape.begin(), input_shape.end(), [](size_t i) { return i == 0; })) {
  175. return true;
  176. }
  177. }
  178. return false;
  179. }
  180. #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
  181. // The tensor size is limited to 2G by cudnn.
  182. inline void CheckTensorSize(const std::vector<size_t> &shape) {
  183. size_t total_size = 1;
  184. for (auto i : shape) {
  185. total_size *= i;
  186. }
  187. if (total_size >= 2147483648) {
  188. MS_EXCEPTION(ValueError) << "The total size of the tensor exceeds the max_limit of 2 Giga-elements, which is "
  189. << total_size << "elements (" << shape << ").";
  190. }
  191. }
  192. #define CHECK_TENSOR_SIZE(shape) mindspore::device::gpu::CheckTensorSize(shape)
// Runs a cuRAND call; on any status other than CURAND_STATUS_SUCCESS raises a
// fatal EXCEPTION log with the numeric curandStatus value.
#define CHECK_CURAND_RET_WITH_EXCEPT(expression, message) \
  { \
    curandStatus_t status = (expression); \
    if (status != CURAND_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "CUDA curand Error: " << message << " | curandStatus: " << status; \
    } \
  }
  200. } // namespace gpu
  201. } // namespace device
  202. } // namespace mindspore
  203. #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_