You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.cpp 5.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. /**
  2. * \file dnn/src/cuda/utils.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  #include "src/cuda/utils.cuh"
  #include "src/cuda/utils.h"
  #include "src/common/utils.h"
  #include "src/cuda/handle.h"
  #include "src/cuda/int_fastdiv.cuh"
  #include <atomic>
  #include <mutex>
  18. using namespace megdnn;
  19. using namespace cuda;
  20. namespace {
  21. struct DevicePropRec {
  22. bool init = false;
  23. cudaDeviceProp prop;
  24. std::mutex mtx;
  25. };
  26. constexpr int MAX_NR_DEVICE = 32;
  27. DevicePropRec device_prop_rec[MAX_NR_DEVICE];
  28. const char* cublasGetErrorString(cublasStatus_t error) {
  29. switch (error) {
  30. case CUBLAS_STATUS_SUCCESS:
  31. return "CUBLAS_STATUS_SUCCESS";
  32. case CUBLAS_STATUS_NOT_INITIALIZED:
  33. return "CUBLAS_STATUS_NOT_INITIALIZED";
  34. case CUBLAS_STATUS_ALLOC_FAILED:
  35. return "CUBLAS_STATUS_ALLOC_FAILED";
  36. case CUBLAS_STATUS_INVALID_VALUE:
  37. return "CUBLAS_STATUS_INVALID_VALUE";
  38. case CUBLAS_STATUS_ARCH_MISMATCH:
  39. return "CUBLAS_STATUS_ARCH_MISMATCH";
  40. case CUBLAS_STATUS_MAPPING_ERROR:
  41. return "CUBLAS_STATUS_MAPPING_ERROR";
  42. case CUBLAS_STATUS_EXECUTION_FAILED:
  43. return "CUBLAS_STATUS_EXECUTION_FAILED";
  44. case CUBLAS_STATUS_INTERNAL_ERROR:
  45. return "CUBLAS_STATUS_INTERNAL_ERROR";
  46. case CUBLAS_STATUS_LICENSE_ERROR:
  47. return "CUBLAS_STATUS_LICENSE_ERROR";
  48. case CUBLAS_STATUS_NOT_SUPPORTED:
  49. return "CUBLAS_STATUS_NOT_SUPPORTED";
  50. }
  51. return "Unknown CUBLAS error";
  52. }
  53. } // anonymous namespace
  54. void cuda::__throw_cuda_error__(cudaError_t err, const char* msg) {
  55. auto s = ssprintf(
  56. "cuda error %s(%d) occurred; expr: %s", cudaGetErrorString(err), int(err),
  57. msg);
  58. megdnn_throw(s.c_str());
  59. }
  60. void cuda::__throw_cudnn_error__(cudnnStatus_t err, const char* msg) {
  61. auto s = ssprintf(
  62. "cudnn error %s(%d) occurred; expr: %s", cudnnGetErrorString(err), int(err),
  63. msg);
  64. megdnn_throw(s.c_str());
  65. }
  66. void cuda::__throw_cublas_error__(cublasStatus_t err, const char* msg) {
  67. auto s = ssprintf(
  68. "cublas error %s(%d) occurred; expr: %s", cublasGetErrorString(err),
  69. int(err), msg);
  70. megdnn_throw(s.c_str());
  71. }
  72. void cuda::__throw_cusolver_error__(cusolverStatus_t err, const char* msg) {
  73. auto s = ssprintf("cusolver error %d occurred; expr: %s", int(err), msg);
  74. megdnn_throw(s.c_str());
  75. }
  76. void cuda::__throw_cuda_driver_error__(CUresult err, const char* msg) {
  77. const char* err_str = nullptr;
  78. cuGetErrorName(err, &err_str);
  79. err_str = err_str ? err_str : "unknown error";
  80. auto s = ssprintf("cuda driver error %d(%s) occurred; expr: %s", int(err), err_str, msg);
  81. megdnn_throw(s.c_str());
  82. }
  83. void cuda::__throw_cutlass_error__(cutlass::Status err, const char* msg) {
  84. auto s = ssprintf(
  85. "cutlass error %s(%d) occurred; expr: %s",
  86. cutlass::cutlassGetStatusString(err), int(err), msg);
  87. megdnn_throw(s.c_str());
  88. }
  89. void cuda::report_error(const char* msg) {
  90. megdnn_throw(msg);
  91. MEGDNN_MARK_USED_VAR(msg);
  92. }
  93. uint32_t cuda::safe_size_in_kern(size_t size) {
  94. if (!size || size > Uint32Fastdiv::MAX_DIVIDEND) {
  95. megdnn_throw(ssprintf(
  96. "invalid size for element-wise kernel: %zu; "
  97. "max supported size is %u",
  98. size, Uint32Fastdiv::MAX_DIVIDEND));
  99. }
  100. return size;
  101. }
  102. const cudaDeviceProp& cuda::current_device_prop() {
  103. int dev;
  104. cuda_check(cudaGetDevice(&dev));
  105. return *(cuda::get_device_prop(dev));
  106. }
  107. const cudaDeviceProp* cuda::get_device_prop(int device) {
  108. megdnn_assert(device < MAX_NR_DEVICE, "device number too large: %d", device);
  109. megdnn_assert(device >= 0, "device number must not be negative, got %d", device);
  110. auto&& rec = device_prop_rec[device];
  111. if (!rec.init) {
  112. std::lock_guard<std::mutex> lock(rec.mtx);
  113. if (!rec.init) {
  114. cuda_check(cudaGetDeviceProperties(&rec.prop, device));
  115. rec.init = true;
  116. }
  117. }
  118. return &(rec.prop);
  119. }
  120. bool cuda::is_compute_capability_required(int major, int minor) {
  121. auto&& device_prop = cuda::current_device_prop();
  122. return device_prop.major > major ||
  123. (device_prop.major == major && device_prop.minor >= minor);
  124. }
  125. bool cuda::is_compute_capability_equalto(int major, int minor) {
  126. auto&& device_prop = cuda::current_device_prop();
  127. return device_prop.major == major && device_prop.minor == minor;
  128. }
  129. size_t cuda::max_batch_x_channel_size() {
  130. return current_device_prop().maxGridSize[2];
  131. }
  132. uint32_t cuda::param_buffer_start_address() {
  133. auto&& device_prop = current_device_prop();
  134. int cap = 10 * device_prop.major + device_prop.minor;
  135. // maxwell and pascal: 0x140
  136. if (cap >= 50 && cap < 70)
  137. return 0x140;
  138. // volta ~ ampere: 0x160
  139. else if (cap >= 70)
  140. return 0x160;
  141. megdnn_throw(ssprintf("unsupported cuda compute capability %d", cap).c_str());
  142. }
  143. const char* cuda::current_device_arch_name() {
  144. auto&& device_prop = current_device_prop();
  145. int cap = 10 * device_prop.major + device_prop.minor;
  146. if (cap >= 50 && cap < 60)
  147. return "maxwell";
  148. else if (cap >= 60 && cap < 70)
  149. return "pascal";
  150. else if (cap >= 70 && cap < 75)
  151. return "volta";
  152. else if (cap >= 75 && cap < 80)
  153. return "turing";
  154. else if (cap >= 80)
  155. return "ampere";
  156. megdnn_throw(ssprintf("unsupported cuda compute capability %d", cap).c_str());
  157. }
  158. // vim: syntax=cpp.doxygen