You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

libcuda.cpp 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. #pragma GCC visibility push(default)
  2. #include <cstdio>
  3. #define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
  4. extern "C" {
  5. #include <cuda.h>
  6. }
  7. #include <cudaProfiler.h>
  8. #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  9. #if defined(_WIN32)
  10. #include <windows.h>
  11. #define RTLD_LAZY 0
  12. static void* dlopen(const char* file, int) {
  13. return static_cast<void*>(LoadLibraryA(file));
  14. }
  // Windows stand-in for POSIX dlerror(): no real error lookup is performed;
  // a fixed placeholder message is always returned.
  static void* dlerror() {
      return const_cast<char*>("dlerror not aviable in windows");
  }
  19. static void* dlsym(void* handle, const char* name) {
  20. FARPROC symbol = GetProcAddress((HMODULE)handle, name);
  21. return reinterpret_cast<void*>(symbol);
  22. }
  23. #else
  24. #include <dlfcn.h>
  25. #include <unistd.h>
  26. #endif
  27. static void log_failed_load(int func_idx);
  28. namespace {
  29. template <typename T>
  30. T on_init_failed(int func_idx);
  31. template <>
  32. CUresult on_init_failed(int func_idx) {
  33. log_failed_load(func_idx);
  34. return CUDA_ERROR_UNKNOWN;
  35. }
  36. }
  37. #define _WRAPLIB_API_CALL CUDAAPI
  38. #define _WRAPLIB_CALLBACK CUDA_CB
  39. #include "./libcuda-wrap.h"
  40. #undef _WRAPLIB_CALLBACK
  41. #undef _WRAPLIB_API_CALL
  42. // Harvested from cuda_drvapi_dynlink.c
  // Candidate locations of the CUDA driver library, listed in the order in
  // which they are tried. The set of entries is chosen per platform at
  // compile time.
  static const char* default_so_paths[] = {
  #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
          "nvcuda.dll",
  #elif defined(__unix__) || defined (__QNX__) || defined(__APPLE__) || defined(__MACOSX)
  #  if defined(__APPLE__) || defined(__MACOSX)
          "/usr/local/cuda/lib/libcuda.dylib",
  #  elif defined(__ANDROID__)
  #    if defined (__aarch64__)
          "/system/vendor/lib64/libcuda.so",
  #    elif defined(__arm__)
          "/system/vendor/lib/libcuda.so",
  #    endif  // Android architecture
  #  else
          "libcuda.so.1",
          // Absolute fallbacks for machines whose dynamic-linker search path
          // (/etc/ld.so.conf) is not set up to find the driver library.
          "/usr/lib/x86_64-linux-gnu/libcuda.so",
          "/usr/local/nvidia/lib64/libcuda.so",
  #  endif  // Apple / Android / other unix-like
  #else
  #  error "Unknown platform"
  #endif  // platform selection
  };
  66. static void* get_library_handle() {
  67. void* handle = nullptr;
  68. for (size_t i = 0; i < (sizeof(default_so_paths) / sizeof(char*)); i++) {
  69. handle = dlopen(default_so_paths[i], RTLD_LAZY);
  70. if (handle) {
  71. break;
  72. }
  73. }
  74. if (!handle) {
  75. LOGE("Failed to load CUDA Driver API library");
  76. return nullptr;
  77. }
  78. return handle;
  79. }
  80. static void log_failed_load(int func_idx) {
  81. LOGE("failed to load cuda func: %s", g_func_name[func_idx]);
  82. }
  83. static void* resolve_library_func(void* handle, const char* func) {
  84. if (!handle) {
  85. LOGE("handle should not be nullptr!");
  86. return nullptr;
  87. }
  88. auto ret = dlsym(handle, func);
  89. if (!ret) {
  90. LOGE("failed to load cuda func: %s", func);
  91. }
  92. return ret;
  93. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台