You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cuda_driver.h 3.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_DRIVER_H_
  17. #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_DRIVER_H_
  18. #include <cuda_runtime_api.h>
  19. namespace mindspore {
  20. namespace device {
  21. namespace gpu {
  // Opaque handle aliases used throughout the CudaDriver interface.
  // Each one is an untyped pointer (void *), so this header does not expose
  // CUDA-specific handle types in its signatures.
  using CudaDeviceStream = void *;  // stream handle
  using CudaDeviceEvent = void *;   // event handle
  using HostMemPtr = void *;        // address of a host-side buffer
  using DeviceMemPtr = void *;      // address of a device-side buffer
  26. class CudaDriver {
  27. public:
  28. // Encapsulate the cuda APIs associated with memory operations
  29. // such as malloc/free and memory copy from host to device and reverse.
  30. static size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr);
  31. static bool FreeDeviceMem(const DeviceMemPtr &addr);
  32. static size_t AllocHostPinnedMem(size_t size, void **addr);
  33. static void FreeHostPinnedMem(void *addr);
  34. static bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size);
  35. static bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size);
  36. static bool CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size,
  37. CudaDeviceStream stream = 0);
  38. static bool CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
  39. CudaDeviceStream stream = 0);
  40. static bool CopyDeviceMemToDeviceAsync(const DeviceMemPtr &dst, const DeviceMemPtr &src, size_t size,
  41. CudaDeviceStream stream = 0);
  42. static size_t total_mem_size();
  43. static size_t free_mem_size();
  44. // Encapsulate the cuda APIs associated with device resource
  45. // such as Stream and Event.
  46. static bool CreateStream(CudaDeviceStream *stream);
  47. static bool DestroyStream(const CudaDeviceStream &stream);
  48. static bool SyncStream(const CudaDeviceStream &stream);
  49. static bool CreateEvent(CudaDeviceEvent *event, unsigned int flag = cudaEventDefault);
  50. static bool DestroyEvent(const CudaDeviceEvent &event);
  51. static bool RecordEvent(CudaDeviceEvent event, CudaDeviceStream stream = 0);
  52. static bool SyncEvent(const CudaDeviceEvent &event);
  53. static bool QueryEvent(const CudaDeviceEvent &event);
  54. static bool ElapsedTime(float *cost_time, const CudaDeviceEvent &start, const CudaDeviceEvent &end);
  55. // Encapsulate the cuda APIs associated with device management.
  56. static int device_count();
  57. static bool SetDevice(int index);
  58. private:
  59. CudaDriver() = delete;
  60. ~CudaDriver() = delete;
  61. CudaDriver(const CudaDriver &) = delete;
  62. CudaDriver &operator=(const CudaDriver &) = delete;
  63. static constexpr float mem_malloc_retry_rate_{0.99};
  64. static constexpr size_t mem_malloc_retry_conut_max_{10};
  65. static constexpr size_t mem_malloc_align_size_{4};
  66. };
  67. } // namespace gpu
  68. } // namespace device
  69. } // namespace mindspore
  70. #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_DRIVER_H_