You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

cuda_common.h 2.8 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
  17. #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
  18. #include <algorithm>
  19. #include "runtime/device/gpu/gpu_device_manager.h"
  20. #define CUDA_KERNEL_ASSERT(cond) \
  21. if (!(cond)) { \
  22. __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), __FUNCTION__); \
  23. }
  24. namespace mindspore {
  25. namespace device {
  26. namespace gpu {
  27. class CudaCommon {
  28. public:
  29. inline int threads_num() const { return threads_per_block_; }
  30. inline int major_sm() const { return major_sm_; }
  31. inline int blocks_num(const int total_threads) const {
  32. return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
  33. }
  34. size_t share_memory_size() const { return max_share_memory_; }
  35. void set_check_sm(const bool &flag) { check_sm_ = flag; }
  36. bool check_sm() const { return check_sm_; }
  37. static CudaCommon &GetInstance() {
  38. static CudaCommon instance;
  39. return instance;
  40. }
  41. private:
  42. CudaCommon() {
  43. uint32_t device_id = GPUDeviceManager::GetInstance().cur_device_id();
  44. cudaDeviceProp prop;
  45. (void)cudaGetDeviceProperties(&prop, device_id);
  46. threads_per_block_ = prop.maxThreadsPerBlock;
  47. max_blocks_ = prop.multiProcessorCount;
  48. major_sm_ = prop.major;
  49. max_share_memory_ = prop.sharedMemPerBlock;
  50. }
  51. ~CudaCommon() = default;
  52. CudaCommon(const CudaCommon &) = delete;
  53. CudaCommon &operator=(const CudaCommon &) = delete;
  54. int max_blocks_;
  55. int threads_per_block_;
  56. int major_sm_;
  57. size_t max_share_memory_;
  58. bool check_sm_{true};
  59. };
// Convenience accessors for the cached device limits, usable from any
// translation unit that includes this header.
#define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
#define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
#define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
#define SHARED_MEM_PER_BLOCK mindspore::device::gpu::CudaCommon::GetInstance().share_memory_size()
// Minimum / recommended compute-capability major versions for GPU kernels.
// NOTE(review): "MINIUM" is a historical typo; kept unchanged because callers
// reference it by this exact name.
#define MINIUM_SM 6
#define RECOMMEND_SM 7
  66. } // namespace gpu
  67. } // namespace device
  68. } // namespace mindspore
  69. #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_