You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cuda_common.h 2.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
  17. #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
  18. #include <algorithm>
  19. #include "runtime/device/gpu/gpu_device_manager.h"
  20. namespace mindspore {
  21. namespace device {
  22. namespace gpu {
  23. class CudaCommon {
  24. public:
  25. inline int threads_num() const { return threads_per_block_; }
  26. inline int major_sm() const { return major_sm_; }
  27. inline int blocks_num(const int total_threads) const {
  28. return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
  29. }
  30. size_t share_memory_size() const { return max_share_memory_; }
  31. static CudaCommon &GetInstance() {
  32. static CudaCommon instance;
  33. return instance;
  34. }
  35. private:
  36. CudaCommon() {
  37. uint32_t device_id = GPUDeviceManager::GetInstance().cur_device_id();
  38. cudaDeviceProp prop;
  39. (void)cudaGetDeviceProperties(&prop, device_id);
  40. threads_per_block_ = prop.maxThreadsPerBlock;
  41. max_blocks_ = prop.multiProcessorCount;
  42. major_sm_ = prop.major;
  43. max_share_memory_ = prop.sharedMemPerBlock;
  44. }
  45. ~CudaCommon() = default;
  46. CudaCommon(const CudaCommon &) = delete;
  47. CudaCommon &operator=(const CudaCommon &) = delete;
  48. int max_blocks_;
  49. int threads_per_block_;
  50. int major_sm_;
  51. size_t max_share_memory_;
  52. };
  // Convenience macros over the CudaCommon singleton. They use fully qualified names
  // because macros are not scoped by the enclosing namespaces.
  //
  // GET_BLOCKS(total_threads): forwards to CudaCommon::blocks_num(total_threads).
  53. #define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
  // GET_THREADS: forwards to CudaCommon::threads_num().
  54. #define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
  // GET_MAJOR_SM: forwards to CudaCommon::major_sm().
  55. #define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
  // SHARED_MEM_PER_BLOCK: forwards to CudaCommon::share_memory_size().
  56. #define SHARED_MEM_PER_BLOCK mindspore::device::gpu::CudaCommon::GetInstance().share_memory_size()
  // NOTE(review): presumably the minimum supported and the recommended major compute
  // capability, meant to be compared against GET_MAJOR_SM - confirm at the call sites.
  // "MINIUM" is a misspelling of "MINIMUM", kept because the name is part of the
  // public interface.
  57. #define MINIUM_SM 6
  58. #define RECOMMEND_SM 7
  59. } // namespace gpu
  60. } // namespace device
  61. } // namespace mindspore
  62. #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_