You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cuda_common.h 2.2 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_DEVICE_GPU_CUDA_COMMON_H_
  17. #define MINDSPORE_CCSRC_DEVICE_GPU_CUDA_COMMON_H_
  18. #include <algorithm>
  19. #include "device/gpu/gpu_device_manager.h"
  20. namespace mindspore {
  21. namespace device {
  22. namespace gpu {
  23. class CudaCommon {
  24. public:
  25. inline int threads_num() const { return threads_per_block_; }
  26. inline int major_sm() const { return major_sm_; }
  27. inline int blocks_num(const int total_threads) const {
  28. return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
  29. }
  30. static CudaCommon &GetInstance() {
  31. static CudaCommon instance;
  32. return instance;
  33. }
  34. private:
  35. CudaCommon() {
  36. uint32_t device_id = GPUDeviceManager::GetInstance().cur_device_id();
  37. cudaDeviceProp prop;
  38. (void)cudaGetDeviceProperties(&prop, device_id);
  39. threads_per_block_ = prop.maxThreadsPerBlock;
  40. max_blocks_ = prop.multiProcessorCount;
  41. major_sm_ = prop.major;
  42. }
  43. ~CudaCommon() = default;
  44. CudaCommon(const CudaCommon &) = delete;
  45. CudaCommon &operator=(const CudaCommon &) = delete;
  46. int max_blocks_;
  47. int threads_per_block_;
  48. int major_sm_;
  49. };
  // Major SM version thresholds.
  // NOTE(review): presumably compared against GET_MAJOR_SM by callers to reject
  // or warn about older devices - confirm at the call sites. The spelling
  // MINIUM_SM is kept because it is the name existing code uses.
  #define MINIUM_SM 6
  #define RECOMMEND_SM 7

  // Shorthand accessors for the CudaCommon singleton. They are fully qualified
  // so that they expand correctly outside the mindspore::device::gpu namespace.
  //   GET_MAJOR_SM              -> CudaCommon::major_sm()
  //   GET_THREADS               -> CudaCommon::threads_num()
  //   GET_BLOCKS(total_threads) -> CudaCommon::blocks_num(total_threads)
  #define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
  #define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
  #define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
  55. } // namespace gpu
  56. } // namespace device
  57. } // namespace mindspore
  58. #endif // MINDSPORE_CCSRC_DEVICE_GPU_CUDA_COMMON_H_