You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

blocking_queue.cc 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "runtime/device/gpu/blocking_queue.h"
  17. #include <chrono>
  18. #include "runtime/device/gpu/queue_common.h"
  19. #include "utils/ms_utils.h"
  20. namespace mindspore {
  21. namespace device {
  22. const size_t kTimeout = 100;
  23. GpuQueue::GpuQueue(void *addr, const std::vector<size_t> &shape, const size_t &capacity)
  24. : buffer_(addr),
  25. head_(0),
  26. tail_(0),
  27. shape_(shape),
  28. len_(0),
  29. size_(0),
  30. capacity_(capacity),
  31. stream_(0),
  32. node_info_(nullptr) {
  33. CHECK_CUDA_RET_WITH_ERROR(cudaStreamCreate(&stream_), "Cuda Create Stream Failed");
  34. node_info_ = std::make_unique<NodeInfo[]>(capacity);
  35. for (auto item : shape) {
  36. len_ += item;
  37. }
  38. }
  39. GpuQueue::~GpuQueue() { buffer_ = nullptr; }
  40. BlockQueueStatus_T GpuQueue::Push(const std::vector<DataItemGpu> &data) {
  41. int offset = 0;
  42. for (size_t i = 0; i < data.size(); i++) {
  43. auto item = data[i];
  44. if (item.data_ptr_ == nullptr || item.data_len_ != shape_[i]) {
  45. MS_LOG(ERROR) << "Invalid Input: ptr: " << item.data_ptr_ << ", len: " << item.data_len_;
  46. return ERROR_INPUT;
  47. }
  48. void *addr = reinterpret_cast<unsigned char *>(buffer_) + tail_ * len_ + offset;
  49. CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(addr, item.data_ptr_, item.data_len_, cudaMemcpyHostToDevice, stream_),
  50. "Cuda Memcpy Error");
  51. offset += item.data_len_;
  52. }
  53. node_info_[tail_].event_.reset(new cudaEvent_t());
  54. CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&(*(node_info_[tail_].event_))), "Cuda Create Event Failed");
  55. CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(*(node_info_[tail_].event_), stream_), "Cuda Create Event Failed");
  56. node_info_[tail_].data_ = data;
  57. tail_ = (tail_ + 1) % (capacity_);
  58. ++size_;
  59. return SUCCESS;
  60. }
  61. BlockQueueStatus_T GpuQueue::Front(void **addr, size_t *len) const {
  62. CHECK_CUDA_RET_WITH_ERROR(cudaEventSynchronize(*(node_info_[head_].event_)), "Cuda Event Syn Failed");
  63. CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(*(node_info_[head_].event_)), "Cuda Destroy Event Failed");
  64. *addr = (unsigned char *)buffer_ + head_ * len_;
  65. *len = len_;
  66. for (auto item : node_info_[head_].data_) {
  67. host_release_(item.data_ptr_, item.worker_id_);
  68. }
  69. return SUCCESS;
  70. }
  71. BlockQueueStatus_T GpuQueue::Pop() {
  72. head_ = (head_ + 1) % (capacity_);
  73. --size_;
  74. return SUCCESS;
  75. }
  76. bool GpuQueue::Destroy() {
  77. if (stream_ != nullptr) {
  78. auto ret = cudaStreamDestroy(stream_);
  79. if (ret == cudaSuccess) {
  80. return true;
  81. } else {
  82. return false;
  83. }
  84. } else {
  85. return true;
  86. }
  87. }
  88. BlockQueueStatus_T BlockingQueue::Create(void *addr, const std::vector<size_t> &shape, const size_t &capacity) {
  89. if (addr == nullptr) {
  90. MS_LOG(ERROR) << "addr is nullptr";
  91. return INTERNAL_ERROR;
  92. }
  93. queue_ = std::make_shared<GpuQueue>(addr, shape, capacity);
  94. return SUCCESS;
  95. }
  96. void BlockingQueue::RegisterRelease(const std::function<void(void *, int32_t)> &func) { queue_->RegisterRelease(func); }
  97. BlockQueueStatus_T BlockingQueue::Push(const std::vector<DataItemGpu> &data, unsigned int) {
  98. std::unique_lock<std::mutex> locker(mutex_);
  99. if (queue_->IsFull()) {
  100. if (not_full_cond_.wait_for(locker, std::chrono::microseconds(kTimeout)) == std::cv_status::timeout) {
  101. return TIMEOUT;
  102. }
  103. }
  104. auto ret = queue_->Push(data);
  105. if (ret) {
  106. return ret;
  107. }
  108. not_empty_cond_.notify_one();
  109. return SUCCESS;
  110. }
  111. BlockQueueStatus_T BlockingQueue::Front(void **addr, size_t *len) {
  112. std::unique_lock<std::mutex> locker(mutex_);
  113. bool timeout = not_empty_cond_.wait_for(locker, std::chrono::seconds(30), [this] { return !queue_->IsEmpty(); });
  114. if (!timeout) {
  115. return TIMEOUT;
  116. }
  117. return queue_->Front(addr, len);
  118. }
  119. BlockQueueStatus_T BlockingQueue::Pop() {
  120. std::unique_lock<std::mutex> locker(mutex_);
  121. not_empty_cond_.wait(locker, [this] { return !queue_->IsEmpty(); });
  122. auto ret = queue_->Pop();
  123. if (ret) {
  124. return ret;
  125. }
  126. not_full_cond_.notify_one();
  127. return SUCCESS;
  128. }
  129. bool BlockingQueue::Destroy() {
  130. if (queue_ != nullptr) {
  131. return queue_->Destroy();
  132. } else {
  133. return true;
  134. }
  135. }
  136. } // namespace device
  137. } // namespace mindspore