You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

tensor_load.h 11 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /**
  2. * Copyright 2019-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
  17. #define MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
#include <algorithm>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
  27. #include "debug/tensor_data.h"
  28. #ifdef ONLINE_DBG_MODE
  29. #include "debug/data_dump/dump_json_parser.h"
  30. namespace mindspore {
  31. #endif
  32. class TensorLoader {
  33. public:
  34. #ifndef __APPLE__
  35. TensorLoader() : iter_num_(-1), mem_total_(0), mem_usage_(0) {}
  36. #else
  37. TensorLoader() : mem_total_(0), mem_usage_(0) {}
  38. #endif
  39. ~TensorLoader() { EmptyTensor(); }
  40. void MoveTensorCurrentToPrev(std::string tensor_name) {
  41. auto handle = tensor_list_map_.extract(tensor_name);
  42. if (!handle.empty()) {
  43. MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
  44. prev_tensor_list_map_.insert(std::move(handle));
  45. }
  46. }
  47. void SwapCurrentPrev() { tensor_list_map_.swap(prev_tensor_list_map_); }
  48. bool TensorExistsInCurrent(std::string tensor_name) const {
  49. return tensor_list_map_.find(tensor_name) != tensor_list_map_.end();
  50. }
  51. // only parameters will return true
  52. bool PrevTensorExistsInCurrent(std::string tensor_name) const { return TensorExistsInCurrent(tensor_name + ":prev"); }
  53. void MoveParametersCurrentToPrev() {
  54. MS_LOG(INFO) << "Moving parameters from current map to previous map";
  55. auto iter = tensor_list_map_.begin();
  56. while (iter != tensor_list_map_.end()) {
  57. auto key = iter->first;
  58. if (PrevTensorExistsInCurrent(key)) {
  59. // :prev tensor only exists for parameter. Move it to prev
  60. ++iter;
  61. MoveTensorCurrentToPrev(key);
  62. } else {
  63. ++iter;
  64. }
  65. }
  66. }
  67. bool IsPrevTensor(std::string tensor_name) const {
  68. const std::string suffix = ":prev";
  69. if (tensor_name.length() <= suffix.length()) return false;
  70. return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
  71. }
  72. /*
  73. * Feature group: Dump, Online debugger and Offline debugger.
  74. * Target device group: Ascend, GPU.
  75. * Runtime category: Old runtime, MindRT.
  76. * Description: Load new tensor into tensor_list_map_ (debugger backend cache). In offline debugger, add ":prev" to
  77. * the previous tensor's name to avoid segfault caused by wrongly evicting the tensor when memory limit is enabled.
  78. */
  79. bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
  80. lock_.lock();
  81. auto tensor_name = tensor->GetName();
  82. if (keep_prev) {
  83. // add prev step tensor into current step map with ":prev" suffix
  84. auto handle = prev_tensor_list_map_.extract(tensor_name);
  85. if (!handle.empty()) {
  86. handle.key() = tensor_name + ":prev";
  87. tensor_list_map_.insert(std::move(handle));
  88. }
  89. }
  90. std::string key_name = tensor_name;
  91. #ifdef OFFLINE_DBG_MODE
  92. key_name += (":" + std::to_string(tensor->GetDeviceId()) + ":" + std::to_string(tensor->GetRootGraphId()) + ":" +
  93. std::to_string(tensor->GetIsOutput()) + ":" + std::to_string(tensor->GetSlot()));
  94. if (tensor_list_map_.find(key_name) != tensor_list_map_.end() &&
  95. tensor->GetIteration() == tensor_list_map_[key_name]->GetPrevIteration()) {
  96. key_name += ":prev";
  97. }
  98. #endif
  99. tensor_list_map_[key_name] = tensor; // use [] instead of insert to ensure latest value
  100. lock_.unlock();
  101. return true;
  102. }
  103. std::vector<std::shared_ptr<TensorData>> GetTensor() {
  104. std::vector<std::shared_ptr<TensorData>> tensor_list;
  105. for (auto &it : tensor_list_map_) {
  106. if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second);
  107. }
  108. return tensor_list;
  109. }
  110. std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) const {
  111. auto iter = tensor_list_map_.find(tensor_name);
  112. if (iter != tensor_list_map_.end()) return iter->second;
  113. return nullptr;
  114. }
  115. std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
  116. if (tensor_list_map_.find(tensor_name + ":prev") != tensor_list_map_.end()) {
  117. return tensor_list_map_[tensor_name + ":prev"];
  118. }
  119. return nullptr;
  120. }
  121. /*
  122. * Feature group: Online debugger.
  123. * Target device group: Ascend, GPU.
  124. * Runtime category: Old runtime, MindRT.
  125. * Description: Search and obtain TensorData for a list of tensors from tensor_list_map_ (debugger backend cache).
  126. * Return nullptr if the tensor is not found.
  127. */
  128. void SearchTensors(const std::vector<std::string> &search_list,
  129. std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) {
  130. for (auto i : search_list) {
  131. std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
  132. iter = tensor_list_map_.find(i);
  133. if (iter != tensor_list_map_.end()) {
  134. result_list->push_back(std::make_tuple(i, iter->second));
  135. } else {
  136. result_list->push_back(std::make_tuple(i, nullptr));
  137. }
  138. }
  139. }
  140. void EmptyTensor() {
  141. std::lock_guard<std::mutex> lg(lock_);
  142. prev_tensor_list_map_.clear();
  143. tensor_list_map_.swap(prev_tensor_list_map_);
  144. }
  145. void EmptyCurrentTensor() { tensor_list_map_.clear(); }
  146. bool EnableMemoryControl() { return mem_total_ > 0; }
  147. /*
  148. * Feature group: Offline debugger.
  149. * Target device group: Ascend, GPU.
  150. * Runtime category: Old runtime, MindRT.
  151. * Description: This function is for memory control feature only. When finishing using a tensor in offline debugger,
  152. * it will be added to cache_evict_queue_ and become an eviction candidate. Once there is no memory to read in a new
  153. * tensor, it will be evicted from cache.
  154. */
  155. void AppendToCacheEvictQueue(const std::string &tensor_name) {
  156. std::lock_guard<std::mutex> lk(mem_lock_);
  157. if (std::find(cache_evict_queue_.begin(), cache_evict_queue_.end(), tensor_name) == cache_evict_queue_.end()) {
  158. cache_evict_queue_.push_back(tensor_name);
  159. evict_cond.notify_one();
  160. }
  161. }
  162. /*
  163. * Feature group: Offline debugger.
  164. * Target device group: Ascend, GPU.
  165. * Runtime category: Old runtime, MindRT.
  166. * Description: This function is for memory control feature only. Check if the tensor size is greater than the preset
  167. * limit. If not, evect the candidate tensor in cache_evict_queue_ to make room for it.
  168. */
  169. bool CheckMemoryAvailable(const std::string &backend_name, const uint64_t data_size) {
  170. // 1. Check if the tensor can fit in the entire limit. If not, don't attempt any read or evictions and generate
  171. // warning.
  172. if (data_size > mem_total_) {
  173. MS_LOG(ERROR) << "Failed to load data of tensor " << backend_name << " because the its data size (" << data_size
  174. << ") exceeds the maximum memory limit (" << mem_total_ << ").";
  175. return false;
  176. }
  177. // 2. Check if there's is enough cache space available for current tensor. If not, try evict cache.
  178. bool ret = CheckAndEvictTensorCache(data_size);
  179. return ret;
  180. }
  181. /*
  182. * Feature group: Offline debugger.
  183. * Target device group: Ascend, GPU.
  184. * Runtime category: Old runtime, MindRT.
  185. * Description: This function is for memory control feature only. Greedily evict not-in-use tensors from cache queue.
  186. * If no candidate in the queue, block the thread until there is any candidate available.
  187. */
  188. bool CheckAndEvictTensorCache(const uint64_t data_size) {
  189. std::string candidate_name;
  190. uint64_t candidates_size;
  191. std::unique_lock<std::mutex> lk(mem_lock_);
  192. while (data_size > mem_total_ - mem_usage_) {
  193. // wait until there is any not-in-use candidate to be evicted from cache
  194. evict_cond.wait(lk, [&] { return !cache_evict_queue_.empty(); });
  195. candidate_name = cache_evict_queue_.front();
  196. cache_evict_queue_.pop_front();
  197. // evict candidate tensor
  198. lock_.lock();
  199. auto tensor = GetTensor(candidate_name);
  200. if (tensor == nullptr) {
  201. MS_LOG(INFO) << "Tensor: " << candidate_name << " has already been evicted.";
  202. lock_.unlock();
  203. continue;
  204. }
  205. candidates_size = tensor->GetByteSize();
  206. tensor_list_map_.erase(candidate_name);
  207. lock_.unlock();
  208. mem_usage_ = std::max(uint64_t(0), mem_usage_ - candidates_size);
  209. MS_LOG(INFO) << "Evict tensor: " << candidate_name;
  210. }
  211. // Reserve space for the current target tensor.
  212. mem_usage_ = std::min(mem_total_, mem_usage_ + data_size);
  213. return true;
  214. }
  215. void SetMemTotal(uint64_t total_mem_size) { this->mem_total_ = total_mem_size; }
  216. #ifdef ONLINE_DBG_MODE
  217. /*
  218. * Feature group: Dump.
  219. * Target device group: GPU.
  220. * Runtime category: Old runtime, MindRT.
  221. * Description: Load tensor data from debugger backend cache (tensor_list_map_) and dump to file in npy format.
  222. */
  223. bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
  224. const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
  225. TypeId device_type, const std::string &addr_format, size_t slot) {
  226. if (filepath.empty()) {
  227. MS_LOG(ERROR) << "Dump file path is null!";
  228. return false;
  229. }
  230. std::string path = "";
  231. if (trans_flag) {
  232. path = filepath + '.' + host_fmt;
  233. } else {
  234. path = filepath + '.' + addr_format;
  235. }
  236. MS_LOG(INFO) << "Dump path is " << path;
  237. std::string tensor_loader_name = tensor_name + ":" + std::to_string(slot);
  238. auto iter = tensor_list_map_.find(tensor_loader_name);
  239. if (iter != tensor_list_map_.end()) {
  240. std::shared_ptr<TensorData> node = iter->second;
  241. size_t host_size = node->GetByteSize();
  242. return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size, host_shape, host_type);
  243. }
  244. MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map_";
  245. return false;
  246. }
  247. #endif
  248. private:
  249. // the pair is (device_id, iteration)
  250. std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map_;
  251. std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map_;
  252. #ifndef __APPLE__
  253. uint32_t iter_num_;
  254. #endif
  255. std::mutex lock_;
  256. std::mutex mem_lock_;
  257. uint64_t mem_total_;
  258. uint64_t mem_usage_;
  259. std::deque<std::string> cache_evict_queue_;
  260. std::condition_variable evict_cond;
  261. };
  262. #ifdef ONLINE_DBG_MODE
  263. } // namespace mindspore
  264. #endif
  265. #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_