You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cache_pool.cc 7.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <algorithm>
  17. #include "utils/ms_utils.h"
  18. #include "minddata/dataset/util/cache_pool.h"
  19. #include "minddata/dataset/util/services.h"
  20. namespace mindspore {
  21. namespace dataset {
  22. CachePool::CachePool(const value_allocator &alloc, bool ourOwnArena, const std::string &root)
  23. : alloc_(alloc),
  24. root_(root),
  25. subfolder_(Services::GetUniqueID()),
  26. sm_(nullptr),
  27. tree_(nullptr),
  28. custom_arena_(ourOwnArena) {}
  29. Status CachePool::DoServiceStart() {
  30. tree_ = std::make_shared<data_index>();
  31. // If we are given a disk path, set up the StorageManager
  32. if (!root_.toString().empty()) {
  33. Path spill = GetSpillPath();
  34. RETURN_IF_NOT_OK(spill.CreateDirectories());
  35. sm_ = std::make_shared<StorageManager>(spill);
  36. RETURN_IF_NOT_OK(sm_->ServiceStart());
  37. MS_LOG(INFO) << "CachePool will use disk folder: " << common::SafeCStr(spill.toString());
  38. }
  39. return Status::OK();
  40. }
  41. Status CachePool::DoServiceStop() {
  42. Status rc;
  43. Status rc2;
  44. if (sm_ != nullptr) {
  45. rc = sm_->ServiceStop();
  46. if (rc.IsError()) {
  47. rc2 = rc;
  48. }
  49. }
  50. sm_.reset();
  51. // If it is our own arena, skip freeing individual pieces.
  52. if (!custom_arena_) {
  53. for (auto &bl : *tree_) {
  54. if (bl.ptr != nullptr) {
  55. alloc_.deallocate(bl.ptr, bl.sz);
  56. }
  57. }
  58. }
  59. tree_.reset();
  60. if (!root_.toString().empty()) {
  61. Path spill = GetSpillPath();
  62. auto it = Path::DirIterator::OpenDirectory(&spill);
  63. while (it->hasNext()) {
  64. rc = it->next().Remove();
  65. if (rc.IsError() && rc2.IsOk()) {
  66. rc2 = rc;
  67. }
  68. }
  69. rc = spill.Remove();
  70. if (rc.IsError() && rc2.IsOk()) {
  71. rc2 = rc;
  72. }
  73. }
  74. return rc2;
  75. }
  76. CachePool::~CachePool() noexcept { (void)ServiceStop(); }
  77. Status CachePool::Insert(CachePool::key_type key, const std::vector<ReadableSlice> &buf, bool writeToDiskDirectly) {
  78. DataLocator bl;
  79. Status rc;
  80. size_t sz = 0;
  81. // We will consolidate all the slices into one piece.
  82. for (auto &v : buf) {
  83. sz += v.GetSize();
  84. }
  85. bl.sz = sz;
  86. try {
  87. if (!writeToDiskDirectly) {
  88. bl.ptr = alloc_.allocate(sz);
  89. // We will do a piecewise copy.
  90. WritableSlice dest(bl.ptr, bl.sz);
  91. size_t pos = 0;
  92. for (auto &v : buf) {
  93. WritableSlice out(dest, pos);
  94. rc = WritableSlice::Copy(&out, v);
  95. if (rc.IsError()) {
  96. break;
  97. }
  98. pos += v.GetSize();
  99. }
  100. if (rc.IsError()) {
  101. alloc_.deallocate(bl.ptr, sz);
  102. bl.ptr = nullptr;
  103. return rc;
  104. }
  105. } else if (sm_ != nullptr) {
  106. MS_LOG(DEBUG) << "Spill to disk directly ... " << bl.sz << " bytes.";
  107. RETURN_IF_NOT_OK(sm_->Write(&bl.storage_key, buf));
  108. } else {
  109. // If asked to spill to disk instead but there is no storage set up, simply return no memory
  110. // instead.
  111. return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__);
  112. }
  113. } catch (std::bad_alloc &e) {
  114. if (sm_ != nullptr) {
  115. RETURN_IF_NOT_OK(sm_->Write(&bl.storage_key, buf));
  116. } else {
  117. return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__);
  118. }
  119. }
  120. // Insert into the B+ tree. We may still get out of memory error. So need to catch it.
  121. try {
  122. rc = tree_->DoInsert(key, bl);
  123. } catch (const std::bad_alloc &e) {
  124. rc = Status(StatusCode::kOutOfMemory, __LINE__, __FILE__);
  125. }
  126. // Duplicate key is treated as error and we will also free the memory.
  127. if (rc.IsError() && bl.ptr != nullptr) {
  128. alloc_.deallocate(bl.ptr, sz);
  129. }
  130. return rc;
  131. }
  132. Status CachePool::Read(CachePool::key_type key, WritableSlice *dest, size_t *bytesRead) const {
  133. RETURN_UNEXPECTED_IF_NULL(dest);
  134. auto r = tree_->Search(key);
  135. if (r.second) {
  136. auto &it = r.first;
  137. if (it->ptr != nullptr) {
  138. ReadableSlice src(it->ptr, it->sz);
  139. RETURN_IF_NOT_OK(WritableSlice::Copy(dest, src));
  140. } else if (sm_ != nullptr) {
  141. size_t expectedLength = 0;
  142. RETURN_IF_NOT_OK(sm_->Read(it->storage_key, dest, &expectedLength));
  143. if (expectedLength != it->sz) {
  144. MS_LOG(ERROR) << "Unexpected length. Read " << expectedLength << ". Expected " << it->sz << "."
  145. << " Internal key: " << key << "\n";
  146. RETURN_STATUS_UNEXPECTED("Length mismatch. See log file for details.");
  147. }
  148. }
  149. if (bytesRead != nullptr) {
  150. *bytesRead = it->sz;
  151. }
  152. } else {
  153. RETURN_STATUS_UNEXPECTED("Key not found");
  154. }
  155. return Status::OK();
  156. }
  157. const CachePool::value_allocator &CachePool::get_allocator() const { return alloc_; }
  158. Path CachePool::GetSpillPath() const {
  159. auto spill = Path(root_) / subfolder_;
  160. return spill;
  161. }
  162. CachePool::CacheStat CachePool::GetStat(bool GetMissingKeys) const {
  163. CacheStat cs{-1, -1, 0, 0, 0};
  164. int64_t total_sz = 0;
  165. if (tree_->begin() != tree_->end()) {
  166. cs.min_key = tree_->begin().key();
  167. cs.max_key = cs.min_key; // will adjust later.
  168. for (auto it = tree_->begin(); it != tree_->end(); ++it) {
  169. total_sz += it.value().sz;
  170. if (it.value().ptr != nullptr) {
  171. ++cs.num_mem_cached;
  172. } else {
  173. ++cs.num_disk_cached;
  174. }
  175. auto cur_key = it.key();
  176. if (GetMissingKeys) {
  177. for (auto i = cs.max_key + 1; i < cur_key; ++i) {
  178. cs.gap.push_back((i));
  179. }
  180. }
  181. cs.max_key = cur_key;
  182. }
  183. }
  184. if (total_sz > 0) {
  185. // integer arithmetic. NO need to cast to float or double.
  186. cs.average_cache_sz = total_sz / (cs.num_disk_cached + cs.num_mem_cached);
  187. if (cs.average_cache_sz == 0) {
  188. cs.average_cache_sz = 1;
  189. }
  190. }
  191. return cs;
  192. }
  193. Status CachePool::Spill(CachePool::DataLocator *dl) {
  194. if (sm_ == nullptr) {
  195. RETURN_STATUS_UNEXPECTED("No disk storage to spill");
  196. }
  197. RETURN_UNEXPECTED_IF_NULL(dl);
  198. RETURN_UNEXPECTED_IF_NULL(dl->ptr);
  199. if (dl->storage_key == 0) {
  200. ReadableSlice data(dl->ptr, dl->sz);
  201. RETURN_IF_NOT_OK(sm_->Write(&dl->storage_key, {data}));
  202. }
  203. alloc_.deallocate(dl->ptr, dl->sz);
  204. dl->ptr = nullptr;
  205. return Status::OK();
  206. }
  207. Status CachePool::Locate(CachePool::DataLocator *dl) {
  208. RETURN_UNEXPECTED_IF_NULL(dl);
  209. if (dl->ptr == nullptr) {
  210. if (sm_ == nullptr) {
  211. RETURN_STATUS_UNEXPECTED("No disk storage to locate the data");
  212. }
  213. try {
  214. dl->ptr = alloc_.allocate(dl->sz);
  215. WritableSlice dest(dl->ptr, dl->sz);
  216. Status rc = Read(dl->storage_key, &dest);
  217. if (rc.IsError()) {
  218. alloc_.deallocate(dl->ptr, dl->sz);
  219. dl->ptr = nullptr;
  220. return rc;
  221. }
  222. } catch (const std::bad_alloc &e) {
  223. return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__);
  224. }
  225. }
  226. return Status::OK();
  227. }
  228. size_t CachePool::GetSize(CachePool::key_type key) const {
  229. auto r = tree_->Search(key);
  230. if (r.second) {
  231. auto &it = r.first;
  232. return it->sz;
  233. } else {
  234. return 0;
  235. }
  236. }
  237. } // namespace dataset
  238. } // namespace mindspore