You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cache_pool.h 5.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_
  18. #include <memory>
  19. #include <mutex>
  20. #include <string>
  21. #include <vector>
  22. #include "minddata/dataset/util/allocator.h"
  23. #include "minddata/dataset/util/service.h"
  24. #include "minddata/dataset/util/slice.h"
  25. #include "minddata/dataset/util/storage_manager.h"
  26. #include "minddata/dataset/util/auto_index.h"
  27. #include "minddata/dataset/util/btree.h"
  28. namespace mindspore {
  29. namespace dataset {
  30. /// \brief A CachePool provides service for backup/restore a buffer. A buffer can be represented in a form of vector of
  31. /// ReadableSlice where all memory blocks will be copied to one contiguous block which can be in memory or spilled to
  32. /// disk (if a disk directory is provided). User must provide a key to insert the buffer.
  33. /// \see ReadableSlice
  34. class CachePool : public Service {
  35. public:
  36. using base_type = uint8_t;
  37. using pointer = base_type *;
  38. using const_pointer = const base_type *;
  39. using reference = base_type &;
  40. using const_reference = const base_type &;
  41. using value_allocator = Allocator<base_type>;
  42. // An internal class to locate the whereabouts of a backed up buffer which can be either in
  43. class DataLocator {
  44. public:
  45. DataLocator() : ptr(nullptr), sz(0), storage_key(0) {}
  46. ~DataLocator() = default;
  47. DataLocator(const DataLocator &other) = default;
  48. DataLocator &operator=(const DataLocator &other) = default;
  49. DataLocator(DataLocator &&other) noexcept {
  50. ptr = other.ptr;
  51. sz = other.sz;
  52. storage_key = other.storage_key;
  53. other.ptr = nullptr;
  54. other.sz = 0;
  55. other.storage_key = 0;
  56. }
  57. DataLocator &operator=(DataLocator &&other) noexcept {
  58. if (&other != this) {
  59. ptr = other.ptr;
  60. sz = other.sz;
  61. storage_key = other.storage_key;
  62. other.ptr = nullptr;
  63. other.sz = 0;
  64. other.storage_key = 0;
  65. }
  66. return *this;
  67. }
  68. pointer ptr;
  69. size_t sz;
  70. StorageManager::key_type storage_key;
  71. };
  72. using data_index = BPlusTree<int64_t, DataLocator>;
  73. using key_type = data_index::key_type;
  74. using bl_alloc_type = typename value_allocator::template rebind<DataLocator>::other;
  75. /// \brief Simple statistics returned from CachePool like how many elements are cached in memory and
  76. /// how many elements are spilled to disk.
  77. struct CacheStat {
  78. key_type min_key;
  79. key_type max_key;
  80. int64_t num_mem_cached;
  81. int64_t num_disk_cached;
  82. int64_t average_cache_sz;
  83. std::vector<key_type> gap;
  84. };
  85. /// \brief Constructor
  86. /// \param alloc Allocator to allocate memory from
  87. /// \param root Optional disk folder to spill
  88. explicit CachePool(const value_allocator &alloc, bool customArena, const std::string &root = "");
  89. CachePool(const CachePool &) = delete;
  90. CachePool(CachePool &&) = delete;
  91. CachePool &operator=(const CachePool &) = delete;
  92. CachePool &operator=(CachePool &&) = delete;
  93. ~CachePool() noexcept;
  94. Status DoServiceStart() override;
  95. Status DoServiceStop() override;
  96. Path GetSpillPath() const;
  97. /// \brief Insert a sequence of ReadableSlice objects into the pool.
  98. /// All memory blocks will be consolidated into one contiguous block and be cached in either memory or on disk.
  99. /// \param[in] key User supplied key
  100. /// \param[in] buf A sequence of ReadableSlice objects.
  101. /// \param[in] writeToDiskDirectly If true, no spill to disk if spill is enabled, or return no memory
  102. /// \return Error code
  103. Status Insert(key_type key, const std::vector<ReadableSlice> &buf, bool writeToDiskDirectly);
  104. /// \brief Restore a cached buffer (from memory or disk)
  105. /// \param[in] key A previous key returned from Insert
  106. /// \param[out] dest The cached buffer will be copied to this destination represented by a WritableSlice
  107. /// \param[out] bytesRead Optional. Number of bytes read.
  108. /// \return Error code
  109. Status Read(key_type key, WritableSlice *dest, size_t *bytesRead = nullptr) const;
  110. Status Spill(DataLocator *dl);
  111. Status Locate(DataLocator *dl);
  112. size_t GetSize(key_type key) const;
  113. /// \brief Get statistics.
  114. /// \return CacheStat object
  115. CacheStat GetStat(bool GetMissingKeys = false) const;
  116. const value_allocator &get_allocator() const;
  117. std::string MyName() const { return subfolder_; }
  118. /// \brief Toggle locking
  119. /// \note Once locking is off. It is user's responsibility to ensure concurrency
  120. void SetLocking(bool on_off) { tree_->SetLocking(on_off); }
  121. private:
  122. value_allocator alloc_;
  123. Path root_;
  124. const std::string subfolder_;
  125. std::shared_ptr<StorageManager> sm_;
  126. std::shared_ptr<data_index> tree_;
  127. bool custom_arena_;
  128. };
  129. } // namespace dataset
  130. } // namespace mindspore
  131. #endif