You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

arena.h 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
  18. #include <memory>
  19. #include <mutex>
  20. #include <utility>
  21. #include "minddata/dataset/util/allocator.h"
  22. #include "minddata/dataset/util/memory_pool.h"
  23. #include "minddata/dataset/util/treap.h"
  24. #ifdef ENABLE_GPUQUE
  25. #include <cuda_runtime_api.h>
  26. #endif
  27. #define ARENA_LOG_BLK_SZ (6u)
  28. #define ARENA_BLK_SZ (static_cast<uint16_t>(1u << ARENA_LOG_BLK_SZ))
  29. #define ARENA_WALL_OVERHEAD_SZ 32
  30. namespace mindspore {
  31. namespace dataset {
  32. /// This is a memory arena based on a treap data structure.
  33. /// The constructor of the Arena takes the size of the initial memory size (in MB).
  34. /// Internally we divide the memory into multiple blocks. Each block is 64 bytes.
  35. /// The treap contains all the free blocks with the relative memory address as key
  36. /// and the size of the block as priority.
  37. ///
  38. /// Initially the treap has only one root which is the whole memory piece.
  39. ///
  40. /// For memory suballocation, we pop the root node of the treap which contains the largest free block.
  41. /// We allocate what we need and return the rest back to the treap. We search for the first fit instead
  42. /// of the best fit so to give us a constant time in memory allocation.
  43. ///
  44. /// When a block of memory is freed. It is joined with the blocks before and after (if they are available) to
  45. /// form a bigger block.
  46. /// At the lowest level, we don't really care where the memory is coming from.
  47. /// This allows other class to make use of Arena method and override the origin of the
  48. /// memory, say from some unix shared memory instead.
  49. /// \note Implementation class is not thread safe. Caller needs to ensure proper serialization
class ArenaImpl {
 public:
  /// Constructor
  /// \param ptr The start of the memory address
  /// \param sz Size of the memory block we manage
  ArenaImpl(void *ptr, size_t sz);
  /// Destructor. Only drops the reference; the memory itself is owned and
  /// released by whoever supplied it (e.g. the Arena wrapper below).
  ~ArenaImpl() { ptr_ = nullptr; }
  /// \brief Allocate a sub block
  /// \param n Size requested
  /// \param p pointer to where the result is stored
  /// \return Status object.
  Status Allocate(size_t n, void **p);
  /// \brief Enlarge or shrink a sub block
  /// \param old_sz Original size
  /// \param new_sz New size
  /// \return Status object
  Status Reallocate(void **, size_t old_sz, size_t new_sz);
  /// \brief Free a sub block
  /// \param Address of the block to be freed.
  void Deallocate(void *);
  /// \brief Calculate % free of the memory
  /// \return Percent free
  int PercentFree() const;
  /// \brief What is the maximum we can support in allocate.
  /// \return Max value (total size minus the per-arena wall overhead)
  uint64_t get_max_size() const { return (size_in_bytes_ - ARENA_WALL_OVERHEAD_SZ); }
  /// \brief Get the start of the address. Read only
  /// \return Start of the address block
  const void *get_base_addr() const { return ptr_; }
  /// \brief Convert a byte count into a number of ARENA_BLK_SZ blocks.
  /// NOTE(review): implementation lives in the .cc file — presumably rounds up
  /// and accounts for the wall overhead; confirm against the definition.
  static uint64_t SizeToBlk(uint64_t sz);
  /// \brief Dump the internal free-block state for debugging.
  friend std::ostream &operator<<(std::ostream &os, const ArenaImpl &s);

 private:
  size_t size_in_bytes_;         // total size of the managed region, in bytes
  Treap<uint64_t, uint64_t> tr_; // free blocks: key = block address, priority = block size
  void *ptr_;                    // start of the managed region (not owned)
  // Translate between the user-visible address and the block base address,
  // which differ by the bookkeeping wall placed in front of each allocation.
  void *get_user_addr(void *base_addr) const { return reinterpret_cast<char *>(base_addr) + ARENA_WALL_OVERHEAD_SZ; }
  void *get_base_addr(void *user_addr) const { return reinterpret_cast<char *>(user_addr) - ARENA_WALL_OVERHEAD_SZ; }
  // Locate the free block immediately preceding addr (if any); the bool flags success.
  std::pair<std::pair<uint64_t, uint64_t>, bool> FindPrevBlk(uint64_t addr);
  // Try to grow a block in place; returns true on success.
  bool BlockEnlarge(uint64_t *addr, uint64_t old_sz, uint64_t new_sz);
  // Fallback for Reallocate: allocate a new block, then free the old one.
  Status FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz);
};
  91. /// \brief This version of Arena allocates from private memory
  92. class Arena : public MemoryPool {
  93. public:
  94. // Disable copy and assignment constructor
  95. Arena(const Arena &) = delete;
  96. Arena &operator=(const Arena &) = delete;
  97. ~Arena() override {
  98. #ifdef ENABLE_GPUQUE
  99. if (is_cuda_malloc_) {
  100. if (ptr_) {
  101. (void)cudaFreeHost(ptr_);
  102. }
  103. }
  104. #else
  105. if (ptr_ != nullptr) {
  106. free(ptr_);
  107. }
  108. ptr_ = nullptr;
  109. #endif
  110. }
  111. /// As a derived class of MemoryPool, we have to implement the following.
  112. /// But we simply transfer the call to the implementation class
  113. Status Allocate(size_t size, void **pVoid) override {
  114. std::unique_lock<std::mutex> lock(mux_);
  115. return impl_->Allocate(size, pVoid);
  116. }
  117. Status Reallocate(void **pVoid, size_t old_sz, size_t new_sz) override {
  118. std::unique_lock<std::mutex> lock(mux_);
  119. return impl_->Reallocate(pVoid, old_sz, new_sz);
  120. }
  121. void Deallocate(void *pVoid) override {
  122. std::unique_lock<std::mutex> lock(mux_);
  123. impl_->Deallocate(pVoid);
  124. }
  125. uint64_t get_max_size() const override { return impl_->get_max_size(); }
  126. int PercentFree() const override {
  127. std::unique_lock<std::mutex> lock(mux_);
  128. return impl_->PercentFree();
  129. }
  130. /// \return Return the start of the memory block
  131. const void *get_base_addr() const { return impl_->get_base_addr(); }
  132. /// \brief Dump the memory allocation block.
  133. friend std::ostream &operator<<(std::ostream &os, const Arena &s) {
  134. os << *(s.impl_);
  135. return os;
  136. }
  137. #ifdef ENABLE_GPUQUE
  138. /// The only method to create an arena.
  139. static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
  140. #else
  141. /// The only method to create an arena.
  142. static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
  143. #endif
  144. protected:
  145. mutable std::mutex mux_;
  146. std::unique_ptr<ArenaImpl> impl_;
  147. void *ptr_;
  148. size_t size_in_MB_;
  149. #ifdef ENABLE_GPUQUE
  150. bool is_cuda_malloc_;
  151. explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
  152. #else
  153. explicit Arena(size_t val_in_MB = 4096);
  154. #endif
  155. Status Init();
  156. };
  157. } // namespace dataset
  158. } // namespace mindspore
  159. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_