/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_

#include <memory>
#include <mutex>
#include <utility>
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/memory_pool.h"
#include "minddata/dataset/util/treap.h"

#ifdef ENABLE_GPUQUE
#include <cuda_runtime_api.h>
#endif

#define ARENA_LOG_BLK_SZ (6u)
#define ARENA_BLK_SZ (static_cast<uint16_t>(1u << ARENA_LOG_BLK_SZ))
#define ARENA_WALL_OVERHEAD_SZ 32

namespace mindspore {
namespace dataset {
/// This is a memory arena based on a treap data structure.
/// The constructor of the Arena takes the initial memory size (in MB).
/// Internally we divide the memory into multiple blocks. Each block is 64 bytes.
/// The treap contains all the free blocks, with the relative memory address as key
/// and the size of the block as priority.
///
/// Initially the treap has only one node, the root, which is the whole memory piece.
///
/// For memory suballocation, we pop the root node of the treap, which contains the largest free block.
/// We allocate what we need and return the rest to the treap. We search for the first fit instead
/// of the best fit so as to give us constant-time memory allocation.
///
/// When a block of memory is freed, it is joined with the blocks before and after it (if they are free) to
/// form a bigger block.
///
/// At the lowest level, we don't really care where the memory is coming from.
/// This allows other classes to make use of the Arena methods and override the origin of the
/// memory, say with some unix shared memory instead.
/// \note The implementation class is not thread safe. The caller needs to ensure proper serialization.
class ArenaImpl {
 public:
  /// Constructor
  /// \param ptr The start of the memory address
  /// \param sz Size of the memory block we manage
  ArenaImpl(void *ptr, size_t sz);
  ~ArenaImpl() { ptr_ = nullptr; }

  /// \brief Allocate a sub block
  /// \param n Size requested
  /// \param p pointer to where the result is stored
  /// \return Status object.
  Status Allocate(size_t n, void **p);

  /// \brief Enlarge or shrink a sub block
  /// \param old_sz Original size
  /// \param new_sz New size
  /// \return Status object
  Status Reallocate(void **, size_t old_sz, size_t new_sz);

  /// \brief Free a sub block
  /// \param Address of the block to be freed.
  void Deallocate(void *);

  /// \brief Calculate % free of the memory
  /// \return Percent free
  int PercentFree() const;

  /// \brief What is the maximum we can support in allocate.
  /// \return Max value
  uint64_t get_max_size() const { return (size_in_bytes_ - ARENA_WALL_OVERHEAD_SZ); }

  /// \brief Get the start of the address. Read only
  /// \return Start of the address block
  const void *get_base_addr() const { return ptr_; }

  static uint64_t SizeToBlk(uint64_t sz);

  friend std::ostream &operator<<(std::ostream &os, const ArenaImpl &s);

 private:
  size_t size_in_bytes_;
  Treap<uint64_t, uint64_t> tr_;
  void *ptr_;

  void *get_user_addr(void *base_addr) const { return reinterpret_cast<char *>(base_addr) + ARENA_WALL_OVERHEAD_SZ; }
  void *get_base_addr(void *user_addr) const { return reinterpret_cast<char *>(user_addr) - ARENA_WALL_OVERHEAD_SZ; }
  std::pair<std::pair<uint64_t, uint64_t>, bool> FindPrevBlk(uint64_t addr);
  bool BlockEnlarge(uint64_t *addr, uint64_t old_sz, uint64_t new_sz);
  Status FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz);
};
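
// Illustrative sketch (not part of the API): driving ArenaImpl directly over
// caller-supplied memory. ArenaImpl never owns its backing store, so the same
// code would work over, e.g., unix shared memory. The buffer name and sizes
// below are arbitrary example values; Status::IsOk() is assumed per the usual
// convention in this module.
//
//   constexpr size_t kSz = 1 << 20;        // manage a 1 MB buffer
//   void *buf = malloc(kSz);               // origin of the memory is the caller's choice
//   ArenaImpl impl(buf, kSz);              // treap starts with one free block: the whole buffer
//   void *p = nullptr;
//   Status rc = impl.Allocate(1024, &p);   // carve 1 KB off the largest free block
//   if (rc.IsOk()) {
//     impl.Deallocate(p);                  // freed block re-joins its free neighbours
//   }
//   free(buf);                             // caller still owns the backing memory
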
/// \brief This version of Arena allocates from private memory
class Arena : public MemoryPool {
 public:
  // Disable copy and assignment constructor
  Arena(const Arena &) = delete;
  Arena &operator=(const Arena &) = delete;

  ~Arena() override {
#ifdef ENABLE_GPUQUE
    if (is_cuda_malloc_) {
      if (ptr_ != nullptr) {
        (void)cudaFreeHost(ptr_);
      }
    } else {
      // Non-CUDA backing memory is assumed to come from malloc() in Init();
      // release it here as well so this configuration does not leak.
      if (ptr_ != nullptr) {
        free(ptr_);
      }
    }
    ptr_ = nullptr;
#else
    if (ptr_ != nullptr) {
      free(ptr_);
    }
    ptr_ = nullptr;
#endif
  }

  /// As a derived class of MemoryPool, we have to implement the following,
  /// but we simply forward the call to the implementation class.
  Status Allocate(size_t size, void **pVoid) override {
    std::unique_lock<std::mutex> lock(mux_);
    return impl_->Allocate(size, pVoid);
  }

  Status Reallocate(void **pVoid, size_t old_sz, size_t new_sz) override {
    std::unique_lock<std::mutex> lock(mux_);
    return impl_->Reallocate(pVoid, old_sz, new_sz);
  }

  void Deallocate(void *pVoid) override {
    std::unique_lock<std::mutex> lock(mux_);
    impl_->Deallocate(pVoid);
  }

  uint64_t get_max_size() const override { return impl_->get_max_size(); }

  int PercentFree() const override {
    std::unique_lock<std::mutex> lock(mux_);
    return impl_->PercentFree();
  }

  /// \return Return the start of the memory block
  const void *get_base_addr() const { return impl_->get_base_addr(); }

  /// \brief Dump the memory allocation block.
  friend std::ostream &operator<<(std::ostream &os, const Arena &s) {
    os << *(s.impl_);
    return os;
  }

#ifdef ENABLE_GPUQUE
  /// The only method to create an arena.
  static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
#else
  /// The only method to create an arena.
  static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
#endif

 protected:
  mutable std::mutex mux_;
  std::unique_ptr<ArenaImpl> impl_;
  void *ptr_;
  size_t size_in_MB_;
#ifdef ENABLE_GPUQUE
  bool is_cuda_malloc_;

  explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
#else
  explicit Arena(size_t val_in_MB = 4096);
#endif

  Status Init();
};
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
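
// Illustrative usage sketch (not part of this header's contract): creating and
// using an Arena through its factory method. Unlike ArenaImpl, Arena serializes
// every call internally via mux_, so concurrent callers need no extra locking.
// The 64 MB size is an arbitrary example value; Status::IsOk() is assumed as above.
//
//   std::shared_ptr<Arena> arena;
//   Status rc = Arena::CreateArena(&arena, 64);  // 64 MB arena from private memory
//   if (rc.IsOk()) {
//     void *p = nullptr;
//     rc = arena->Allocate(1024, &p);
//     if (rc.IsOk()) {
//       arena->Deallocate(p);
//     }
//   }  // the Arena destructor releases the backing memory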