|
- /**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
- #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
-
- #include <cstdlib>
- #include <memory>
- #include <mutex>
- #include <utility>
- #include "minddata/dataset/util/allocator.h"
- #include "minddata/dataset/util/memory_pool.h"
- #include "minddata/dataset/util/treap.h"
- #ifdef ENABLE_GPUQUE
- #include <cuda_runtime_api.h>
- #endif
-
- #define ARENA_LOG_BLK_SZ (6u)  // log2 of the block size in bytes
- #define ARENA_BLK_SZ (static_cast<uint16_t>(1u << ARENA_LOG_BLK_SZ))  // block size in bytes (1 << 6 == 64)
- #define ARENA_WALL_OVERHEAD_SZ 32  // bytes between a block's base address and its user address
- namespace mindspore {
- namespace dataset {
- /// This is a memory arena based on a treap data structure.
- /// The Arena constructor takes the size of the initial memory (in MB).
- /// Internally we divide the memory into multiple blocks. Each block is 64 bytes.
- /// The treap contains all the free blocks with the relative memory address as key
- /// and the size of the block as priority.
- ///
- /// Initially the treap has only one root which is the whole memory piece.
- ///
- /// For memory suballocation, we pop the root node of the treap which contains the largest free block.
- /// We allocate what we need and return the rest back to the treap. We search for the first fit instead
- /// of the best fit so as to give us constant-time memory allocation.
- ///
- /// When a block of memory is freed, it is joined with the blocks before and after (if they are available) to
- /// form a bigger block.
-
- /// At the lowest level, we don't really care where the memory is coming from.
- /// This allows other classes to make use of the Arena methods and override the origin of the
- /// memory, say from some Unix shared memory instead.
- /// \note The implementation class is not thread safe. The caller needs to ensure proper serialization.
- class ArenaImpl {
-  public:
-   /// \brief Set up the arena bookkeeping over a caller-supplied memory region.
-   /// \param ptr Start address of the memory region
-   /// \param sz Size of the memory region being managed
-   ArenaImpl(void *ptr, size_t sz);
-
-   /// \brief The region itself is not released here; only the stored pointer is cleared.
-   ~ArenaImpl() {
-     ptr_ = nullptr;
-   }
-
-   /// \brief Carve a sub block out of the arena
-   /// \param n Number of bytes requested
-   /// \param p Output location that receives the address of the sub block
-   /// \return Status object
-   Status Allocate(size_t n, void **p);
-
-   /// \brief Grow or shrink a previously allocated sub block
-   /// \param pp In/out pointer to the sub block
-   /// \param old_sz Size the sub block currently has
-   /// \param new_sz Size the sub block should have
-   /// \return Status object
-   Status Reallocate(void **pp, size_t old_sz, size_t new_sz);
-
-   /// \brief Give a sub block back to the arena
-   /// \param p Address of the sub block to be freed
-   void Deallocate(void *p);
-
-   /// \brief Report how much of the memory is free
-   /// \return Free memory as a percentage
-   int PercentFree() const;
-
-   /// \brief Largest request that Allocate can support
-   /// \return The managed size less the per-allocation wall overhead
-   uint64_t get_max_size() const {
-     return size_in_bytes_ - ARENA_WALL_OVERHEAD_SZ;
-   }
-
-   /// \brief Read-only access to the start of the managed region
-   /// \return Start address of the region
-   const void *get_base_addr() const {
-     return ptr_;
-   }
-
-   /// \brief Convert a size into a number of blocks
-   /// \param sz Size to convert
-   /// \return Number of blocks
-   static uint64_t SizeToBlk(uint64_t sz);
-
-   /// \brief Stream a description of the arena
-   friend std::ostream &operator<<(std::ostream &os, const ArenaImpl &s);
-
-  private:
-   size_t size_in_bytes_;
-   Treap<uint64_t, uint64_t> tr_;
-   void *ptr_;
-
-   /// \brief Step forward over the wall overhead: base address -> address handed to the user
-   void *get_user_addr(void *base_addr) const {
-     char *const base = static_cast<char *>(base_addr);
-     return base + ARENA_WALL_OVERHEAD_SZ;
-   }
-
-   /// \brief Step back over the wall overhead: user address -> base address
-   void *get_base_addr(void *user_addr) const {
-     char *const user = static_cast<char *>(user_addr);
-     return user - ARENA_WALL_OVERHEAD_SZ;
-   }
-
-   std::pair<std::pair<uint64_t, uint64_t>, bool> FindPrevBlk(uint64_t addr);
-   bool BlockEnlarge(uint64_t *addr, uint64_t old_sz, uint64_t new_sz);
-   Status FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz);
- };
-
- /// \brief This version of Arena allocates from private memory.
- /// Allocation requests are forwarded to an ArenaImpl while holding an internal mutex.
- class Arena : public MemoryPool {
-  public:
-   // Disable the copy constructor and copy assignment
-   Arena(const Arena &) = delete;
-   Arena &operator=(const Arena &) = delete;
-
-   /// \brief Release the backing memory block.
-   /// In an ENABLE_GPUQUE build the block is released with cudaFreeHost() when is_cuda_malloc_ is set,
-   /// and with free() otherwise. In all other builds it is released with free().
-   /// NOTE(review): the free() path assumes Init() obtains non-CUDA memory from a malloc-compatible
-   /// allocator in both build flavours -- confirm against the implementation file.
-   ~Arena() override {
-     if (ptr_ != nullptr) {
- #ifdef ENABLE_GPUQUE
-       if (is_cuda_malloc_) {
-         (void)cudaFreeHost(ptr_);
-       } else {
-         // Memory that did not come from CUDA must be released too, otherwise it leaks in GPU builds.
-         free(ptr_);
-       }
- #else
-       free(ptr_);
- #endif
-       ptr_ = nullptr;
-     }
-   }
-
-   /// As a derived class of MemoryPool, we have to implement the following.
-   /// But we simply transfer the call to the implementation class under the protection of mux_.
-
-   /// \brief Allocate a sub block
-   /// \param size Size requested
-   /// \param pVoid Pointer to where the result is stored
-   /// \return Status object
-   Status Allocate(size_t size, void **pVoid) override {
-     std::lock_guard<std::mutex> lock(mux_);
-     return impl_->Allocate(size, pVoid);
-   }
-
-   /// \brief Enlarge or shrink a sub block
-   /// \param pVoid In/out pointer to the sub block
-   /// \param old_sz Original size
-   /// \param new_sz New size
-   /// \return Status object
-   Status Reallocate(void **pVoid, size_t old_sz, size_t new_sz) override {
-     std::lock_guard<std::mutex> lock(mux_);
-     return impl_->Reallocate(pVoid, old_sz, new_sz);
-   }
-
-   /// \brief Free a sub block
-   /// \param pVoid Address of the block to be freed
-   void Deallocate(void *pVoid) override {
-     std::lock_guard<std::mutex> lock(mux_);
-     impl_->Deallocate(pVoid);
-   }
-
-   /// \brief What is the maximum we can support in allocate. Does not take mux_.
-   /// \return Max value
-   uint64_t get_max_size() const override { return impl_->get_max_size(); }
-
-   /// \brief Calculate % free of the memory
-   /// \return Percent free
-   int PercentFree() const override {
-     std::lock_guard<std::mutex> lock(mux_);
-     return impl_->PercentFree();
-   }
-
-   /// \return Return the start of the memory block. Does not take mux_.
-   const void *get_base_addr() const { return impl_->get_base_addr(); }
-
-   /// \brief Dump the memory allocation block.
-   /// \note mux_ is not taken here.
-   friend std::ostream &operator<<(std::ostream &os, const Arena &s) {
-     os << *(s.impl_);
-     return os;
-   }
-
- #ifdef ENABLE_GPUQUE
-   /// \brief The only method to create an arena.
-   /// \param p_ba Output location for the created arena
-   /// \param val_in_MB Size of the arena in MB
-   /// \param is_cuda_malloc When true the memory block is treated as CUDA host memory
-   /// \return Status object
-   static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
- #else
-   /// \brief The only method to create an arena.
-   /// \param p_ba Output location for the created arena
-   /// \param val_in_MB Size of the arena in MB
-   /// \return Status object
-   static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
- #endif
-
-  protected:
-   mutable std::mutex mux_;           // serializes the calls forwarded to impl_
-   std::unique_ptr<ArenaImpl> impl_;  // does the actual bookkeeping
-   void *ptr_;                        // backing memory block, released in the destructor
-   size_t size_in_MB_;
- #ifdef ENABLE_GPUQUE
-   bool is_cuda_malloc_;  // selects cudaFreeHost() over free() when releasing ptr_
-
-   explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
- #else
-
-   explicit Arena(size_t val_in_MB = 4096);
- #endif
-
-   Status Init();
- };
- } // namespace dataset
- } // namespace mindspore
-
- #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ARENA_H_
|