GitOrigin-RevId: 61874faa6d
tags/v1.0.0-rc1
| @@ -267,45 +267,59 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { | |||
| } | |||
| size_t DevMemAllocImpl::gather_stream_free_blk_and_release_full() { | |||
| size_t gathered_size = 0; | |||
| MGB_LOCK_GUARD(m_mutex); | |||
| for (auto &&pair: m_stream_alloc) { | |||
| auto ch = pair.second.get(); | |||
| auto &&chmtx = ch->m_mutex; | |||
| MGB_LOCK_GUARD(chmtx); | |||
| for (auto &&i: ch->m_free_blk_size) { | |||
| merge_free_unsafe(i.first); | |||
| gathered_size += i.first.size; | |||
| } | |||
| ch->m_free_blk_addr.clear(); | |||
| ch->m_free_blk_size.clear(); | |||
| } | |||
| mgb_assert(gathered_size <= m_used_size.load()); | |||
| m_used_size -= gathered_size; | |||
| size_t free_size = 0; | |||
| using Iter = decltype(m_free_blk_size.begin()); | |||
| std::vector<void*> to_free_by_raw; | |||
| for (Iter i = m_free_blk_size.begin(), inext; i != m_free_blk_size.end(); | |||
| i = inext) { | |||
| inext = i; | |||
| ++ inext; | |||
| auto &&blk = i->first; | |||
| if (blk.addr.is_head) { | |||
| auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||
| mgb_assert(riter != m_alloc_from_raw.end() && | |||
| blk.size <= riter->second); | |||
| if (blk.size == riter->second) { | |||
| to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||
| free_size += blk.size; | |||
| auto j = i->second.aiter; | |||
| m_free_blk_size.erase(i); | |||
| m_free_blk_addr.erase(j); | |||
| m_alloc_from_raw.erase(riter); | |||
| MGB_LOCK_GUARD(m_mutex); | |||
| auto return_full_free_blk_unsafe = [&](MemAllocImplHelper* alloc) { | |||
| auto&& free_blk_size = alloc->m_free_blk_size; | |||
| auto&& free_blk_addr = alloc->m_free_blk_addr; | |||
| using Iter = decltype(m_free_blk_size.begin()); | |||
| for (Iter i = free_blk_size.begin(), inext; i != free_blk_size.end(); | |||
| i = inext) { | |||
| inext = i; | |||
| ++ inext; | |||
| auto &&blk = i->first; | |||
| if (blk.addr.is_head) { | |||
| auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||
| mgb_assert(riter != m_alloc_from_raw.end() && | |||
| blk.size <= riter->second); | |||
| if (blk.size == riter->second) { | |||
| to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||
| free_size += blk.size; | |||
| auto j = i->second.aiter; | |||
| free_blk_size.erase(i); | |||
| free_blk_addr.erase(j); | |||
| m_alloc_from_raw.erase(riter); | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| if (auto child = get_single_child_stream_unsafe()) { | |||
| MGB_LOCK_GUARD(child->m_mutex); | |||
| return_full_free_blk_unsafe(child); | |||
| mgb_assert(free_size <= m_used_size.load()); | |||
| m_used_size -= free_size; | |||
| } else { | |||
| size_t gathered_size = 0; | |||
| for (auto &&pair: m_stream_alloc) { | |||
| auto ch = pair.second.get(); | |||
| auto &&chmtx = ch->m_mutex; | |||
| MGB_LOCK_GUARD(chmtx); | |||
| for (auto &&i: ch->m_free_blk_size) { | |||
| merge_free_unsafe(i.first); | |||
| gathered_size += i.first.size; | |||
| } | |||
| ch->m_free_blk_addr.clear(); | |||
| ch->m_free_blk_size.clear(); | |||
| } | |||
| mgb_assert(gathered_size <= m_used_size.load()); | |||
| m_used_size -= gathered_size; | |||
| } | |||
| return_full_free_blk_unsafe(this); | |||
| m_tot_allocated_from_raw -= free_size; | |||
| // we have to sync to ensure no kernel on the child stream still uses | |||
| @@ -359,6 +373,25 @@ FreeMemStat DevMemAllocImpl::get_free_memory_dev() { | |||
| return ret; | |||
| } | |||
| void DevMemAllocImpl::insert_free_unsafe(const FreeBlock &block) { | |||
| if (auto child = get_single_child_stream_unsafe()) { | |||
| { | |||
| MGB_LOCK_GUARD(child->m_mutex); | |||
| child->insert_free_unsafe(block); | |||
| } | |||
| m_used_size += block.size; | |||
| } else { | |||
| MemAllocImplHelper::insert_free_unsafe(block); | |||
| } | |||
| } | |||
| StreamMemAllocImpl* DevMemAllocImpl::get_single_child_stream_unsafe() { | |||
| if (m_stream_alloc.size() == 1) { | |||
| return m_stream_alloc.begin()->second.get(); | |||
| } | |||
| return nullptr; | |||
| } | |||
| DevMemAllocImpl::~DevMemAllocImpl() { | |||
| for (auto &&i: m_alloc_from_raw) | |||
| m_raw_allocator->free(i.first); | |||
| @@ -94,7 +94,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { | |||
| * \brief directly insert a free block into m_free_blk_size and | |||
| * m_free_blk_addr, without merging | |||
| */ | |||
| inline void insert_free_unsafe(const FreeBlock &block); | |||
| virtual void insert_free_unsafe(const FreeBlock &block); | |||
| /*! | |||
| * \brief allocate from parent allocator; this method must either return | |||
| @@ -153,6 +153,12 @@ class StreamMemAllocImpl final: public StreamMemAlloc, | |||
| {} | |||
| }; | |||
| /*! | |||
| * \note DevMemAlloc has a two-level structure, but when only one stream is | |||
| * registered into the DevMemAlloc, it behaves like a single-level allocator | |||
| * (i.e. only the FreeBlock pool in its child stream allocator is used) for | |||
| * better performance. | |||
| */ | |||
| class DevMemAllocImpl final: public DevMemAlloc, | |||
| public MemAllocImplHelper { | |||
| friend class StreamMemAllocImpl; | |||
| @@ -193,6 +199,14 @@ class DevMemAllocImpl final: public DevMemAlloc, | |||
| size_t get_used_memory() override { return m_used_size.load(); } | |||
| void insert_free_unsafe(const FreeBlock &block) override; | |||
| /*! | |||
| * \brief return stream allocator if DevMemAlloc has single child, | |||
| * otherwise return nullptr | |||
| */ | |||
| StreamMemAllocImpl* get_single_child_stream_unsafe(); | |||
| public: | |||
| DevMemAllocImpl( | |||
| int device, size_t reserve_size, | |||
| @@ -209,18 +209,73 @@ TEST(TestMemAlloc, Alloc) { | |||
| auto ptr = strm_alloc->alloc_shared(REQ); | |||
| EXPECT_EQ(REQ, strm_alloc->get_used_memory()); | |||
| EXPECT_EQ(0u, strm_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(TOT - REQ, strm_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
| auto addr = ptr.get(); | |||
| ptr.reset(); | |||
| EXPECT_EQ(0u, strm_alloc->get_used_memory()); | |||
| EXPECT_EQ(REQ, strm_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(TOT, strm_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
| EXPECT_EQ(addr, strm_alloc->alloc_shared(REQ).get()); | |||
| } | |||
| TEST(TestMemAlloc, MergeFreeBlock) { | |||
| using StreamKey = DevMemAlloc::StreamKey; | |||
| auto raw_alloc = std::make_shared<DummyAllocator>(7000); | |||
| auto runtime_policy = std::make_shared<DummyRuntimePolicy>(0); | |||
| auto dev_alloc = DevMemAlloc::make(0, 7000, raw_alloc, runtime_policy); | |||
| StreamKey stream_key = nullptr; | |||
| auto strm_alloc = | |||
| dev_alloc->add_stream(static_cast<StreamKey>(&stream_key)); | |||
| auto ptr = strm_alloc->alloc_shared(2000); | |||
| auto addr = ptr.get(); | |||
| ptr.reset(); | |||
| ptr = strm_alloc->alloc_shared(3000); | |||
| EXPECT_EQ(addr, ptr.get()); | |||
| strm_alloc->alloc_shared(4000); | |||
| } | |||
| TEST(TestMemAlloc, AllocTwoStream) { | |||
| constexpr size_t TOT = 2048, REQ0 = 1000, REQ1 = 2000; | |||
| using StreamKey = DevMemAlloc::StreamKey; | |||
| auto raw_alloc = std::make_shared<DummyAllocator>(TOT); | |||
| auto runtime_policy = std::make_shared<DummyRuntimePolicy>(0); | |||
| auto dev_alloc = DevMemAlloc::make(0, TOT, raw_alloc, runtime_policy); | |||
| StreamKey stream_key0, stream_key1; | |||
| auto strm_alloc0 = | |||
| dev_alloc->add_stream(static_cast<StreamKey>(&stream_key0)), | |||
| strm_alloc1 = | |||
| dev_alloc->add_stream(static_cast<StreamKey>(&stream_key1)); | |||
| ASSERT_NE(strm_alloc0, strm_alloc1); | |||
| auto ptr0 = strm_alloc0->alloc_shared(REQ0); | |||
| EXPECT_EQ(REQ0, strm_alloc0->get_used_memory()); | |||
| EXPECT_EQ(0u, strm_alloc0->get_free_memory().tot); | |||
| EXPECT_EQ(REQ0, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(TOT - REQ0, dev_alloc->get_free_memory().tot); | |||
| ptr0.reset(); | |||
| EXPECT_EQ(0u, strm_alloc0->get_used_memory()); | |||
| EXPECT_EQ(REQ0, strm_alloc0->get_free_memory().tot); | |||
| EXPECT_EQ(REQ0, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(TOT - REQ0, dev_alloc->get_free_memory().tot); | |||
| auto ptr1 = strm_alloc1->alloc_shared(REQ1); | |||
| EXPECT_EQ(0u, strm_alloc0->get_free_memory().tot); | |||
| EXPECT_EQ(REQ1, strm_alloc1->get_used_memory()); | |||
| EXPECT_EQ(0u, strm_alloc1->get_free_memory().tot); | |||
| EXPECT_EQ(REQ1, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
| ptr1.reset(); | |||
| EXPECT_EQ(0u, strm_alloc1->get_used_memory()); | |||
| EXPECT_EQ(REQ1, strm_alloc1->get_free_memory().tot); | |||
| EXPECT_EQ(REQ1, dev_alloc->get_used_memory()); | |||
| EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
| } | |||
| TEST(TestMemAlloc, AllocMoreThanReserve) { | |||
| constexpr size_t RES = 1000, TOT = 2048, REQ = 2048; | |||