Browse Source

!30729 add the debug info for mem dynamic pool

Merge pull request !30729 from limingqi107/new_actor_runtime
feature/build-system-rewrite
i-robot Gitee 4 years ago
parent
commit
2c3aa5dd95
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
8 changed files with 128 additions and 11 deletions
  1. +76
    -7
      mindspore/ccsrc/common/mem_reuse/mem_dynamic_allocator.cc
  2. +40
    -4
      mindspore/ccsrc/common/mem_reuse/mem_dynamic_allocator.h
  3. +1
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc
  4. +1
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc
  5. +5
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc
  6. +1
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc
  7. +3
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/memory_manager_actor.cc
  8. +1
    -0
      mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc

+ 76
- 7
mindspore/ccsrc/common/mem_reuse/mem_dynamic_allocator.cc View File

@@ -16,7 +16,6 @@

#include "common/mem_reuse/mem_dynamic_allocator.h"
#include <string>
#include "utils/ms_utils.h"
#include "include/common/utils/convert_utils.h"
#include "utils/log_adapter.h"
#include "utils/ms_context.h"
@@ -30,6 +29,13 @@ constexpr size_t kGBToByte = 1024 << 20;
// Set experience value to 10M
const size_t kMinimumAllocMem = 10 << 20;

// One AllocatorDebugInfo per thread: each thread records its own allocation owner
// without any synchronization (declared thread_local in the header).
thread_local AllocatorDebugInfo DynamicMemAllocatorDebugInfo::debug_info_;

// Human-readable names for DynamicMemBufStatus, used when dumping memory buf info.
static const std::map<DynamicMemBufStatus, std::string> kBufStatusString = {
{kMemBufIdle, "idle"},
{kMemBufUsed, "used"},
};

DynamicMemPoolBestFit::~DynamicMemPoolBestFit() {
persistent_mem_->clear();
common_mem_->clear();
@@ -43,8 +49,11 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size, bool from_persis
if (!device_addr) {
device_addr = AddMemBlockAndMemBuf(align_size, from_persistent_mem);
}

// Alloc memory failed and dump the info.
if (!device_addr) {
DumpDynamicMemPoolInfo();
DumpDynamicMemPoolDebugInfo();
DumpDynamicMemPoolStateInfo();
}
return device_addr;
}
@@ -79,7 +88,9 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
DynamicMemBufPtr continuous_mem_buf;
auto buf_addr = device_addr;
for (size_t i : size_list) {
continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, i);
continuous_mem_buf =
std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, i, DynamicMemAllocatorDebugInfo::GetDebugInfo().name_);
MS_EXCEPTION_IF_NULL(continuous_mem_buf);
(void)mem_block->block_all_mem_buf_map_.emplace(buf_addr, continuous_mem_buf);
device_addr_list.emplace_back(buf_addr);
buf_addr = AddressOffset(buf_addr, i);
@@ -111,6 +122,7 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size, bool from_persis
<< "] mem_buf_address[" << mem_buf->device_addr_ << "].";
}
mem_buf->status_ = kMemBufUsed;
mem_buf->allocator_name_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().name_;
// Remove map of old idle memory buf
(void)mem_mng->idle_mem_buf_map_.erase(iter);
// Divide memory buf
@@ -200,7 +212,8 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size, bool from_
std::upper_bound(mem_mng->mem_block_list_.begin(), mem_mng->mem_block_list_.end(), device_addr, CmpMemBlock);
(void)mem_mng->mem_block_list_.insert(iter, mem_block);
// Add new memory buf
auto mem_buf = std::make_shared<DynamicMemBuf>(device_addr, kMemBufUsed, real_alloc_size);
auto mem_buf = std::make_shared<DynamicMemBuf>(device_addr, kMemBufUsed, real_alloc_size,
DynamicMemAllocatorDebugInfo::GetDebugInfo().name_);
MS_EXCEPTION_IF_NULL(mem_buf);
// Add map of new memory buf in the block
(void)mem_block->block_all_mem_buf_map_.emplace(device_addr, mem_buf);
@@ -395,7 +408,7 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() {
fn(persistent_mem_);
}

void DynamicMemPoolBestFit::DumpDynamicMemPoolInfo() {
void DynamicMemPoolBestFit::DumpDynamicMemPoolStateInfo() {
auto fn = [](const MemStatusManagerPtr &mem_mng, const std::string &mem_type) {
if (mem_mng->mem_block_list_.empty()) {
return;
@@ -409,17 +422,73 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolInfo() {
idle_size += mb->second->size_;
}
}
buf << ", block[" << i << "] idle size " << idle_size;
buf << ", block[" << i << "] block size:" << mem_mng->mem_block_list_[i]->mem_block_size_
<< " idle size:" << idle_size;
}
// Dump all the memory buf info
MS_LOG(WARNING) << mem_type << "pool info: block size " << mem_mng->unit_size_ << ", block counts "
MS_LOG(WARNING) << mem_type << " pool info: block size " << mem_mng->unit_size_ << ", block counts "
<< mem_mng->mem_block_list_.size() << buf.str() << ". Total allocated mem "
<< mem_mng->mps_.total_mem_size_ << ", peak used mem " << mem_mng->mps_.used_mem_peak_size_
<< ", in used mem " << mem_mng->mps_.total_used_mem_size_ << ", total idle mem "
<< mem_mng->mps_.total_mem_size_ - mem_mng->mps_.total_used_mem_size_;
};

fn(common_mem_, std::string(kCommonMem));
fn(persistent_mem_, std::string(kPersistentParamMem));
}

// Dump the detailed debug info of the dynamic memory pool: every memory block, every
// memory buf inside each block (with the allocator name for used bufs), and the global
// idle buf map. Cross-checks the per-block accounting against the global idle map and
// the totals, logging an ERROR on any inconsistency.
void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() {
  auto fn = [](const MemStatusManagerPtr &mem_mng, const std::string &mem_type) {
    size_t total_mem = 0;
    size_t total_used_mem = 0;
    // Idle memory accumulated by walking each block's buf map.
    size_t total_idle_mem1 = 0;
    // Idle memory accumulated from the global idle buf map; must equal total_idle_mem1.
    size_t total_idle_mem2 = 0;
    // Dump the memory block info and memory buf info.
    MS_LOG(INFO) << mem_type << " all mem_block info: counts[" << mem_mng->mem_block_list_.size() << "].";
    for (auto iter = mem_mng->mem_block_list_.begin(); iter != mem_mng->mem_block_list_.end(); ++iter) {
      total_mem += (*iter)->size();
      // Bind by const reference: the original copied the whole buf map for every block.
      const auto &mem_buf_map = (*iter)->block_all_mem_buf_map_;
      MS_LOG(INFO) << " MemBlock info: number[" << iter - mem_mng->mem_block_list_.begin() << "] mem_buf_counts["
                   << mem_buf_map.size() << "] base_address[" << (*iter)->device_addr() << "] block_size["
                   << (*iter)->size() << "].";
      for (const auto &iter_mem_buf : mem_buf_map) {
        const auto &mem_buf = iter_mem_buf.second;
        MS_EXCEPTION_IF_NULL(mem_buf);
        if (mem_buf->status_ == kMemBufIdle) {
          total_idle_mem1 += mem_buf->size_;
        } else {
          total_used_mem += mem_buf->size_;
        }
        // Only a used buf carries a meaningful allocator name.
        auto user_name = (mem_buf->status_ == kMemBufUsed) ? ("] name[" + mem_buf->allocator_name_ + "].") : "].";
        MS_LOG(INFO) << " MemBuf info: address[" << mem_buf->device_addr_ << "] size[" << mem_buf->size_ << "] status["
                     << kBufStatusString.at(mem_buf->status_) << user_name;
      }
    }
    // Dump all the idle memory buf info.
    MS_LOG(INFO) << mem_type << " all idle mem_buf info: counts[" << mem_mng->idle_mem_buf_map_.size() << "].";
    for (const auto &iter_idle : mem_mng->idle_mem_buf_map_) {
      const auto &mem_buf = iter_idle.second;
      MS_EXCEPTION_IF_NULL(mem_buf);
      total_idle_mem2 += mem_buf->size_;
      MS_LOG(INFO) << " Idle mem_buf info: size[" << mem_buf->size_ << "] address[" << mem_buf->device_addr_
                   << "] status[" << kBufStatusString.at(mem_buf->status_) << "].";
    }
    // Dump the memory statistical info.
    MS_LOG(INFO) << mem_type << " total allocated memory[" << total_mem << "], used memory[" << total_used_mem
                 << "], idle memory[" << total_idle_mem1 << "].";
    // Consistency checks: per-block idle accounting vs. the global idle map, and
    // total = used + idle. (Fixed message grammar: "not equal to", dropped doubled "the".)
    if (total_idle_mem1 != total_idle_mem2) {
      MS_LOG(ERROR) << "Check error: the idle memory in the mem_block is not equal to the global idle memory.";
    }
    if (total_mem != total_used_mem + total_idle_mem1) {
      MS_LOG(ERROR) << "Check error: the total memory is not equal to the sum of used memory and idle memory.";
    }
  };

  MS_LOG(INFO) << "Start dump dynamic memory pool debug info.";
  fn(common_mem_, std::string(kCommonMem));
  fn(persistent_mem_, std::string(kPersistentParamMem));
  MS_LOG(INFO) << "Finish dump dynamic memory pool debug info.";
}
} // namespace device
} // namespace mindspore

+ 40
- 4
mindspore/ccsrc/common/mem_reuse/mem_dynamic_allocator.h View File

@@ -24,6 +24,8 @@
#include <utility>
#include <thread>
#include <mutex>
#include <string>
#include "utils/ms_utils.h"

namespace mindspore {
namespace device {
@@ -43,13 +45,42 @@ struct DeviceAddrCmp {
bool operator()(const DeviceMemPtr &addr1, const DeviceMemPtr &addr2) const { return addr1 < addr2; }
};

// Recording information for debugging the memory allocator: which actor/node requested
// the current allocation, and which input/output slot it was for.
struct AllocatorDebugInfo {
// Name of the allocation owner (actor name or node full name); "Unknown" until set.
std::string name_{"Unknown"};
// Input slot index of the allocation; -1 when not applicable.
int input_index_{-1};
// Output slot index of the allocation; -1 when not applicable.
int output_index_{-1};
};

// The AllocatorDebugInfo wrapper, stored thread-locally for the dynamic memory pool so
// each thread records its own allocator debug info without synchronization.
class DynamicMemAllocatorDebugInfo {
public:
// Get the debug info of the calling thread.
static AllocatorDebugInfo &GetDebugInfo() noexcept { return debug_info_; }

// Set the debug info when memory alloc.
static void SetDebugInfo(const std::string &name, int input_index = -1, int output_index = -1) {
debug_info_.name_ = name;
debug_info_.input_index_ = input_index;
debug_info_.output_index_ = output_index;
}

private:
// Static-only utility: construction and copying are disallowed.
DynamicMemAllocatorDebugInfo() = default;
virtual ~DynamicMemAllocatorDebugInfo() = default;
DISABLE_COPY_AND_ASSIGN(DynamicMemAllocatorDebugInfo);

// One debug info per thread (defined in mem_dynamic_allocator.cc).
static thread_local AllocatorDebugInfo debug_info_;
};

// Memory buf is the smallest operation object of dynamic memory pool.
struct DynamicMemBuf {
DynamicMemBuf(DeviceMemPtr addr, DynamicMemBufStatus status, size_t size)
: device_addr_(addr), status_(status), size_(size) {}
DynamicMemBuf(DeviceMemPtr addr, DynamicMemBufStatus status, size_t size,
const std::string &allocator_name = "Unknown")
: device_addr_(addr), status_(status), size_(size), allocator_name_(allocator_name) {}
DeviceMemPtr device_addr_;
DynamicMemBufStatus status_;
size_t size_;
std::string allocator_name_;
};
using DynamicMemBufPtr = std::shared_ptr<DynamicMemBuf>;
// Multimap key is the tensor size, for finding the idle memory buf by tensor size.
@@ -123,6 +154,8 @@ class DynamicMemPoolBestFit {
void SetMemAllocUintSize(size_t common_size, size_t persist_size = DYNAMIC_MEM_ALLOC_UNIT_SIZE);
// Set mem pool block size
void SetMemPoolBlockSize(size_t available_device_mem_size);

// The statistics information.
size_t TotalMemStatistics() const {
return common_mem_->mps_.total_mem_size_ + persistent_mem_->mps_.total_mem_size_;
}
@@ -133,6 +166,11 @@ class DynamicMemPoolBestFit {
return common_mem_->mps_.used_mem_peak_size_ + persistent_mem_->mps_.used_mem_peak_size_;
}

// Display the brief state information of memory block and memory buf.
void DumpDynamicMemPoolStateInfo();
// Display the detailed debug information of memory block and memory buf.
void DumpDynamicMemPoolDebugInfo();

// The related interface of device memory real operation, needs override by device type.
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
@@ -164,8 +202,6 @@ class DynamicMemPoolBestFit {
const MemStatusManagerPtr &mem_mng);
// Erase the idle memory buf by size and device address when idle memory buf is combined.
void EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr, const MemStatusManagerPtr &mem_mng);
// Display the information of memory block and memory buf.
void DumpDynamicMemPoolInfo();

// Support multi-thread.
std::mutex mutex_;


+ 1
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/control_actor.cc View File

@@ -394,6 +394,7 @@ void ControlActor::UpdateOutputData(OpData<DeviceTensor> *const output_data, con
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{device_tensor->device_name(), device_tensor->device_id()});
MS_EXCEPTION_IF_NULL(device_context);
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), 0);
if ((device_tensor->GetPtr() == nullptr) &&
(!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize()))) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_context,


+ 1
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc View File

@@ -175,6 +175,7 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {

// If the address ptr can't be changed, then alloc the new device memory and copy the data.
if (input_device_tensor->is_ptr_persisted()) {
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name());
if (!device_contexts_[i]->AllocateMemory(new_device_tensor.get(), new_device_tensor->GetSize())) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_contexts_[i],
GetAID().Name(), new_device_tensor->GetSize());


+ 5
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc View File

@@ -38,6 +38,7 @@ void SyncTensorData(const TensorPtr &host_tensor, const DeviceTensorPtr &device_
MS_EXCEPTION_IF_NULL(device_context);
MS_EXCEPTION_IF_NULL(context);

device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
if ((device_tensor->GetPtr() == nullptr) &&
(!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize()))) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(strategy, *context, *device_context, node->fullname_with_scope(),
@@ -128,6 +129,7 @@ void PrepareDataForValue(const ValuePtr &value, const KernelWithIndex &node_with
return;
}

device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_context,
node->fullname_with_scope(), device_tensor->GetSize());
@@ -555,6 +557,7 @@ void DataPrepareActor::PrepareDataForControlValueNode(const KernelWithIndex &nod
tensor->set_device_address(device_tensor);
UpdateRefCount(device_tensor.get(), true);

device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->DebugString(), 0);
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *device_context,
node->fullname_with_scope(), device_tensor->GetSize());
@@ -594,6 +597,7 @@ void DataPrepareActor::PrepareDataForValueNode(const ValueNodePtr &node, const D
}
MS_LOG(INFO) << "Prepare device data for value node: " << node->fullname_with_scope();

device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *device_context,
node->fullname_with_scope(), device_tensor->GetSize());
@@ -625,6 +629,7 @@ void DataPrepareActor::CopyDataFromHostToOtherDevice(const AnfNodePtr &front_nod
const auto &another_device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{device::kDeviceTypeToName.at(another_device_type), device_context->device_context_key().device_id_});
MS_EXCEPTION_IF_NULL(another_device_context);
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(backend_node->fullname_with_scope(), 0);
if ((another_device_tensor->GetPtr() == nullptr) &&
(!another_device_context->AllocateMemory(another_device_tensor.get(), another_device_tensor->GetSize()))) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *another_device_context,


+ 1
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc View File

@@ -319,6 +319,7 @@ void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,
// Update the input device tensor.
input_device_tensors_[input_data->index_] = new_device_tensor.get();

device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), input_data->index_);
if ((new_device_tensor->GetPtr() == nullptr) &&
(!device_contexts_[0]->AllocateMemory(new_device_tensor.get(), new_device_tensor->GetSize()))) {
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(strategy_, *context, *(device_contexts_[0]), GetAID().Name(),


+ 3
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/memory_manager_actor.cc View File

@@ -74,6 +74,7 @@ void MemoryManagerActor::AllocateMemory(const std::vector<DeviceTensor *> *alloc
}
try {
// Allocate memory through the device context.
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
if (!device_context->AllocateMemory(device_tensor, device_tensor->GetSize())) {
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, device_tensor->GetSize(), op_context);
return;
@@ -112,6 +113,7 @@ void MemoryManagerActor::AllocateContinuousMemory(const std::vector<std::vector<
auto &device_context = (*device_contexts)[i];
MS_EXCEPTION_IF_NULL(device_context);
// Allocate memory through the device context.
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
if (!device_context->AllocateContinuousMemory(alloc_list, total_size, size_list)) {
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, total_size, op_context);
return;
@@ -144,6 +146,7 @@ void MemoryManagerActor::AllocateBatchMemory(const std::vector<DeviceTensor *> *

try {
// Allocate memory through the device context.
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
if (!device_context->AllocateMemory(device_tensor, device_tensor->GetSize())) {
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, device_tensor->GetSize(), op_context);
return;


+ 1
- 0
mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc View File

@@ -193,6 +193,7 @@ void OutputActor::UpdateOutputDeviceAddress() {
if (output_node->isa<ValueNode>() || output_node->isa<Parameter>() || device_tensor->is_ptr_persisted()) {
auto device_context = device_contexts_[i];
MS_EXCEPTION_IF_NULL(device_context);
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name());
if (!device_context->AllocateMemory(tensor_device_address.get(), tensor_device_address->GetSize())) {
MS_LOG(EXCEPTION) << "Device(id:" << device_context->device_context_key().device_id_
<< ") memory isn't enough and alloc failed, kernel name: "


Loading…
Cancel
Save