From bd8aeefd953164c46cb84c739197f7d21285d79e Mon Sep 17 00:00:00 2001 From: laiyongqiang Date: Wed, 23 Sep 2020 20:10:19 +0800 Subject: [PATCH] disable memory reuse for selected op in e2e dump --- .../ccsrc/debug/data_dump/dump_json_parser.cc | 2 +- .../device/ascend/ascend_kernel_runtime.cc | 12 +++++++ .../device/ascend/ascend_kernel_runtime.h | 1 + .../ccsrc/runtime/device/kernel_runtime.cc | 35 +++++++++++++++++++ .../ccsrc/runtime/device/kernel_runtime.h | 2 ++ .../ccsrc/runtime/device/memory_manager.cc | 2 ++ 6 files changed, 53 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index b01cc6b706..dfc367dee2 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -331,7 +331,7 @@ void DumpJsonParser::JudgeDumpEnabled() { e2e_dump_enabled_ = false; MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support"; } - context->set_param(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_); + JsonConfigToString(); } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index a14b06329e..b1213f7967 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -333,6 +333,18 @@ bool AscendKernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, return false; } +bool AscendKernelRuntime::KernelMemNotReuse(const AnfNodePtr &node) { + bool need_dump = false; + auto &dump_json_parser = DumpJsonParser::GetInstance(); + if (dump_json_parser.e2e_dump_enabled() && dump_json_parser.dump_mode() == 1) { + auto op_name = node->fullname_with_scope(); + if (dump_json_parser.NeedDump(op_name)) { + need_dump = true; + } + } + return need_dump; +} + DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) { return std::make_shared(device_ptr, device_size, format, type_id); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h index dff1783f94..31f90dea9a 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h @@ -54,6 +54,7 @@ class AscendKernelRuntime : public KernelRuntime { DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) override; bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index) override; + bool KernelMemNotReuse(const AnfNodePtr &node) override; private: bool InitDevice(); diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc index 8b3c8f0988..3f3ace92f8 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc @@ -432,7 +432,13 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode if (type == kReuseDynamicMem) { // reuse communication op's all outputs' memory type = kReuseDynamicCommMem; + bool not_reuse = KernelMemNotReuse(node); + if (not_reuse) { + type = kDynamicMem; + MS_LOG(INFO) << "Disable Memory Reuse for " << node->fullname_with_scope() << "'s output."; + } } + uint8_t *output_ptr = nullptr; for (size_t j = 0; j < align_size_list.size(); ++j) { std::string output_format = AnfAlgo::GetOutputFormat(node, j); @@ -449,6 +455,7 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNode output_ptr += align_size_list[j]; } } +bool KernelRuntime::KernelMemNotReuse(const AnfNodePtr &node) { return false; } DeviceAddressPtr KernelRuntime::PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) { MS_EXCEPTION_IF_NULL(anf_node); @@ -488,6 +495,15 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP if (addr_size.empty()) { return; } + + if (type == kReuseDynamicMem) { + bool not_reuse = KernelMemNotReuse(node); + if (not_reuse) { + type = kDynamicMem; + MS_LOG(INFO) << "Disable Memory Reuse for " << node->fullname_with_scope() << "'s input."; + } + } + uint8_t *input_ptr = mem_manager_->MallocOutputMem(node, 0, type, total_size, addr_size[0].first); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -511,6 +527,15 @@ void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, in type = kDynamicMem; } } + + if (type == kReuseDynamicMem) { + bool not_reuse = KernelMemNotReuse(node); + if (not_reuse) { + type = kDynamicMem; + MS_LOG(INFO) << "Disable Memory Reuse for " << node->fullname_with_scope() << "'s output."; + } + } + auto kernel_mod = AnfAlgo::GetKernelMod(node); MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); @@ -625,9 +650,19 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(context_ptr); bool is_enable_mem_reuse = context_ptr->get_param(MS_CTX_ENABLE_MEM_REUSE); auto mem_type = kDynamicMem; + auto &dump_json_parser = DumpJsonParser::GetInstance(); + if (dump_json_parser.e2e_dump_enabled() && dump_json_parser.dump_mode() == 0) { + context_ptr->set_param(MS_CTX_ENABLE_MEM_REUSE, false); + is_enable_mem_reuse = false; + MS_LOG(INFO) << "Disable Memory Reuse when e2e dump is enable and dump mode is set to dump all kernels"; + } + if (is_enable_mem_reuse) { + MS_LOG(INFO) << "Memory Reuse is enable..."; mem_manager_->MallocReusedDynamicMem(graph); mem_type = kReuseDynamicMem; + } else { + MS_LOG(INFO) << "Memory Reuse is disable..."; } auto &execution_nodes = graph->execution_order(); std::vector compute_nodes; diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h index 145fa00c50..78e8f80a67 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -82,6 +82,8 @@ class KernelRuntime { virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) = 0; virtual bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index); + virtual bool KernelMemNotReuse(const AnfNodePtr &node); + void AssignStaticMemory(session::KernelGraph *graph); void AssignDynamicMemory(session::KernelGraph *graph); void ReuseAssignDynamicMemory(session::KernelGraph *graph); diff --git a/mindspore/ccsrc/runtime/device/memory_manager.cc b/mindspore/ccsrc/runtime/device/memory_manager.cc index 99ce1d021e..88d6ce8511 100644 --- a/mindspore/ccsrc/runtime/device/memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/memory_manager.cc @@ -42,6 +42,8 @@ void MemoryManager::MallocReusedDynamicMem(const session::KernelGraph *graph) { MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; mem_reuse_util_ptr_ = mem_reuse_util_ptr; auto base_ptr = MallocDynamicMem(total_allocated_size, false); + MS_LOG(INFO) << "Reuse Memory from [" << reinterpret_cast(base_ptr) << "] to [" + << reinterpret_cast(base_ptr + total_allocated_size) << "]"; mem_reuse_util_ptr_->set_mem_base(base_ptr); }