From 77e9b0c913d7a6e05fa7682ccccf193631a6ba05 Mon Sep 17 00:00:00 2001
From: jjfeing
Date: Sun, 25 Apr 2021 10:37:11 +0800
Subject: [PATCH] fix addr is null

---
 .../ccsrc/backend/session/session_basic.cc   | 51 +++++++++----------
 .../ccsrc/backend/session/session_basic.h    |  3 +-
 mindspore/ccsrc/pipeline/jit/pipeline.cc     |  1 -
 .../device/ascend/ascend_memory_manager.cc   |  5 +-
 .../ccsrc/runtime/device/kernel_runtime.cc   |  2 +-
 mindspore/core/ir/param_info.h               |  3 ++
 mindspore/ops/op_info_register.py            |  2 +-
 7 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc
index 655aa66c55..925dd1d31e 100644
--- a/mindspore/ccsrc/backend/session/session_basic.cc
+++ b/mindspore/ccsrc/backend/session/session_basic.cc
@@ -52,8 +52,6 @@
 namespace mindspore {
 namespace session {
-static std::shared_ptr<std::map<ParamInfoPtr, ParameterPtr>> python_paras;
-void ClearPythonParasMap() { python_paras = nullptr; }
 namespace {
 const int kSummaryGetItem = 2;
 const size_t max_depth = 128;
@@ -681,19 +679,18 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf
   MS_EXCEPTION_IF_NULL(graph_inputs);
   ParameterPtr new_parameter = nullptr;
   // if parameter's python parameter has been exist a backend parameter, reuse the exist parameter
-  if (python_paras == nullptr) {
-    python_paras = std::make_shared<std::map<ParamInfoPtr, ParameterPtr>>();
-  }
-  auto iter = python_paras->find(param_value);
-  if (iter != python_paras->end()) {
-    new_parameter = iter->second;
+  if (param_value != nullptr) {
+    new_parameter = param_value->parameter();
+    if (new_parameter == nullptr) {
+      TraceGuard trace_guard(std::make_shared<TraceCopy>(anf->debug_info()));
+      new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
+      param_value->set_parameter(new_parameter);
+    }
   } else {
     TraceGuard trace_guard(std::make_shared<TraceCopy>(anf->debug_info()));
     new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
-    if (param_value != nullptr) {
-      (*python_paras)[param_value] = new_parameter;
-    }
   }
+  new_parameter->IncreaseUsedGraphCount();
 
   graph_inputs->push_back(new_parameter);
   valid_inputs->push_back(true);
@@ -1126,10 +1123,10 @@ ValueNodePtr SessionBasic::CreateValueNodeKernelGraph(const AnfNodePtr &anf, Ker
   MS_EXCEPTION_IF_NULL(value_node);
   auto sub_func_graph = AnfAlgo::GetValueNodeFuncGraph(anf);
   MS_EXCEPTION_IF_NULL(sub_func_graph);
-  if (front_backend_graph_map_.find(sub_func_graph) == front_backend_graph_map_.end()) {
+  if (front_backend_graph_map_.find(sub_func_graph.get()) == front_backend_graph_map_.end()) {
     MS_LOG(EXCEPTION) << "FuncGraph: " << sub_func_graph->ToString() << " has not been transformed to KernelGraph.";
   }
-  auto sub_kernel_graph = front_backend_graph_map_[sub_func_graph];
+  auto sub_kernel_graph = front_backend_graph_map_[sub_func_graph.get()];
   ValueNodePtr new_value_node = std::make_shared<ValueNode>(sub_kernel_graph);
   new_value_node->set_abstract(value_node->abstract());
@@ -1155,19 +1152,19 @@ ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph
   auto param_value = GetParamDefaultValue(anf);
   ParameterPtr new_parameter = nullptr;
-  if (python_paras == nullptr) {
-    python_paras = std::make_shared<std::map<ParamInfoPtr, ParameterPtr>>();
-  }
-  auto iter = python_paras->find(param_value);
-  if (iter != python_paras->end()) {
-    new_parameter = iter->second;
+  // if parameter's python parameter has been exist a backend parameter, reuse the exist parameter
+  if (param_value != nullptr) {
+    new_parameter = param_value->parameter();
+    if (new_parameter == nullptr) {
+      TraceGuard trace_guard(std::make_shared<TraceCopy>(anf->debug_info()));
+      new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
+      param_value->set_parameter(new_parameter);
+    }
   } else {
     TraceGuard trace_guard(std::make_shared<TraceCopy>(anf->debug_info()));
     new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
-    if (param_value != nullptr) {
-      (*python_paras)[param_value] = new_parameter;
-    }
   }
+  new_parameter->IncreaseUsedGraphCount();
 
   return new_parameter;
@@ -1423,7 +1420,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructKernelGraph(const FuncGraphP
   auto node_list = TopoSort(func_graph->get_return());
   auto graph = NewKernelGraph();
   MS_EXCEPTION_IF_NULL(graph);
-  front_backend_graph_map_[func_graph] = graph;
+  front_backend_graph_map_[func_graph.get()] = graph;
   MS_LOG(INFO) << "Create graph: " << graph->graph_id();
   for (const auto &node : node_list) {
     MS_EXCEPTION_IF_NULL(node);
@@ -1446,15 +1443,15 @@
       }
       // Create child kernel graph according ValueNode
       FuncGraphPtr child_graph = AnfAlgo::GetValueNodeFuncGraph(node);
-      if (front_backend_graph_map_.find(child_graph) == front_backend_graph_map_.end()) {
+      if (front_backend_graph_map_.find(child_graph.get()) == front_backend_graph_map_.end()) {
        (void)ConstructKernelGraph(child_graph, all_out_graph);
       }
       (void)CreateValueNodeKernelGraph(node, graph.get());
-      auto &parent_graph = parent_graphs_[front_backend_graph_map_[child_graph]->graph_id()];
+      auto &parent_graph = parent_graphs_[front_backend_graph_map_[child_graph.get()]->graph_id()];
       auto parent_graph_it =
-        std::find(parent_graph.begin(), parent_graph.end(), front_backend_graph_map_[func_graph]->graph_id());
+        std::find(parent_graph.begin(), parent_graph.end(), front_backend_graph_map_[func_graph.get()]->graph_id());
       if (parent_graph_it == parent_graph.end()) {
-        parent_graph.push_back(front_backend_graph_map_[func_graph]->graph_id());
+        parent_graph.push_back(front_backend_graph_map_[func_graph.get()]->graph_id());
       }
       continue;
     }
diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h
index 7aecdf73c1..b0baa31b03 100644
--- a/mindspore/ccsrc/backend/session/session_basic.h
+++ b/mindspore/ccsrc/backend/session/session_basic.h
@@ -41,7 +41,6 @@ namespace mindspore {
 using GraphId = uint32_t;
 using GraphInfo = std::string;
 namespace session {
-void ClearPythonParasMap();
 using CallBackFunc = uint32_t (*)(uint32_t graph_id,
                                   const std::map<std::string, mindspore::tensor::TensorPtr> &params_list);
 using AnyList = std::vector<Any>;
@@ -254,7 +253,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   std::map<uint32_t, uint32_t> free_bucket_id_map_;
   std::unordered_map<GraphId, std::shared_ptr<KernelGraph>> graphs_;
   std::unordered_map<GraphInfo, std::shared_ptr<KernelGraph>> run_op_graphs_;
-  std::unordered_map<FuncGraphPtr, KernelGraphPtr> front_backend_graph_map_;
+  std::unordered_map<FuncGraph *, KernelGraphPtr> front_backend_graph_map_;
   std::unordered_map<GraphId, std::vector<GraphId>> parent_graphs_;
   std::shared_ptr<Context> context_;
   CallBackFunc summary_callback_;
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 66e365f567..80c8f67444 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -1137,7 +1137,6 @@ void FinalizeBackend() {
 void ClearResAtexit() {
   MS_LOG(DEBUG) << "Pipeline clear all resource";
   pynative::ClearPyNativeSession();
-  session::ClearPythonParasMap();
 #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
   if (ps::PSContext::instance()->is_ps_mode() && ps::PSContext::instance()->is_worker()) {
     if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
index 9ab638f8e0..70bd130ff6 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
@@ -41,8 +41,9 @@ void AscendMemoryManager::MallocDeviceMemory() {
       auto context_ptr = MsContext::GetInstance();
       MS_EXCEPTION_IF_NULL(context_ptr);
       unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
-      MS_LOG(EXCEPTION) << "Device " << device_id << " is occupied, malloc device memory failed, size["
-                        << device_mem_size_ << "], ret[" << ret << "]";
+      MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "]"
+                        << "Device " << device_id
+                        << " may be other processes occupying this card, check as: ps -ef|grep python";
     } else {
       MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
     }
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index db219965ed..5c51726321 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -513,7 +513,7 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP
   std::vector<std::pair<DeviceAddressPtr, size_t>> addr_size;
   size_t input_num = AnfAlgo::GetInputTensorNum(node);
   for (size_t i = 0; i < input_num; ++i) {
-    auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
+    auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(node, i, true);
     auto input_node = input_node_with_index.first;
     if (AnfAlgo::OutputAddrExist(input_node, input_node_with_index.second)) {
       MS_LOG(INFO) << "Communication op " << input_node->fullname_with_scope() << " has input device address";
diff --git a/mindspore/core/ir/param_info.h b/mindspore/core/ir/param_info.h
index d517125802..e6edf6ab63 100644
--- a/mindspore/core/ir/param_info.h
+++ b/mindspore/core/ir/param_info.h
@@ -86,6 +86,8 @@ class ParamInfo {
   std::vector<int64_t> cache_shape() const { return cache_shape_; }
   void set_cache_shape(const std::vector<int64_t> &cache_shape) { cache_shape_ = cache_shape; }
 
+  ParameterPtr parameter() { return parameter_; }
+  void set_parameter(const ParameterPtr &parameter) { parameter_ = parameter; }
+
  private:
   std::string name_{"Parameter"};
@@ -100,6 +102,7 @@ class ParamInfo {
   bool parallel_optimizer_{true};
   bool cache_enable_{false};
   std::vector<int64_t> cache_shape_;
+  ParameterPtr parameter_{nullptr};
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_CORE_IR_PARAM_INFO_H_
diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py
index 56222d2736..f96d60914a 100644
--- a/mindspore/ops/op_info_register.py
+++ b/mindspore/ops/op_info_register.py
@@ -451,7 +451,7 @@ class TBERegOp(RegOp):
         Whether the operator need calop_select_format api.
 
         Args:
-            is_dynamic_format (bool): Value of is_dynamic_format_. Default: false.
+            is_dynamic_format (bool): Value of is_dynamic_format. Default: false.
         """
         self._is_bool(is_dynamic_format)
         self.is_dynamic_format_ = is_dynamic_format
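
For context, the core change above replaces the process-global `python_paras` map (which had to be torn down via `ClearPythonParasMap()` in `ClearResAtexit`) with a backend `Parameter` cached directly on each `ParamInfo` through the new `parameter()`/`set_parameter()` accessors. Below is a minimal, self-contained sketch of that reuse pattern; the `Parameter` and `ParamInfo` types here are simplified stand-ins for illustration only, not the real MindSpore classes.

```cpp
#include <iostream>
#include <memory>
#include <string>

// Simplified stand-in for mindspore::Parameter.
struct Parameter {
  explicit Parameter(std::string n) : name(std::move(n)) {}
  std::string name;
  int used_graph_count{0};
  void IncreaseUsedGraphCount() { ++used_graph_count; }
};
using ParameterPtr = std::shared_ptr<Parameter>;

// Simplified stand-in for mindspore::ParamInfo: after the patch, the backend
// parameter is cached on the ParamInfo itself, so no global map is needed.
struct ParamInfo {
  ParameterPtr parameter() { return parameter_; }
  void set_parameter(const ParameterPtr &parameter) { parameter_ = parameter; }

 private:
  ParameterPtr parameter_{nullptr};
};
using ParamInfoPtr = std::shared_ptr<ParamInfo>;

// Mirrors the reuse logic of SessionBasic::CreateNewParameter after the patch:
// reuse the cached backend parameter if present, otherwise create and cache one.
ParameterPtr CreateNewParameter(const ParamInfoPtr &param_info, const std::string &name) {
  ParameterPtr new_parameter = nullptr;
  if (param_info != nullptr) {
    new_parameter = param_info->parameter();
    if (new_parameter == nullptr) {
      new_parameter = std::make_shared<Parameter>(name);
      param_info->set_parameter(new_parameter);
    }
  } else {
    new_parameter = std::make_shared<Parameter>(name);
  }
  new_parameter->IncreaseUsedGraphCount();
  return new_parameter;
}

int main() {
  auto info = std::make_shared<ParamInfo>();
  auto p1 = CreateNewParameter(info, "weight");
  auto p2 = CreateNewParameter(info, "weight");
  // The same backend parameter is reused; its used-graph count reflects both calls.
  std::cout << (p1 == p2) << " " << p1->used_graph_count << std::endl;  // prints "1 2"
  return 0;
}
```

One effect of caching on `ParamInfo` is that the backend parameter's lifetime follows the front-end parameter that owns it, which is why the explicit `ClearPythonParasMap()` cleanup hook in `pipeline.cc` can be removed.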