Merge pull request !1201 from JoyLvliang/pynative-release-memory-after-run-finish (tags/v0.3.0-alpha)
| @@ -149,23 +149,19 @@ OpExecInfoPtr GenerateOpExecInfo(const py::args &args) { | |||||
| return op_exec_info; | return op_exec_info; | ||||
| } | } | ||||
| std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info) { | |||||
| std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info, | |||||
| const std::vector<tensor::TensorPtr> &input_tensors) { | |||||
| MS_EXCEPTION_IF_NULL(op_exec_info); | MS_EXCEPTION_IF_NULL(op_exec_info); | ||||
| std::string graph_info; | std::string graph_info; | ||||
| MS_EXCEPTION_IF_NULL(op_exec_info->abstract); | |||||
| // get input tensor info | // get input tensor info | ||||
| size_t input_num = op_exec_info->op_inputs.size(); | |||||
| for (size_t index = 0; index < input_num; ++index) { | |||||
| if (py::isinstance<tensor::Tensor>(op_exec_info->op_inputs[index])) { | |||||
| auto tensor_ptr = py::cast<tensor::TensorPtr>(op_exec_info->op_inputs[index]); | |||||
| MS_EXCEPTION_IF_NULL(tensor_ptr); | |||||
| (void)graph_info.append(tensor_ptr->GetShapeAndDataTypeInfo() + "_"); | |||||
| } | |||||
| for (const auto &input_tensor : input_tensors) { | |||||
| MS_EXCEPTION_IF_NULL(input_tensor); | |||||
| (void)graph_info.append(input_tensor->GetShapeAndDataTypeInfo() + "_"); | |||||
| } | } | ||||
| // get prim and abstract info | // get prim and abstract info | ||||
| MS_EXCEPTION_IF_NULL(op_exec_info->abstract); | |||||
| (void)graph_info.append(std::to_string((uintptr_t)(op_exec_info->py_primitive.get())) + "_" + | (void)graph_info.append(std::to_string((uintptr_t)(op_exec_info->py_primitive.get())) + "_" + | ||||
| op_exec_info->abstract->ToString()); | op_exec_info->abstract->ToString()); | ||||
| MS_LOG(INFO) << "Graph info [" << graph_info << "]"; | |||||
| return graph_info; | return graph_info; | ||||
| } | } | ||||
| @@ -337,14 +333,14 @@ py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat | |||||
| if (session == nullptr) { | if (session == nullptr) { | ||||
| session = session::SessionFactory::Get().Create(device_target); | session = session::SessionFactory::Get().Create(device_target); | ||||
| } | } | ||||
| MS_EXCEPTION_IF_NULL(session); | MS_EXCEPTION_IF_NULL(session); | ||||
| session->Init(ms_context->device_id()); | session->Init(ms_context->device_id()); | ||||
| std::string graph_info = GetSingleOpGraphInfo(op_exec_info); | |||||
| std::vector<tensor::TensorPtr> input_tensors; | std::vector<tensor::TensorPtr> input_tensors; | ||||
| std::vector<int> tensors_mask; | std::vector<int> tensors_mask; | ||||
| ConstructInputTensor(op_exec_info, &tensors_mask, &input_tensors); | ConstructInputTensor(op_exec_info, &tensors_mask, &input_tensors); | ||||
| // get graph info for checking it whether existing in the cache | |||||
| std::string graph_info = GetSingleOpGraphInfo(op_exec_info, input_tensors); | |||||
| session->BuildOp(*op_exec_info, graph_info, input_tensors, tensors_mask); | session->BuildOp(*op_exec_info, graph_info, input_tensors, tensors_mask); | ||||
| EraseValueNodeTensor(tensors_mask, &input_tensors); | EraseValueNodeTensor(tensors_mask, &input_tensors); | ||||
| py::tuple result = session->RunOp(*op_exec_info, graph_info, input_tensors); | py::tuple result = session->RunOp(*op_exec_info, graph_info, input_tensors); | ||||
| @@ -342,7 +342,7 @@ void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph | |||||
| const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int> &tensors_mask) { | const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int> &tensors_mask) { | ||||
| MS_LOG(INFO) << "Build op " << op_run_info.op_name << " start !"; | MS_LOG(INFO) << "Build op " << op_run_info.op_name << " start !"; | ||||
| if (GraphCacheExist(graph_info)) { | if (GraphCacheExist(graph_info)) { | ||||
| MS_LOG(INFO) << "Build op " << op_run_info.op_name << " finish !"; | |||||
| MS_LOG(INFO) << "Build op " << op_run_info.op_name << " graph cache has existed !"; | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -301,9 +301,13 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<Context> &context, std::vector< | |||||
| ValueNodePtr ConstructRunOpValueNode(const std::shared_ptr<KernelGraph> &graph, const tensor::TensorPtr &input_tensor) { | ValueNodePtr ConstructRunOpValueNode(const std::shared_ptr<KernelGraph> &graph, const tensor::TensorPtr &input_tensor) { | ||||
| MS_EXCEPTION_IF_NULL(graph); | MS_EXCEPTION_IF_NULL(graph); | ||||
| MS_EXCEPTION_IF_NULL(input_tensor); | MS_EXCEPTION_IF_NULL(input_tensor); | ||||
| auto abstract = std::make_shared<abstract::AbstractTensor>(input_tensor); | |||||
| auto value_node = std::make_shared<ValueNode>(input_tensor); | auto value_node = std::make_shared<ValueNode>(input_tensor); | ||||
| // construct abstract of value node | |||||
| auto type_of_tensor = input_tensor->Dtype(); | |||||
| auto shape_of_tensor = input_tensor->shape(); | |||||
| auto abstract = std::make_shared<abstract::AbstractTensor>(type_of_tensor, shape_of_tensor); | |||||
| value_node->set_abstract(abstract); | value_node->set_abstract(abstract); | ||||
| // add value node to graph | |||||
| auto input_value_node = graph->NewValueNode(value_node); | auto input_value_node = graph->NewValueNode(value_node); | ||||
| graph->AddValueNodeToGraph(input_value_node); | graph->AddValueNodeToGraph(input_value_node); | ||||
| return input_value_node; | return input_value_node; | ||||
| @@ -313,7 +317,7 @@ ParameterPtr ConstructRunOpParameter(const std::shared_ptr<KernelGraph> &graph, | |||||
| int tensor_mask) { | int tensor_mask) { | ||||
| auto param = graph->NewParameter(); | auto param = graph->NewParameter(); | ||||
| MS_EXCEPTION_IF_NULL(param); | MS_EXCEPTION_IF_NULL(param); | ||||
| if (tensor_mask == 1) { | |||||
| if (tensor_mask == kParameterWeightTensorMask) { | |||||
| py::object obj; | py::object obj; | ||||
| param->set_default_param(obj); | param->set_default_param(obj); | ||||
| } | } | ||||
| @@ -329,8 +333,10 @@ ParameterPtr ConstructRunOpParameter(const std::shared_ptr<KernelGraph> &graph, | |||||
| kernel_build_info_builder->SetOutputsDeviceType(std::vector<TypeId>{input_tensor->device_address()->type_id()}); | kernel_build_info_builder->SetOutputsDeviceType(std::vector<TypeId>{input_tensor->device_address()->type_id()}); | ||||
| } | } | ||||
| AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), param.get()); | AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), param.get()); | ||||
| // ftruct abstract of parameter | |||||
| auto abstract = std::make_shared<abstract::AbstractTensor>(input_tensor); | |||||
| // construct abstract of parameter | |||||
| auto type_of_tensor = input_tensor->Dtype(); | |||||
| auto shape_of_tensor = input_tensor->shape(); | |||||
| auto abstract = std::make_shared<abstract::AbstractTensor>(type_of_tensor, shape_of_tensor); | |||||
| param->set_abstract(abstract); | param->set_abstract(abstract); | ||||
| return param; | return param; | ||||
| } | } | ||||
| @@ -199,6 +199,8 @@ const size_t kShape4dDims = 4; | |||||
| const size_t kShape5dDims = 5; | const size_t kShape5dDims = 5; | ||||
| const size_t kCubeSize = 16; | const size_t kCubeSize = 16; | ||||
| const size_t kMemAlignSize = 512; | const size_t kMemAlignSize = 512; | ||||
| const int kParameterDataTensorMask = 0; | |||||
| const int kParameterWeightTensorMask = 1; | |||||
| const int kValueNodeTensorMask = 2; | const int kValueNodeTensorMask = 2; | ||||
| // define special index in special node | // define special index in special node | ||||