From 54ccab295c42f97ecb7ec9b482b62b08823e6a0c Mon Sep 17 00:00:00 2001 From: fary86 Date: Fri, 29 May 2020 20:03:41 +0800 Subject: [PATCH] 1.Update log level of some statements in validator.cc 2.Fix core dump of exporting onnx model when device target is 'GPU' 3.Fix numbers of arguments and graph parameters check error 4.Fix log prefix of some files of gpu submodule is error --- mindspore/ccsrc/CMakeLists.txt | 1 + .../ccsrc/dataset/engine/gnn/CMakeLists.txt | 2 ++ mindspore/ccsrc/dataset/text/CMakeLists.txt | 2 ++ mindspore/ccsrc/device/CMakeLists.txt | 25 +++++++++++-------- mindspore/ccsrc/onnx/onnx_exporter.cc | 4 +++ mindspore/ccsrc/pipeline/pipeline.cc | 18 ++++++++++++- mindspore/ccsrc/pipeline/pipeline.h | 3 +++ mindspore/ccsrc/pipeline/validator.cc | 6 ++--- mindspore/ccsrc/transform/convert.cc | 2 +- mindspore/ccsrc/utils/convert_utils.cc | 6 ++--- mindspore/train/serialization.py | 2 ++ 11 files changed, 52 insertions(+), 19 deletions(-) diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index a7513699fa..1e1c650239 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -49,6 +49,7 @@ if(ENABLE_GPU) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) endif () diff --git a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt index d7a295e32a..401fce6d11 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt @@ -1,3 +1,5 @@ +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-gnn OBJECT graph.cc graph_loader.cc diff --git a/mindspore/ccsrc/dataset/text/CMakeLists.txt b/mindspore/ccsrc/dataset/text/CMakeLists.txt index 08620458c7..605b2644b7 100644 --- a/mindspore/ccsrc/dataset/text/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/text/CMakeLists.txt @@ -1,5 +1,7 @@ add_subdirectory(kernels) +file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(text OBJECT vocab.cc ) diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/device/CMakeLists.txt index fba0b20711..2ade0f0ef3 100644 --- a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/device/CMakeLists.txt @@ -20,25 +20,28 @@ endif () if (ENABLE_GPU) file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu") + set(GPU_QUEUE_SRCS "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + set(GPU_COLLECTIVE_SRCS "gpu/distribution/collective_wrapper.cc" + "gpu/distribution/mpi_wrapper.cc" + "gpu/distribution/nccl_wrapper.cc") + # gpu_queue - list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") - add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + list(REMOVE_ITEM CUDA_SRC_LIST ${GPU_QUEUE_SRCS}) + set_property(SOURCE ${GPU_QUEUE_SRCS} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + add_library(gpu_queue SHARED ${GPU_QUEUE_SRCS}) 
target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so) - list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" - "gpu/distribution/collective_wrapper.cc" - "gpu/distribution/mpi_wrapper.cc" - "gpu/distribution/nccl_wrapper.cc" - ) + list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" ${GPU_COLLECTIVE_SRCS}) if (ENABLE_MPI) include(ExternalProject) # gpu_collective - add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc" - "gpu/distribution/mpi_wrapper.cc" - "gpu/distribution/nccl_wrapper.cc" - ) + set_property(SOURCE ${GPU_COLLECTIVE_SRCS} + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS}) # _ms_mpi + set_property(SOURCE "gpu/mpi/mpi_initializer.cc" + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/onnx/onnx_exporter.cc index a0c8de75af..2a038bbf1a 100644 --- a/mindspore/ccsrc/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/onnx/onnx_exporter.cc @@ -411,6 +411,8 @@ void OnnxExporter::InitModelInfo() { void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { std::map node_map; + MS_LOG(INFO) << "Begin exporting onnx model for graph " << func_graph->ToString(); + onnx_node_index_ = func_graph->parameters().size(); // set graph name @@ -423,6 +425,8 @@ void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphPr // export computational nodes and output nodes ExportNodes(func_graph, &node_map, graph_proto); + + MS_LOG(INFO) << "End exporting onnx model for graph " << func_graph->ToString(); } void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc index 3606fb8cd6..bfe1e9f3ba 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/pipeline.cc @@ -374,7 +374,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons p_actions = GePipeline(); } - std::shared_ptr pip = std::make_shared(resource, p_actions); + std::shared_ptr pip = std::make_shared(resource, FilterActions(p_actions, phase_s)); // get the parameters items and add the value to args_spec abstract::AbstractBasePtrList args_spec; @@ -408,6 +408,22 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons return true; } +std::vector ExecutorPy::FilterActions(const std::vector &actions, const std::string &phase) { + // phase does not contain 'export_onnx' + if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) { + return actions; + } + MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'"; + std::vector filtered_actions; + for (const auto &item : actions) { + filtered_actions.emplace_back(item); + if (item.first == "validate") { + break; + } + } + return filtered_actions; +} + void ExecutorPy::ReleaseResource(const py::object &phase) { ResourcePtr res = GetResource(py::cast(phase)); if (res != nullptr) { diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h index cfe84c448e..8134912a75 100644 --- 
a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/pipeline.h @@ -100,6 +100,9 @@ class ExecutorPy : public std::enable_shared_from_this { void ConvertObjectToTensors(const py::dict &dict, std::map *tensors); bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const; void GetGeBackendPolicy() const; + // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after + // 'validate' stage + static std::vector FilterActions(const std::vector &actions, const std::string &phase); std::map info_; static std::shared_ptr executor_; diff --git a/mindspore/ccsrc/pipeline/validator.cc b/mindspore/ccsrc/pipeline/validator.cc index 73a54bb180..4866d43b93 100644 --- a/mindspore/ccsrc/pipeline/validator.cc +++ b/mindspore/ccsrc/pipeline/validator.cc @@ -62,12 +62,12 @@ void ValidateOperation(const AnfNodePtr &node) { void ValidateAbstract(const AnfNodePtr &node) { if (node == nullptr) { - MS_LOG(WARNING) << "Node to validate is invalid"; + MS_LOG(DEBUG) << "Node to validate is invalid"; return; } AbstractBasePtr ptrBase = node->abstract(); if (ptrBase == nullptr) { - MS_LOG(WARNING) << "Abstract is null in node: " << node->DebugString(); + MS_LOG(DEBUG) << "Abstract is null in node: " << node->DebugString(); return; } if (ptrBase->isa() || ptrBase->isa()) { @@ -88,7 +88,7 @@ void ValidateAbstract(const AnfNodePtr &node) { } if (ptrBase->isa()) { // NOTICE: validate dead code? - MS_LOG(WARNING) << "AbstractError in the graph: " << ptrBase->ToString(); + MS_LOG(DEBUG) << "AbstractError in the graph: " << ptrBase->ToString(); return; } diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index aee0654c45..1450572e4b 100644 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -640,7 +640,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) { // if name not in params_, create a node in graph if (node_itor == params_.end()) { MS_LOG(WARNING) << name << " is not in params, and create a new node."; - ParameterPtr param = anf_graph_->add_parameter(); + ParameterPtr param = std::make_shared(nullptr); name = name + "_temp"; param->set_name(name); (void)ConvertParameter(param); diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 97c2be142d..342f8fc7fe 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -411,9 +411,9 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple if (params.empty()) { MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0"; } - if (args.size() != params.size()) { - MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size() - << ", let graph to be executed."; + if ((args.size() + func_graph->hyper_param_count()) != params.size()) { + MS_LOG(EXCEPTION) << "Input size " << args.size() << " add Parameter count " << func_graph->hyper_param_count() + << " not equal to graph input size " << params.size() << ", let graph to be executed."; } auto it = std::find(params.begin(), params.end(), output); diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 4e6e67e32b..502f00572f 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -420,6 +420,8 @@ def export(net, *inputs, file_name, file_format='GEIR'): _executor.compile(net, *inputs, phase='export') _executor.export(net, file_name, 
file_format)
     elif file_format == 'ONNX':  # file_format is 'ONNX'
+        # NOTICE: the phase name `export_onnx` is used for judging whether onnx is being exported in the compile
+        # pipeline, do not change it to other values.
         phase_name = 'export_onnx'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
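
Note on the CMake changes in this patch: the gpu_cuda_lib, gpu_queue, gpu_collective, _ms_mpi, dataset/gnn and dataset/text sources now get a per-source COMPILE_DEFINITIONS entry pinning SUBMODULE_ID to the owning submodule (SM_DEVICE or SM_MD), which is what fixes the wrong log prefix named in the commit message. The sketch below only illustrates how such a compile-time SUBMODULE_ID is typically consumed by a logging macro; the enum numbering, the SubModuleName helper and the EXAMPLE_LOG macro are invented for illustration and are not MindSpore's actual MS_LOG implementation.

    #include <iostream>

    namespace mindspore {
    enum class SubModuleId { SM_UNKNOWN = 0, SM_ME, SM_DEVICE, SM_MD };
    }  // namespace mindspore

    // Fallback used when a translation unit is built without -DSUBMODULE_ID=...;
    // the set_property(... COMPILE_DEFINITIONS ...) calls above override this per source file.
    #ifndef SUBMODULE_ID
    #define SUBMODULE_ID mindspore::SubModuleId::SM_ME
    #endif

    static const char *SubModuleName(mindspore::SubModuleId id) {
      switch (id) {
        case mindspore::SubModuleId::SM_DEVICE: return "DEVICE";
        case mindspore::SubModuleId::SM_MD:     return "MD";
        case mindspore::SubModuleId::SM_ME:     return "ME";
        default:                                return "UNKNOWN";
      }
    }

    // The submodule tag is resolved at compile time from whatever SUBMODULE_ID
    // the build system defined for this source file.
    #define EXAMPLE_LOG(msg) (std::cout << "[" << SubModuleName(SUBMODULE_ID) << "] " << (msg) << std::endl)

    int main() {
      EXAMPLE_LOG("blocking queue initialized");  // prints "[ME] ..." by default
      return 0;
    }

Built with -DSUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE, as arranged for the device sources above, the same statement logs with the DEVICE prefix instead of the default.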
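
Note on the pipeline.cc/pipeline.h changes: CompileInner now passes the configured action list through the new ExecutorPy::FilterActions before constructing the Pipeline, so that when the phase prefix contains 'export_onnx' every action after the 'validate' stage is dropped; per the commit message, this is what avoids the core dump when exporting an ONNX model with the device target set to 'GPU', because the backend stages never run for that phase. A stand-alone sketch of just that truncate-after-'validate' idea, with ActionItem reduced to a (name, thunk) pair and the stage names chosen only for illustration:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // A named pipeline stage: (stage name, callable). The real code pairs the name with an
    // action over a compilation resource; a thunk is enough for this sketch.
    using ActionItem = std::pair<std::string, std::function<void()>>;

    // Keep actions up to and including "validate" when exporting ONNX,
    // otherwise return the list untouched.
    std::vector<ActionItem> FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
      if (phase.find("export_onnx") == std::string::npos) {
        return actions;
      }
      std::vector<ActionItem> filtered;
      for (const auto &item : actions) {
        filtered.emplace_back(item);
        if (item.first == "validate") {
          break;  // everything after 'validate' (backend lowering, execution) is skipped
        }
      }
      return filtered;
    }

    int main() {
      std::vector<ActionItem> actions = {
        {"parse", [] { std::cout << "parse\n"; }},
        {"abstract_specialize", [] { std::cout << "abstract_specialize\n"; }},
        {"validate", [] { std::cout << "validate\n"; }},
        {"task_emit", [] { std::cout << "task_emit\n"; }},
        {"execute", [] { std::cout << "execute\n"; }},
      };
      for (const auto &item : FilterActions(actions, "export_onnx.1")) {
        item.second();  // runs parse, abstract_specialize, validate only
      }
      return 0;
    }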
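
Note on the convert_utils.cc change: a graph's formal parameters cover both the user-supplied inputs and the weights bound to the graph (its hyper parameters), so the check has to compare args.size() + hyper_param_count() against params.size(); comparing args.size() alone raised a spurious mismatch for any network with weights. A minimal sketch of that accounting, with the graph reduced to three counters and MS_LOG(EXCEPTION) replaced by a plain exception:

    #include <cstddef>
    #include <iostream>
    #include <sstream>
    #include <stdexcept>

    // Hypothetical reduction of the check: call arguments plus bound weights
    // must add up to the graph's formal parameter count.
    void CheckArgCount(std::size_t arg_count, std::size_t hyper_param_count, std::size_t graph_param_count) {
      if (arg_count + hyper_param_count != graph_param_count) {
        std::ostringstream oss;
        oss << "Input size " << arg_count << " add Parameter count " << hyper_param_count
            << " not equal to graph input size " << graph_param_count;
        throw std::runtime_error(oss.str());
      }
    }

    int main() {
      CheckArgCount(2, 3, 5);  // ok: 2 user inputs + 3 weights == 5 graph parameters
      try {
        CheckArgCount(2, 0, 5);  // the pre-fix comparison in effect: weights ignored
      } catch (const std::runtime_error &e) {
        std::cout << e.what() << std::endl;  // spurious mismatch for any network with weights
      }
      return 0;
    }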
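
Note on the serialization.py change: ONNX export now compiles under the dedicated phase 'export_onnx', and the added NOTICE comment warns against renaming it because the compile pipeline detects ONNX export purely by finding that string in the phase prefix (see FilterActions above). The sketch below shows that detection under the assumption that a phase name looks like '<prefix>.<suffix>' with the prefix ending at the first dot; GetPhasePrefix here is a plausible reimplementation for illustration, not the exact MindSpore helper.

    #include <iostream>
    #include <string>

    // Plausible helper: take the part of the phase name before the first '.'.
    std::string GetPhasePrefix(const std::string &phase) {
      auto pos = phase.find('.');
      return pos == std::string::npos ? phase : phase.substr(0, pos);
    }

    // ONNX export is recognized by the phase prefix containing "export_onnx".
    bool IsOnnxExportPhase(const std::string &phase) {
      return GetPhasePrefix(phase).find("export_onnx") != std::string::npos;
    }

    int main() {
      std::cout << std::boolalpha;
      std::cout << IsOnnxExportPhase("export_onnx.1") << "\n";  // true: pipeline stops after 'validate'
      std::cout << IsOnnxExportPhase("train.1") << "\n";        // false: full pipeline runs
      return 0;
    }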