Browse Source

1. Update log level of some statements in validator.cc

2. Fix core dump when exporting an onnx model while the device target is 'GPU'
3. Fix check error on the number of arguments versus graph parameters
4. Fix incorrect log prefix in some files of the gpu submodule
tags/v0.5.0-beta
fary86 5 years ago
parent
commit
54ccab295c
11 changed files with 52 additions and 19 deletions
  1. +1
    -0
      mindspore/ccsrc/CMakeLists.txt
  2. +2
    -0
      mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt
  3. +2
    -0
      mindspore/ccsrc/dataset/text/CMakeLists.txt
  4. +14
    -11
      mindspore/ccsrc/device/CMakeLists.txt
  5. +4
    -0
      mindspore/ccsrc/onnx/onnx_exporter.cc
  6. +17
    -1
      mindspore/ccsrc/pipeline/pipeline.cc
  7. +3
    -0
      mindspore/ccsrc/pipeline/pipeline.h
  8. +3
    -3
      mindspore/ccsrc/pipeline/validator.cc
  9. +1
    -1
      mindspore/ccsrc/transform/convert.cc
  10. +3
    -3
      mindspore/ccsrc/utils/convert_utils.cc
  11. +2
    -0
      mindspore/train/serialization.py

+ 1
- 0
mindspore/ccsrc/CMakeLists.txt View File

@@ -49,6 +49,7 @@ if(ENABLE_GPU)

set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
endif ()


+ 2
- 0
mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt View File

@@ -1,3 +1,5 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-gnn OBJECT
graph.cc
graph_loader.cc


+ 2
- 0
mindspore/ccsrc/dataset/text/CMakeLists.txt View File

@@ -1,5 +1,7 @@
add_subdirectory(kernels)

file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(text OBJECT
vocab.cc
)


+ 14
- 11
mindspore/ccsrc/device/CMakeLists.txt View File

@@ -20,25 +20,28 @@ endif ()
if (ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu")

set(GPU_QUEUE_SRCS "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
set(GPU_COLLECTIVE_SRCS "gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc")

# gpu_queue
list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM CUDA_SRC_LIST ${GPU_QUEUE_SRCS})
set_property(SOURCE ${GPU_QUEUE_SRCS} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(gpu_queue SHARED ${GPU_QUEUE_SRCS})
target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so)

list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc"
"gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc"
)
list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" ${GPU_COLLECTIVE_SRCS})

if (ENABLE_MPI)
include(ExternalProject)
# gpu_collective
add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc"
"gpu/distribution/mpi_wrapper.cc"
"gpu/distribution/nccl_wrapper.cc"
)
set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
# _ms_mpi
set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)


+ 4
- 0
mindspore/ccsrc/onnx/onnx_exporter.cc View File

@@ -411,6 +411,8 @@ void OnnxExporter::InitModelInfo() {
void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) {
std::map<AnfNodePtr, size_t> node_map;

MS_LOG(INFO) << "Begin exporting onnx model for graph " << func_graph->ToString();

onnx_node_index_ = func_graph->parameters().size();

// set graph name
@@ -423,6 +425,8 @@ void OnnxExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphPr

// export computational nodes and output nodes
ExportNodes(func_graph, &node_map, graph_proto);

MS_LOG(INFO) << "End exporting onnx model for graph " << func_graph->ToString();
}

void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) {


+ 17
- 1
mindspore/ccsrc/pipeline/pipeline.cc View File

@@ -374,7 +374,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
p_actions = GePipeline();
}

std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, p_actions);
std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));

// get the parameters items and add the value to args_spec
abstract::AbstractBasePtrList args_spec;
@@ -408,6 +408,22 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
return true;
}

std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
// phase does not contain 'export_onnx'
if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
return actions;
}
MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
std::vector<ActionItem> filtered_actions;
for (const auto &item : actions) {
filtered_actions.emplace_back(item);
if (item.first == "validate") {
break;
}
}
return filtered_actions;
}

void ExecutorPy::ReleaseResource(const py::object &phase) {
ResourcePtr res = GetResource(py::cast<std::string>(phase));
if (res != nullptr) {


+ 3
- 0
mindspore/ccsrc/pipeline/pipeline.h View File

@@ -100,6 +100,9 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
void GetGeBackendPolicy() const;
// filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
// 'validate' stage
static std::vector<ActionItem> FilterActions(const std::vector<ActionItem> &actions, const std::string &phase);

std::map<std::string, ExecutorInfoPtr> info_;
static std::shared_ptr<ExecutorPy> executor_;


+ 3
- 3
mindspore/ccsrc/pipeline/validator.cc View File

@@ -62,12 +62,12 @@ void ValidateOperation(const AnfNodePtr &node) {

void ValidateAbstract(const AnfNodePtr &node) {
if (node == nullptr) {
MS_LOG(WARNING) << "Node to validate is invalid";
MS_LOG(DEBUG) << "Node to validate is invalid";
return;
}
AbstractBasePtr ptrBase = node->abstract();
if (ptrBase == nullptr) {
MS_LOG(WARNING) << "Abstract is null in node: " << node->DebugString();
MS_LOG(DEBUG) << "Abstract is null in node: " << node->DebugString();
return;
}
if (ptrBase->isa<AbstractClass>() || ptrBase->isa<AbstractJTagged>()) {
@@ -88,7 +88,7 @@ void ValidateAbstract(const AnfNodePtr &node) {
}
if (ptrBase->isa<AbstractError>()) {
// NOTICE: validate dead code?
MS_LOG(WARNING) << "AbstractError in the graph: " << ptrBase->ToString();
MS_LOG(DEBUG) << "AbstractError in the graph: " << ptrBase->ToString();
return;
}



+ 1
- 1
mindspore/ccsrc/transform/convert.cc View File

@@ -640,7 +640,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
// if name not in params_, create a node in graph
if (node_itor == params_.end()) {
MS_LOG(WARNING) << name << " is not in params, and create a new node.";
ParameterPtr param = anf_graph_->add_parameter();
ParameterPtr param = std::make_shared<Parameter>(nullptr);
name = name + "_temp";
param->set_name(name);
(void)ConvertParameter(param);


+ 3
- 3
mindspore/ccsrc/utils/convert_utils.cc View File

@@ -411,9 +411,9 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple
if (params.empty()) {
MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0";
}
if (args.size() != params.size()) {
MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size()
<< ", let graph to be executed.";
if ((args.size() + func_graph->hyper_param_count()) != params.size()) {
MS_LOG(EXCEPTION) << "Input size " << args.size() << " add Parameter count " << func_graph->hyper_param_count()
<< " not equal to graph input size " << params.size() << ", let graph to be executed.";
}

auto it = std::find(params.begin(), params.end(), output);


+ 2
- 0
mindspore/train/serialization.py View File

@@ -420,6 +420,8 @@ def export(net, *inputs, file_name, file_format='GEIR'):
_executor.compile(net, *inputs, phase='export')
_executor.export(net, file_name, file_format)
elif file_format == 'ONNX': # file_format is 'ONNX'
# NOTICE: the phase name `export_onnx` is used to judge whether onnx is being exported in the compile pipeline,
# do not change it to other values.
phase_name = 'export_onnx'
graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
onnx_stream = _executor._get_func_graph_proto(graph_id)


Loading…
Cancel
Save