diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index 0d79cfa5..ba8da1fb 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation. License: MIT License Please see above. + + + +Software: caffe 1.0 + +License: BSD 2-Clause License + +Open Source Software Licensed Under the BSD 2-Clause License + +GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model. +Please see below for the full list of source code files from caffe that are used by GraphEngine. +The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. +---------------------------------------------------------------------------------------- +1. caffe.proto master +All contributions by the University of California: +Copyright (c) 2014-2017 The Regents of the University of California (Regents) +All rights reserved. + + +Terms of the BSD 2-Clause License: +-------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +Software: tensorflow 1.15.0 + +License: Apache-2.0 License + +Open Source Software Licensed Under the Apache-2.0 License + + +GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model. +Please see below for the full list of source code files from tensorflow that are used by GraphEngine. +The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. +---------------------------------------------------------------------------------------- +1. attr_value.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +2. function.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +3. graph.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +4. node_def.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +5. op_def.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +6. resource_handle.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +7. tensor.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +8. tensor_shape.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +9. types.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +10. 
versions.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Terms of the Apache-2.0 License: +Please see above. diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88d74730..306b3eda 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -125,6 +125,7 @@ set(TRAIN_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" + $<$>:graph/manager/host_mem_allocator.cc> "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -165,7 +166,8 @@ set(TRAIN_SRC_LIST "graph/passes/dropout_pass.cc" "graph/passes/hccl_group_pass.cc" "graph/passes/enter_pass.cc" - "graph/passes/assign_pass.cc" + "graph/passes/assign_remove_pass.cc" + $<$>:graph/passes/inplace_support_check_pass.cc> "graph/passes/flow_ctrl_pass.cc" "graph/passes/global_step_insert_pass.cc" "host_kernels/transpose_kernel.cc" @@ -401,6 +403,7 @@ set(INFER_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" + $<$>:graph/manager/host_mem_allocator.cc> "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" "model/ge_model.cc" @@ -521,7 +524,8 @@ set(INFER_SRC_LIST "graph/passes/cond_remove_pass.cc" "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" - "graph/passes/assign_pass.cc" + "graph/passes/assign_remove_pass.cc" + $<$>:graph/passes/inplace_support_check_pass.cc> "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" @@ -625,6 +629,7 @@ target_compile_definitions(ge_runner PRIVATE target_compile_options(ge_runner PRIVATE -O2 + -fno-common ) target_include_directories(ge_runner PRIVATE @@ -669,7 +674,6 @@ target_link_libraries(ge_runner PRIVATE c_sec slog runtime - resource error_manager ascend_hal_stub -Wl,--as-needed @@ -693,6 +697,7 @@ target_compile_definitions(ge_compiler PRIVATE 
target_compile_options(ge_compiler PRIVATE -O2 + -fno-common ) target_include_directories(ge_compiler PRIVATE @@ -734,7 +739,6 @@ target_link_libraries(ge_compiler PRIVATE error_manager slog runtime_compile - resource -Wl,--as-needed json -lrt diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 05838df8..bb08570a 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -80,6 +80,7 @@ target_compile_options(ge_common PRIVATE -O2 -Werror -Wno-deprecated-declarations + -fno-common ) target_include_directories(ge_common PRIVATE @@ -134,7 +135,7 @@ target_compile_definitions(ge_common_static PRIVATE ) target_compile_options(ge_common_static PRIVATE - $<$,$>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations> + $<$,$>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> $<$,$>:/MTd> $<$,$>:/MT> ) diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 52917abe..1d5a4a9b 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -76,6 +76,48 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil return SUCCESS; } +Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) { + vector om_info; + ModelPtr model_tmp = ge::MakeShared(ge_model->GetName(), ge_model->GetPlatformVersion()); + if (model_tmp == nullptr) { + GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str()); + return FAILED; + } + model_tmp->SetGraph(ge_model->GetGraph()); + model_tmp->SetVersion(ge_model->GetVersion()); + model_tmp->SetAttr(ge_model->MutableAttrMap()); + ge::Buffer model_buffer; + (void)model_tmp->Save(model_buffer); + GELOGD("SaveSizeToModelDef modeldef_size is %zu", model_buffer.GetSize()); + om_info.push_back(model_buffer.GetSize()); + + auto ge_model_weight = ge_model->GetWeight(); + GELOGD("SaveSizeToModelDef weight_data_size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); + 
om_info.push_back(ge_model_weight.GetSize()); + + TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); + GELOGD("SaveSizeToModelDef tbe_kernels_size is %zu", tbe_kernel_store.DataSize()); + om_info.push_back(tbe_kernel_store.DataSize()); + + CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); + GELOGD("SaveSizeToModelDef cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); + om_info.push_back(cust_aicpu_kernel_store.DataSize()); + + std::shared_ptr model_task_def = ge_model->GetModelTaskDefPtr(); + if (model_task_def == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + size_t partition_task_size = model_task_def->ByteSizeLong(); + GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); + om_info.push_back(partition_task_size); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info), + GELOGE(FAILED, "SetListInt of om_info_list failed."); + return FAILED); + + return SUCCESS; +} Status ModelHelper::SaveModelDef(std::shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, ge::Buffer &model_buffer, size_t model_index) { @@ -87,7 +129,11 @@ Status ModelHelper::SaveModelDef(std::shared_ptr &om_file_save model_tmp->SetGraph(ge_model->GetGraph()); model_tmp->SetVersion(ge_model->GetVersion()); model_tmp->SetAttr(ge_model->MutableAttrMap()); - + Status ret = SaveSizeToModelDef(ge_model); + if (ret != SUCCESS) { + GELOGE(ret, "SaveSizeToModelDef failed"); + return ret; + } (void)model_tmp->Save(model_buffer); GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize()); diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 73d0cc3c..abc4a6df 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -89,13 +89,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, 
MsprofGeOpt #ifdef DAVINCI_SUPPORT_PROFILING // enable profiling by env char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 }; - is_load_profiling_ = false; // Change in ProfInit is_execute_profiling_ = false; if (options.profiling_mode == "1" && !options.profiling_options.empty()) { // enable profiling by ge option - if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), - options.profiling_options.size()) != EOK) { + if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), + MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); return INTERNAL_ERROR; } @@ -125,11 +124,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return ge::PARAM_INVALID; } - if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(), - sizeof(options.job_id.c_str())) != EOK) { + if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), + MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { GELOGE(INTERNAL_ERROR, "copy job_id failed."); return INTERNAL_ERROR; } + GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str()); #endif return ge::SUCCESS; } @@ -159,6 +159,7 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { if (!fp_point_.empty() && !bp_point_.empty()) { GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); } + is_training_trace_ = true; } catch (...) 
{ GELOGE(FAILED, "Json prof_conf options is invalid."); return ge::PARAM_INVALID; @@ -632,6 +633,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt uint64_t module, const std::map &config_para) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); + uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK; + if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { + is_training_trace_ = true; + } int32_t device_num = 0; vector device_list; if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { diff --git a/ge/common/proto/tensorflow/attr_value.proto b/ge/common/proto/tensorflow/attr_value.proto index 1cc67d62..438d7163 100644 --- a/ge/common/proto/tensorflow/attr_value.proto +++ b/ge/common/proto/tensorflow/attr_value.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/function.proto b/ge/common/proto/tensorflow/function.proto index 075897c6..44681e32 100644 --- a/ge/common/proto/tensorflow/function.proto +++ b/ge/common/proto/tensorflow/function.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). 
+ * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/graph.proto b/ge/common/proto/tensorflow/graph.proto index d639a7d6..73bfc6ee 100644 --- a/ge/common/proto/tensorflow/graph.proto +++ b/ge/common/proto/tensorflow/graph.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/graph_library.proto b/ge/common/proto/tensorflow/graph_library.proto index e393d38d..7bca0838 100644 --- a/ge/common/proto/tensorflow/graph_library.proto +++ b/ge/common/proto/tensorflow/graph_library.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/node_def.proto b/ge/common/proto/tensorflow/node_def.proto index b9bc97ee..50cf5cac 100644 --- a/ge/common/proto/tensorflow/node_def.proto +++ b/ge/common/proto/tensorflow/node_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/op_def.proto b/ge/common/proto/tensorflow/op_def.proto index 3485d045..7f0e8ce2 100644 --- a/ge/common/proto/tensorflow/op_def.proto +++ b/ge/common/proto/tensorflow/op_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/resource_handle.proto b/ge/common/proto/tensorflow/resource_handle.proto index a3452351..91c46c9a 100644 --- a/ge/common/proto/tensorflow/resource_handle.proto +++ b/ge/common/proto/tensorflow/resource_handle.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/tensor.proto b/ge/common/proto/tensorflow/tensor.proto index d0a4d024..48eeb6c4 100644 --- a/ge/common/proto/tensorflow/tensor.proto +++ b/ge/common/proto/tensorflow/tensor.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/tensor_shape.proto b/ge/common/proto/tensorflow/tensor_shape.proto index 4225a2e3..3a6d8c5a 100644 --- a/ge/common/proto/tensorflow/tensor_shape.proto +++ b/ge/common/proto/tensorflow/tensor_shape.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + // Protocol buffer representing the shape of tensors. syntax = "proto3"; diff --git a/ge/common/proto/tensorflow/types.proto b/ge/common/proto/tensorflow/types.proto index ba7a72b3..f40e49cb 100644 --- a/ge/common/proto/tensorflow/types.proto +++ b/ge/common/proto/tensorflow/types.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/versions.proto b/ge/common/proto/tensorflow/versions.proto index 48061218..4e81548f 100644 --- a/ge/common/proto/tensorflow/versions.proto +++ b/ge/common/proto/tensorflow/versions.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index ee815550..4ca18864 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,6 +28,7 @@ set(SRC_LIST "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" + $<$>:../graph/manager/host_mem_allocator.cc> "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" @@ -162,7 +163,7 @@ set(SRC_LIST add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_executor PRIVATE - $<$,$>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations> + $<$,$>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> $<$,$>:/MTd> $<$,$>:/MT> ) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index e506994e..abdc0c3f 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -39,8 +39,6 @@ #include "graph/manager/graph_var_manager.h" #include "graph/load/new_model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" -#include "graph/opsproto_manager.h" -#include 
"ge_local_engine/engine/host_cpu_engine.h" using std::string; using std::vector; @@ -210,33 +208,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, namespace ge { bool GeExecutor::isInit_ = false; -static void InitOpsProtoManger() { - string opsproto_path; - const char *path_env = std::getenv("ASCEND_OPP_PATH"); - if (path_env != nullptr) { - string path = path_env; - string file_path = RealPath(path.c_str()); - if (file_path.empty()) { - GELOGE(FAILED, "File path %s is invalid.", path.c_str()); - return; - } - opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); - GELOGI("Get opsproto so path from env : %s", path.c_str()); - } else { - string path_base = PluginManager::GetPath(); - GELOGI("path_base is %s", path_base.c_str()); - path_base = path_base.substr(0, path_base.rfind('/')); - path_base = path_base.substr(0, path_base.rfind('/') + 1); - opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); - } - - GELOGI("Get opsproto path is %s", opsproto_path.c_str()); - OpsProtoManager *manager = OpsProtoManager::Instance(); - map option_tmp; - option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); - (void)manager->Initialize(option_tmp); -} - GeExecutor::GeExecutor() {} Status GeExecutor::Initialize() { @@ -246,16 +217,6 @@ Status GeExecutor::Initialize() { return ge::SUCCESS; } - OpTilingManager::GetInstance().LoadSo(); - - Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); - if (initHostCpuEngineStatus != SUCCESS) { - GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); - return initHostCpuEngineStatus; - } - - InitOpsProtoManger(); - std::vector mem_type(1, RT_MEMORY_HBM); mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 34c2a37e..87abdade 100644 --- a/ge/executor/module.mk +++ 
b/ge/executor/module.mk @@ -15,6 +15,7 @@ local_ge_executor_src_files := \ ../graph/manager/graph_manager_utils.cc \ ../graph/manager/graph_var_manager.cc \ ../graph/manager/rdma_pool_allocator.cc \ + ../graph/manager/host_mem_allocator.cc \ ../graph/manager/graph_mem_allocator.cc \ ../graph/manager/graph_caching_allocator.cc \ ../graph/manager/trans_var_data_utils.cc \ diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index e20456d5..74d09404 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -64,6 +64,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ graph/manager/graph_var_manager.cc \ graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ + graph/manager/host_mem_allocator.cc \ graph/manager/graph_mem_allocator.cc \ graph/manager/graph_caching_allocator.cc \ @@ -195,7 +196,8 @@ OMG_HOST_SRC_FILES := \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ - graph/passes/assign_pass.cc \ + graph/passes/assign_remove_pass.cc \ + graph/passes/inplace_support_check_pass.cc \ graph/passes/addn_pass.cc \ graph/passes/common_subexpression_elimination_pass.cc \ graph/passes/transop_symmetry_elimination_pass.cc \ diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index 8f5c9777..7189e8ff 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -26,6 +26,7 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_local_engine PRIVATE -Werror + -fno-common ) target_compile_definitions(ge_local_engine PRIVATE @@ -55,10 +56,8 @@ target_link_libraries(ge_local_engine PRIVATE -Wl,--no-as-needed graph ascend_protobuf - register c_sec slog - runtime -Wl,--as-needed ) @@ -67,6 +66,7 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(atc_ge_local_engine PRIVATE -Werror + -fno-common ) target_compile_definitions(atc_ge_local_engine PRIVATE @@ -97,10 +97,8 @@ 
target_link_libraries(atc_ge_local_engine PRIVATE -Wl,--no-as-needed graph ascend_protobuf - register c_sec slog - runtime_compile -Wl,--as-needed ) @@ -114,6 +112,7 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR target_compile_options(ge_local_opskernel_builder PRIVATE -Werror + -fno-common ) target_compile_definitions(ge_local_opskernel_builder PRIVATE @@ -154,6 +153,7 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO target_compile_options(atc_ge_local_opskernel_builder PRIVATE -Werror + -fno-common ) target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE @@ -199,6 +199,7 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR target_compile_options(ge_local_opskernel_builder_static PRIVATE -Werror + -fno-common ) target_compile_definitions(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index e17f73de..0f46b4cb 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -26,6 +26,31 @@ #include "common/math/math_util.h" namespace { +#ifndef ONLY_COMPILE_OPEN_SRC +#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ + case (DTYPE): { \ + GeTensorPtr ge_tensor = nullptr; \ + if (need_create_flag) { \ + uint64_t size = data_num * sizeof(TYPE); \ + ge_tensor = MakeShared(out_desc, size); \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \ + ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ + ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ + outputs.emplace_back(ge_tensor); \ + } else { \ + ge_tensor = outputs[i]; \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ + } \ + auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ + auto tensor_name = 
op_desc->GetOutputNameByIndex(i); \ + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ + op_desc->GetName().c_str(), i); \ + named_outputs.emplace(tensor_name, tensor); \ + break; \ + } +#else #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ case (DTYPE): { \ GeTensorPtr ge_tensor = nullptr; \ @@ -61,6 +86,7 @@ namespace { named_outputs.emplace(tensor_name, tensor); \ break; \ } +#endif } namespace ge { diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 9706dadb..5a99dc8c 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -94,6 +94,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/manager/graph_var_manager.cc \ graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ + graph/manager/host_mem_allocator.cc \ graph/manager/memory_api.cc \ graph/manager/model_manager/event_manager.cc \ graph/manager/trans_var_data_utils.cc \ @@ -134,7 +135,8 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/dropout_pass.cc \ graph/passes/hccl_group_pass.cc \ graph/passes/enter_pass.cc \ - graph/passes/assign_pass.cc \ + graph/passes/assign_remove_pass.cc \ + graph/passes/inplace_support_check_pass.cc \ graph/passes/flow_ctrl_pass.cc \ graph/passes/global_step_insert_pass.cc \ host_kernels/transpose_kernel.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 016f9ef2..f8d4900a 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -336,6 +336,7 @@ class GeGenerator::Impl { bool GetVersionFromPath(const std::string &file_path, std::string &version); bool SetAtcVersionInfo(AttrHolder &obj); bool SetOppVersionInfo(AttrHolder &obj); + bool SetOmSystemInfo(AttrHolder &obj); }; Status GeGenerator::Initialize(const map &options) { @@ -546,6 +547,32 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { return true; } +bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) { + std::string soc_version; + (void)ge::GetContext().GetOption(ge::SOC_VERSION, soc_version); + 
GELOGI("SetOmSystemInfo soc_version: %s", soc_version.c_str()); + if (!ge::AttrUtils::SetStr(obj, "soc_version", soc_version)) { + GELOGW("SetStr of soc_version failed."); + return false; + } + + // 0(Caffe) 1(MindSpore) 3(TensorFlow) 5(Onnx) + std::map framework_type_to_string = { + {"0", "Caffe"}, + {"1", "MindSpore"}, + {"3", "TensorFlow"}, + {"5", "Onnx"} + }; + std::string framework_type; + (void)ge::GetContext().GetOption(ge::FRAMEWORK_TYPE, framework_type); + GELOGI("SetOmSystemInfo framework_type: %s", framework_type.c_str()); + if (!ge::AttrUtils::SetStr(obj, "framework_type", framework_type_to_string[framework_type.c_str()])) { + GELOGW("SetStr of framework_type failed."); + return false; + } + return true; +} + Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, ModelBufferData &model, bool is_offline) { rtContext_t ctx = nullptr; @@ -842,6 +869,9 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo if (!SetOppVersionInfo(*(model_root.get()))) { GELOGW("SetPackageVersionInfo of ops failed!"); } + if (!SetOmSystemInfo(*(model_root.get()))) { + GELOGW("SetOmsystemInfo failed!"); + } ModelHelper model_helper; model_helper.SetSaveMode(is_offline_); ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape); diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt index e988b4ce..126e0187 100644 --- a/ge/graph/build/memory/CMakeLists.txt +++ b/ge/graph/build/memory/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(ge_memory STATIC ${SRC_LIST}) target_compile_options(ge_memory PRIVATE -Werror -O2 + -fno-common ) target_compile_definitions(ge_memory PRIVATE diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 40b3b9dc..2af94d40 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ 
-402,6 +402,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + continuous_mem_start = iter->second.mem_offset_; for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 43fe3fa3..b7714c4a 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -163,7 +163,6 @@ DavinciModel::~DavinciModel() { op_list_.clear(); data_op_list_.clear(); - output_op_list_.clear(); tensor_name_to_fixed_addr_size_.clear(); tensor_name_to_peer_output_index_.clear(); GE_DELETE_NEW_SINGLE(data_inputer_); @@ -830,12 +829,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { {CASE, &DavinciModel::InitCase}, }; - GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); - + vector output_op_list; map data_by_index; auto nodes = compute_graph->GetAllNodes(); const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); - for (size_t i = 0; i < nodes.size(); i++) { + for (size_t i = 0; i < nodes.size(); ++i) { auto node = nodes.at(i); auto op_desc = node->GetOpDesc(); if (op_desc == nullptr) { @@ -850,7 +848,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); if (IsDataOp(op_desc->GetType())) { - if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { + if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -859,7 +857,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } if 
(op_desc->GetType() == NETOUTPUT) { - if (InitNetOutput(node) != SUCCESS) { + if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -919,33 +917,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } GE_TIMESTAMP_ADD(InitTbeHandle); } - AdjustDataOpList(data_by_index); + GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); - return SUCCESS; -} - -Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { - if (!known_node_) return SUCCESS; - // for dynamic shape - auto direct_nodes = compute_graph->GetDirectNode(); - for (size_t i = 0; i < direct_nodes.size(); i++) { - auto node = direct_nodes.at(i); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is null."); - return PARAM_INVALID; - } - if (IsDataOp(op_desc->GetType())) { - GELOGD("init data op %s", op_desc->GetName().c_str()); - data_op_list_.push_back(op_desc); - } - if (op_desc->GetType() == NETOUTPUT) { - GELOGD("init netouput op %s", op_desc->GetName().c_str()); - output_op_list_.push_back(op_desc); - } - } - return SUCCESS; + return OptInputOutputInfo(data_by_index, output_op_list); } void DavinciModel::SetLabelForDynamic(const NodePtr &node) { @@ -963,24 +938,35 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { } } +/// /// @ingroup ge /// @brief Data Op Initialize. +/// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: Data Op. -/// @param [in/out] data_op_index: NetOutput addr size info. +/// @param [in/out] data_op_index: index of courrent count. +/// @param [in/out] data_by_index: Data ordered by index. 
/// @return Status -Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map &data_by_index) { +/// +Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, + map &data_by_index) { // op_desc Checked by Init: Data, valid. auto op_desc = node->GetOpDesc(); - if (known_node_) { + if (node->GetOwnerComputeGraph() != graph) { + GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); return SUCCESS; } - uint32_t parent_index = 0; // Ignore subgraph Data Node. - if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); - return SUCCESS; + + GELOGI("Init Data node: %s.", op_desc->GetName().c_str()); + auto data_index = data_op_index++; + if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { + GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); } + data_by_index[data_index] = op_desc; data_op_list_.push_back(op_desc); + if (known_node_) { + return SUCCESS; + } // Make information for copy input data. 
const vector output_size_list = ModelUtils::GetOutputSize(op_desc); @@ -992,10 +978,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); return PARAM_INVALID; } - auto data_index = data_op_index; - if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); - } + bool fusion_flag = false; ZeroCopyOffset zero_copy_offset; int64_t data_size = output_size_list[kDataIndex]; @@ -1006,7 +989,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma return PARAM_INVALID; } new_input_data_info_[data_index] = zero_copy_offset; - data_by_index[data_index] = op_desc; for (size_t index = 0; index < virtual_addr_list.size(); ++index) { void *addr = virtual_addr_list.at(index); @@ -1017,7 +999,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma new_input_outside_addrs_[addr] = zero_copy_offset; } - data_op_index++; return SUCCESS; } @@ -1025,18 +1006,52 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma /// @ingroup ge /// @brief Sort Data op list by index. /// @param [in] data_by_index: map of Data Op. -/// @return +/// @param [in] output_op_list: list of NetOutput op. 
+/// @return Status /// -void DavinciModel::AdjustDataOpList(const map &data_by_index) { +Status DavinciModel::OptInputOutputInfo(const map &data_by_index, + const vector &output_op_list) { + GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size()); if (data_by_index.size() != data_op_list_.size()) { - GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); - return; + GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); + return INTERNAL_ERROR; } data_op_list_.clear(); for (auto &item : data_by_index) { data_op_list_.emplace_back(item.second); + auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); + input_addrs_list_.emplace_back(output_addrs); + + if (item.second->GetType() == AIPP_DATA_TYPE) { + GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); + is_dynamic_aipp_ = true; + } } + + for (const auto &op_desc : output_op_list) { + auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); + output_addrs_list_.emplace_back(input_addrs); + + bool getnext_sink_dynamic = false; + if (AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { + GELOGI("ATTR_GETNEXT_SINK_DYNMAIC has been set and is true, node: %s", op_desc->GetName().c_str()); + is_getnext_sink_dynamic_ = true; + } + + vector shape_info; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, shape_info)) { + dynamic_output_shape_info_.insert(dynamic_output_shape_info_.end(), shape_info.begin(), shape_info.end()); + } + + if (InitOutputTensorInfo(op_desc) != SUCCESS) { + return INTERNAL_ERROR; + } + } + + return 
InitOutputDescInfo(output_op_list, output_descs_, output_formats_); } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1050,24 +1065,27 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { /// @ingroup ge /// @brief NetOutput Op Initialize. +/// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: NetOutput Op. +/// @param [in/out] vector: All NetOutput node in model. /// @return Status -Status DavinciModel::InitNetOutput(const NodePtr &node) { +Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, + vector &output_op_list) { // node->GetOpDesc Checked by Init: NetOutput, valid. auto op_desc = node->GetOpDesc(); // excludes the function op sub graph, e.g. case,if - if (known_node_) { + if (node->GetOwnerComputeGraph() != graph) { + GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); + op_list_.erase(op_desc->GetId()); return SUCCESS; } - ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); - GE_CHECK_NOTNULL(owner_graph); - if (owner_graph->GetParentGraph() != nullptr) { - GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); - op_list_.erase(op_desc->GetId()); + + GELOGI("Init NetOutput node: %s.", op_desc->GetName().c_str()); + output_op_list.push_back(op_desc); + if (known_node_) { return SUCCESS; } - output_op_list_.push_back(op_desc); // Make information for copy output data. 
const vector input_size_list = ModelUtils::GetInputSize(op_desc); const vector virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); @@ -1665,32 +1683,30 @@ Status DavinciModel::CpuModelRepeat() { Status DavinciModel::GetInputOutputDescInfo(vector &input_desc, vector &output_desc) { - if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { GELOGI("data_op_list_ is empty or input_desc size is not 1."); } else { - std::vector input_formats; + vector input_formats; GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed."); } - std::vector outputFormats; - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get output desc info failed."); - + vector output_formats; + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); return SUCCESS; } Status DavinciModel::GetInputOutputDescInfo(vector &input_desc, vector &output_desc, - std::vector &input_formats, - std::vector &outputFormats) { - if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { + vector &input_formats, + vector &output_formats) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); return FAILED; } GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); - + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); return SUCCESS; } @@ -1828,29 +1844,22 @@ void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynami dynamic_type = dynamic_type_; } -void DavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { - for (auto &op : output_op_list_) { - if (op->GetType() != NETOUTPUT) { - continue; - } - if 
(!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { - GELOGD("Can not get dynamic output dims attr"); - } - } +void DavinciModel::GetModelAttr(vector &out_shape_info) { + out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); } Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, vector &output_desc, std::vector &input_formats, - std::vector &outputFormats) { - if ((data_op_list_.empty()) || (1 != data_op_list_[0]->GetInputsSize())) { + std::vector &output_formats) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); return FAILED; } GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), @@ -1939,7 +1948,7 @@ Status DavinciModel::GetInputDescInfo(vector &input_desc, s return SUCCESS; } -void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, +void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); @@ -1992,10 +2001,10 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { - GELOGD("Output node size: %zu", 
output_op_list_.size()); - for (size_t i = 0; i < output_op_list_.size(); i++) { - auto &op_desc = output_op_list_[i]; +Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, + vector &output_descs, vector &output_formats) { + GELOGD("Output node size: %zu", output_op_list.size()); + for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); for (uint32_t index = 0; index < out_size; index++) { string output_name; @@ -2018,13 +2027,19 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, std::to_string(src_index[index]); } output.name = output_name; - output_desc.push_back(output); - formats.push_back(format_result); + output_descs.push_back(output); + output_formats.push_back(format_result); } } return SUCCESS; } +Status DavinciModel::GetOutputDescInfo(vector &output_descs, vector &output_formats) { + output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); + output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); + return SUCCESS; +} + ge::Format DavinciModel::GetFormat() { if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { GELOGW("OP List Pointer is null or input_desc size is not 1!"); @@ -2368,7 +2383,7 @@ void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { /// @author /// Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) { - if (output_op_list_.empty()) { + if (output_addrs_list_.empty()) { Status ret = SyncVarData(); return ret; } @@ -2427,20 +2442,12 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r return SUCCESS; } -Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, - std::vector &outputs) { - GE_CHECK_NOTNULL(op_desc); - GE_CHECK_NOTNULL(output_data); - if (output_data->blobs.size() > data_index) 
{ - GELOGI("No need to generate output tensor info, model id:%u", model_id_); - return SUCCESS; - } - std::vector out_buffer_size_vec; - std::vector> shape_info_vec; +Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { size_t input_num = op_desc->GetInputsSize(); if (is_getnext_sink_dynamic_) { input_num = input_num - kGetDynamicDimsCount; } + for (size_t i = 0; i < input_num; ++i) { int64_t size = 0; auto input_desc = op_desc->GetInputDescPtr(i); @@ -2460,25 +2467,37 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data } } GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); - out_buffer_size_vec.push_back(size); - shape_info_vec.push_back(output_shape); + output_buffer_size_.push_back(size); + output_shape_info_.push_back(output_shape); + } + + return SUCCESS; +} + +Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector &outputs) { + GE_CHECK_NOTNULL(output_data); + if (!output_data->blobs.empty()) { + GELOGI("No need to generate output tensor info, model id:%u", model_id_); + return SUCCESS; } - GELOGI("Output blobs size:%zu, data index:%u, model id:%u", out_buffer_size_vec.size(), data_index, model_id_); - for (size_t i = 0; i < out_buffer_size_vec.size(); ++i) { - std::unique_ptr data_buf(new (std::nothrow) uint8_t[out_buffer_size_vec[i]]); + + GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); + for (size_t i = 0; i < output_buffer_size_.size(); ++i) { + std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); if (data_buf == nullptr) { GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); return GE_GRAPH_MALLOC_FAILED; } - output_data->blobs.push_back({data_buf.get(), static_cast(out_buffer_size_vec[i]), false}); + output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size_[i]), false}); ge::OutputTensorInfo output; - output.dims = shape_info_vec[i]; + output.dims = 
output_shape_info_[i]; output.data = std::move(data_buf); - output.length = out_buffer_size_vec[i]; + output.length = output_buffer_size_[i]; outputs.emplace_back(std::move(output)); GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, formats::JoinToString(output.dims).c_str(), output.length); } + return SUCCESS; } @@ -2513,36 +2532,28 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b return INTERNAL_ERROR; } - if (output_op_list_.empty()) { + if (output_addrs_list_.empty()) { GELOGW("Output tensor list is empty, model id: %u", model_id_); GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); return INTERNAL_ERROR; } GE_CHECK_NOTNULL(output_data); - // index of data in output_data - uint32_t data_index = 0; - output_data->index = data_id; output_data->model_id = model_id_; - is_getnext_sink_dynamic_ = false; - // copy output data from op to designated position - for (auto &op_desc : output_op_list_) { - if (IsGetNextSinkDynamic(op_desc)) { - GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); - is_getnext_sink_dynamic_ = true; - cur_dynamic_dims_.clear(); - cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); - GE_CHK_RT_RET(ret); - } - GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); - if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { - return INTERNAL_ERROR; - } - data_index += op_desc->GetInputsSize(); + if (is_getnext_sink_dynamic_) { + GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); + cur_dynamic_dims_.clear(); + cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), + netoutput_last_input_addr_, 
netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); + GE_CHK_RT_RET(ret); + } + + GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); + if (GenOutputTensorInfo(output_data, outputs) != SUCCESS) { + return INTERNAL_ERROR; } if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { @@ -2680,10 +2691,10 @@ void *DavinciModel::Run(DavinciModel *model) { model->SetProfileTime(MODEL_AFTER_PROC_START)); GE_TIMESTAMP_START(ReturnResult3); // copy output data from device to host - GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), + GE_IF_BOOL_EXEC(!model->output_addrs_list_.empty(), (void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput())) // copy output data from device to host for variable graph - GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); + GE_IF_BOOL_EXEC(model->output_addrs_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), @@ -2803,30 +2814,49 @@ void DavinciModel::UnbindTaskSinkStream() { } } +void *DavinciModel::GetRunAddress(void *addr) const { + if (fixed_mem_base_ == reinterpret_cast(mem_base_)) { + return addr; + } + + uintptr_t ptr = reinterpret_cast(addr); + if ((fixed_mem_base_ <= ptr) && (ptr < fixed_mem_base_ + runtime_param_.mem_size)) { + return mem_base_ + (ptr - fixed_mem_base_); + } else { + return addr; + } +} + Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs) { - GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); - if (inputs.size() > data_op_list_.size()) { - GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size()); + GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: 
%zu", + inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size()); + if (inputs.size() > input_addrs_list_.size()) { + GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size()); return FAILED; } // remove zero copy addr in last iteration - knonw_input_data_info_.clear(); - knonw_output_data_info_.clear(); + known_input_data_info_.clear(); + known_output_data_info_.clear(); for (size_t i = 0; i < inputs.size(); ++i) { - const vector addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); - knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; - GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); + const vector &addr_list = input_addrs_list_[i]; + void *addr = GetRunAddress(addr_list[kDataIndex]); + known_input_data_info_[addr] = inputs[i]; + GELOGI("input %zu, v addr %p, r addr %p, p addr %p", i, addr_list[kDataIndex], addr, inputs[i]); } - if (output_op_list_.size() < kOutputNum) { - GELOGW("output op num in graph is %zu.", output_op_list_.size()); + + if (output_addrs_list_.empty()) { + GELOGW("output op num in graph is %zu", output_addrs_list_.size()); return SUCCESS; } - const vector addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); + const vector &addr_list = output_addrs_list_.front(); for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { - knonw_output_data_info_[addr_list[i]] = outputs[i]; - GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); + void *addr = GetRunAddress(addr_list[i]); + known_output_data_info_[addr] = outputs[i]; + GELOGI("output %zu, v addr %p, r addr %p, p addr %p", i, addr_list[i], addr, outputs[i]); } - GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); + + GELOGI("success, known input data info size: %zu, known output data info size: %zu", + 
known_input_data_info_.size(), known_output_data_info_.size()); return SUCCESS; } @@ -2837,40 +2867,30 @@ void DavinciModel::SetTotalIOAddrs(const vector &io_addrs) { } for (size_t i = 0; i < io_addrs.size(); ++i) { - uintptr_t addr = reinterpret_cast(io_addrs[i]); - if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { - total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_)); - } else { - total_io_addrs_.emplace_back(io_addrs[i]); - } + total_io_addrs_.emplace_back(GetRunAddress(io_addrs[i])); } } Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { if (fixed_mem_base_ != reinterpret_cast(mem_base_)) { for (size_t i = 0; i < total_io_addrs.size(); ++i) { - uintptr_t addr = reinterpret_cast(total_io_addrs[i]); - if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { - total_io_addrs[i] = mem_base_ + (addr - fixed_mem_base_); - } + total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); } } for (size_t i = 0; i < total_io_addrs.size(); ++i) { - auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); - if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_input_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); + auto it_in = known_input_data_info_.find(total_io_addrs[i]); + if (it_in != known_input_data_info_.end()) { + GELOGI("input %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_input_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = known_input_data_info_.at(total_io_addrs[i]); } - auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); - if (it_out != knonw_output_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_output_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = 
knonw_output_data_info_.at(total_io_addrs[i]); + auto it_out = known_output_data_info_.find(total_io_addrs[i]); + if (it_out != known_output_data_info_.end()) { + GELOGI("output %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_output_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = known_output_data_info_.at(total_io_addrs[i]); } } - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); + GELOGI("success, total io addrs size: %zu", total_io_addrs.size()); return SUCCESS; } @@ -3171,15 +3191,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 "MAY cause inference result ERROR, please check model input", input_size, op_size); } - bool is_dynamic_aipp = false; - for (const auto &op_desc : data_op_list_) { - if (op_desc->GetType() == AIPP_DATA_TYPE) { - GELOGI("This is dynamic aipp model."); - is_dynamic_aipp = true; - break; - } - } - if (is_dynamic_aipp) { + + if (is_dynamic_aipp_) { GELOGI("This is dynamic aipp model, no need to judge smaller input size"); return true; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index a8013f7d..906c0548 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -49,6 +49,10 @@ #include "task_info/task_info.h" #include "graph/common/local_context.h" +using std::mutex; +using std::thread; +using std::multimap; + namespace ge { // op debug need 2048 bits buffer const size_t kOpDebugMemorySize = 2048UL; @@ -84,11 +88,11 @@ struct SuperKernelTaskInfo { uint32_t last_stream_id; void *last_stream; void *last_sm_desc; - std::vector kernel_list; - std::vector arg_list; - std::vector dump_flag_list; - std::vector op_desc_list; - std::vector dump_args_list; + vector kernel_list; + vector arg_list; + vector dump_flag_list; + vector op_desc_list; + vector dump_args_list; uint32_t last_dump_flag; int64_t last_group_key; uintptr_t last_dump_args; @@ -123,7 +127,7 @@ 
class DavinciModel { /// @brief DavinciModel constructor /// @author /// - DavinciModel(int32_t priority, const std::shared_ptr &listener); + DavinciModel(int32_t priority, const shared_ptr &listener); /// /// @ingroup ge @@ -153,7 +157,7 @@ class DavinciModel { /// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op. /// @return: 0 for success / others for fail /// - Status SetQueIds(const std::vector &input_queue_ids, const std::vector &output_queue_ids); + Status SetQueIds(const vector &input_queue_ids, const vector &output_queue_ids); /// /// @ingroup ge @@ -223,13 +227,14 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const std::map &P2PMemInfos() const {return runtime_param_.memory_infos;} + const map &P2PMemInfos() const { return runtime_param_.memory_infos; } // model name string Name() const { return name_; } // om_name string OmName() const { return om_name_; } + // version uint32_t Version() const { return version_; } @@ -255,9 +260,6 @@ class DavinciModel { Status DestroyThread(); - // Get Data Op. - const vector &GetDataList() const { return data_op_list_; } - // get Op OpDescPtr GetOpByIndex(uint32_t index) const { if (op_list_.find(index) == op_list_.end()) { @@ -274,11 +276,12 @@ class DavinciModel { } return nullptr; } + // get task info for profiling - const std::vector &GetTaskDescInfo() const { return task_desc_info_; } + const vector &GetTaskDescInfo() const { return task_desc_info_; } // get updated task info list - std::vector GetTaskList() { return task_list_; } + vector GetTaskList() { return task_list_; } // Modified from KernelTaskInfo. 
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } @@ -323,7 +326,7 @@ class DavinciModel { Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc); Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc, - std::vector &inputFormats, std::vector &output_formats); + vector &inputFormats, vector &output_formats); /// /// @ingroup ge @@ -332,7 +335,7 @@ class DavinciModel { /// @param [out] dynamic_type /// @return execute result /// - Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const; + Status GetDynamicBatchInfo(vector> &batch_info, int32_t &dynamic_type) const; /// /// @ingroup ge @@ -340,13 +343,13 @@ class DavinciModel { /// @param [out] batch_info /// @return None /// - void GetCombinedDynamicDims(std::vector> &batch_info) const; + void GetCombinedDynamicDims(vector> &batch_info) const; - void GetUserDesignateShapeOrder(std::vector &user_input_shape_order) const; + void GetUserDesignateShapeOrder(vector &user_input_shape_order) const; - void GetCurShape(std::vector &batch_info, int32_t &dynamic_type); + void GetCurShape(vector &batch_info, int32_t &dynamic_type); - void GetModelAttr(std::vector &dynamic_output_shape_info); + void GetModelAttr(vector &dynamic_output_shape_info); /// /// @ingroup ge @@ -373,7 +376,7 @@ class DavinciModel { /// @param [in] string identification: unique identification for current op. 
/// @return None /// - void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification); + void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); /// /// @ingroup ge @@ -384,7 +387,7 @@ class DavinciModel { /// Status GetInputOutputDescInfoForZeroCopy(vector &input_desc, vector &output_desc, - std::vector &inputFormats, std::vector &output_formats); + vector &inputFormats, vector &output_formats); Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); @@ -406,8 +409,6 @@ class DavinciModel { /// bool RunFlag() const { return run_flg_; } - Status GetOutputDescInfo(vector &output_desc, std::vector &formats); - /// /// @ingroup ge /// @brief Set Session Id @@ -453,14 +454,14 @@ class DavinciModel { /// @ingroup ge /// @brief Save outside address of Data or NetOutput used info for ZeroCopy. /// @param [in] const OpDescPtr &op_desc: current op desc - /// @param [in] const std::vector &outside_addrs: address of task + /// @param [in] const vector &outside_addrs: address of task /// @param [in] const void *args_offset: arguments address save the address. /// @return None. 
/// - void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector &outside_addrs, const void *info, void *args, + void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector &outside_addrs, const void *info, void *args, size_t size, size_t offset); - void SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type); + void SetDynamicSize(const vector &batch_num, int32_t dynamic_type); bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } @@ -476,7 +477,7 @@ class DavinciModel { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); } - void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr &op_desc, uintptr_t args) { + void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr &op_desc, uintptr_t args) { data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); } @@ -485,7 +486,7 @@ class DavinciModel { DavinciModel(const DavinciModel &model) = delete; - const map> &GetHcclFolowStream() { + const map> &GetHcclFolowStream() { return main_follow_stream_mapping_; } void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); @@ -534,8 +535,8 @@ class DavinciModel { void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); - Status GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, - std::vector &output_dims); + Status GetAllAippInputOutputDims(uint32_t index, vector &input_dims, + vector &output_dims); void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } // om file name void SetOmName(string om_name) { om_name_ = om_name; } @@ -546,7 +547,6 @@ class DavinciModel { bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); } - Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); private: 
// memory address of weights @@ -566,6 +566,8 @@ class DavinciModel { struct timeInfo time_info_; int32_t dataInputTid; + void *GetRunAddress(void *addr) const; + /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -603,7 +605,7 @@ class DavinciModel { /// @param [in] batch_label: batch label for multi-batch scenes /// @return SUCCESS handle successfully / others handle failed /// - Status UpdateIoTaskArgs(const std::map &data_info, bool is_input, + Status UpdateIoTaskArgs(const map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label); Status CopyInputData(const InputData &input_data, bool device_data = false); @@ -619,7 +621,8 @@ class DavinciModel { void SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input); - Status GetInputDescInfo(vector &input_desc, std::vector &formats); + Status GetInputDescInfo(vector &input_desc, vector &input_formats); + Status GetOutputDescInfo(vector &output_desc, vector &output_formats); Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); @@ -631,7 +634,7 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t* MallocP2PMem(size_t p2p_data_size); + uint8_t *MallocP2PMem(size_t p2p_data_size); void FreeFeatureMapMem(); @@ -663,27 +666,33 @@ class DavinciModel { /// /// @ingroup ge /// @brief Data Op Initialize. + /// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: Data Op. - /// @param [in/out] data_op_index: NetOutput addr size info. + /// @param [in/out] data_op_index: index of courrent count. + /// @param [in/out] data_by_index: Data ordered by index. /// @return Status /// - Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map &data_by_index); + Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, + map &data_by_index); /// /// @ingroup ge /// @brief Sort Data op list by index. 
/// @param [in] data_by_index: map of Data Op. - /// @return + /// @param [in] output_op_list: list of NetOutput op. + /// @return Status /// - void AdjustDataOpList(const map &data_by_index); + Status OptInputOutputInfo(const map &data_by_index, const vector &output_op_list); /// /// @ingroup ge /// @brief NetOutput Op Initialize. + /// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: NetOutput Op. + /// @param [in/out] vector: All NetOutput node in model. /// @return Status /// - Status InitNetOutput(const NodePtr &node); + Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list); /// /// @ingroup ge @@ -722,7 +731,7 @@ class DavinciModel { /// Status InitTbeHandle(const OpDescPtr &op_desc); - void StoreTbeHandle(const std::string &handle_key); + void StoreTbeHandle(const string &handle_key); void CleanTbeHandle(); /// @@ -753,7 +762,7 @@ class DavinciModel { /// Status BindInputQueue(); - Status CpuTaskModelZeroCopy(std::vector &mbuf_list, std::map &outside_addrs); + Status CpuTaskModelZeroCopy(vector &mbuf_list, map &outside_addrs); /// /// @ingroup ge @@ -824,7 +833,7 @@ class DavinciModel { Status DoTaskSink(); - void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); + void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); @@ -838,13 +847,16 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); - Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, - std::vector &outputs); + Status InitOutputTensorInfo(const OpDescPtr &op_desc); + Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); + Status InitOutputDescInfo(const vector &output_op_list, + vector 
&output_desc, vector &formats); + + void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); void SetLabelForDynamic(const NodePtr &node); - void ParseDynamicOutShape(const std::vector &str_info, std::vector> &vec_info); + void ParseDynamicOutShape(const vector &str_info, vector> &vec_info); bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); void GetAllGearsInfo(const NodePtr &node); Status GetGetDynamicDimsNodeInfo(const NodePtr &node); @@ -866,56 +878,54 @@ class DavinciModel { GeModelPtr ge_model_; bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; + vector out_node_name_; map op_list_; // data op_desc vector data_op_list_; - vector output_op_list_; - vector variable_op_list_; - std::map new_input_data_info_; - std::map new_output_data_info_; - std::map new_input_outside_addrs_; - std::map new_output_outside_addrs_; + map new_input_data_info_; + map new_output_data_info_; + map new_input_outside_addrs_; + map new_output_outside_addrs_; - std::set real_virtual_addrs_; + set real_virtual_addrs_; // output op: save cce op actual needed memory size vector output_memory_size_list_; - std::thread thread_id_; + thread thread_id_; - std::shared_ptr listener_; + shared_ptr listener_; bool run_flg_; - std::mutex mux_run_flg_; + mutex mux_run_flg_; int32_t priority_; vector stream_list_; - std::mutex all_hccl_stream_list_mutex_; + mutex all_hccl_stream_list_mutex_; vector all_hccl_stream_list_; // for reuse hccl_follow_stream - std::mutex capacity_of_stream_mutex_; - std::map> main_follow_stream_mapping_; + mutex capacity_of_stream_mutex_; + map> main_follow_stream_mapping_; vector event_list_; vector label_list_; set label_id_indication_; - std::mutex outside_addrs_mutex_; - std::vector zero_copy_tasks_; // Task used Data or NetOutput addr. - std::set copy_only_addrs_; // Address need copy to original place. + mutex outside_addrs_mutex_; + vector zero_copy_tasks_; // Task used Data or NetOutput addr. 
+ set copy_only_addrs_; // Address need copy to original place. - std::vector task_list_; + vector task_list_; // rt_moodel_handle rtModel_t rt_model_handle_; @@ -933,39 +943,39 @@ class DavinciModel { rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; // ACL queue schedule, save queue ids for Init. - std::vector cpu_task_list_; - std::vector input_queue_ids_; // input queue ids created by caller. - std::vector output_queue_ids_; // output queue ids created by caller. - std::vector input_mbuf_list_; // input mbuf created by dequeue task. - std::vector output_mbuf_list_; // output mbuf created by dequeue task. + vector cpu_task_list_; + vector input_queue_ids_; // input queue ids created by caller. + vector output_queue_ids_; // output queue ids created by caller. + vector input_mbuf_list_; // input mbuf created by dequeue task. + vector output_mbuf_list_; // output mbuf created by dequeue task. uint64_t session_id_; uint32_t device_id_; - std::mutex flowctrl_op_index_internal_map_mutex_; - std::map flowctrl_op_index_internal_map_; + mutex flowctrl_op_index_internal_map_mutex_; + map flowctrl_op_index_internal_map_; - std::vector active_stream_list_; - std::set active_stream_indication_; + vector active_stream_list_; + set active_stream_indication_; - std::set hcom_streams_; + set hcom_streams_; RuntimeParam runtime_param_; - static std::mutex tvm_bin_mutex_; - std::set tvm_bin_kernel_; + static mutex tvm_bin_mutex_; + set tvm_bin_kernel_; - std::map used_tbe_handle_map_; + map used_tbe_handle_map_; // for profiling task and graph info - std::vector task_desc_info_; + vector task_desc_info_; int64_t maxDumpOpNum_; // for data dump DataDumper data_dumper_; uint64_t iterator_count_; bool is_l1_fusion_enable_; - std::map saved_task_addrs_; + map saved_task_addrs_; void *l1_fusion_addr_ = nullptr; bool known_node_ = false; @@ -976,14 +986,14 @@ class DavinciModel { void *hybrid_addrs_ = nullptr; uint32_t total_hybrid_args_size_ = 0; int64_t total_fixed_addr_size_ 
= 0; - std::map knonw_input_data_info_; - std::map knonw_output_data_info_; + map known_input_data_info_; + map known_output_data_info_; vector total_io_addrs_; vector orig_total_io_addrs_; bool base_addr_not_changed_ = false; vector> batch_info_; - std::vector> combined_batch_info_; + vector> combined_batch_info_; vector user_designate_shape_order_; int32_t dynamic_type_ = 0; bool is_dynamic_ = false; @@ -991,35 +1001,47 @@ class DavinciModel { vector batch_size_; // key: input tensor name, generally rts op; // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op - std::map tensor_name_to_fixed_addr_size_; + map tensor_name_to_fixed_addr_size_; // key: input tensor name, generally rts op; value: the peer output anchor of the peer op - std::map tensor_name_to_peer_output_index_; + map tensor_name_to_peer_output_index_; // if model is first execute bool is_first_execute_; // for op debug - std::mutex debug_reg_mutex_; + mutex debug_reg_mutex_; bool is_op_debug_reg_ = false; void *op_debug_addr_ = nullptr; void *p2p_debug_addr_ = nullptr; bool is_new_model_desc_{false}; bool is_online_infer_dynamic_ = false; bool is_getnext_sink_dynamic_ = false; - std::vector cur_dynamic_dims_; + vector cur_dynamic_dims_; void *netoutput_last_input_addr_ = nullptr; int64_t netoutput_last_input_size_ = 0; size_t shape_of_cur_dynamic_dims_ = 0; // key: input_index: input is merge node; value: each gear info and each output size - std::map, int64_t>> merge_nodes_gear_and_real_out_size_info_; + map, int64_t>> merge_nodes_gear_and_real_out_size_info_; // key: input_index: input is merge node; value: each gear info and each output shape - std::map, vector>> merge_nodes_gear_and_real_out_shape_info_; - std::vector> all_gears_info_; + map, vector>> merge_nodes_gear_and_real_out_shape_info_; + vector> all_gears_info_; - std::multimap op_id_map_; - std::vector profile_list_; + multimap op_id_map_; + vector profile_list_; // For super kernel. 
SuperKernelTaskInfo skt_info_; + + bool is_dynamic_aipp_ = false; + vector dynamic_output_shape_info_; + + vector> input_addrs_list_; + vector> output_addrs_list_; + + vector output_buffer_size_; + vector> output_shape_info_; + + vector output_descs_; + vector output_formats_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 12513e2f..beb7cd42 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -38,6 +38,10 @@ #include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/passes/assign_remove_pass.h" +#include "graph/passes/inplace_support_check_pass.h" +#endif #include "graph/passes/atomic_addr_clean_pass.h" #include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/cast_remove_pass.h" @@ -2247,10 +2251,20 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ReshapeRemovePass reshape_remove_pass; CondRemovePass condition_remove_pass; BitcastPass bitcast_pass; +#ifndef ONLY_COMPILE_OPEN_SRC + AssignRemovePass assign_remove_pass; + InplaceSupportCheckPass inplace_support_check_pass; +#endif names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); names_to_passes.emplace_back("BitcastPass", &bitcast_pass); +#ifndef ONLY_COMPILE_OPEN_SRC + if (GetContext().GetHostExecFlag()) { + names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); + names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); + } +#endif GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); 
diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 7ee7df20..4e31d835 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -19,7 +19,9 @@ #include #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" - +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/manager/host_mem_allocator.h" +#endif namespace ge { void MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); @@ -190,6 +192,12 @@ Status MemManager::Initialize(const std::vector &memory_type) { GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); return ge::INTERNAL_ERROR; } +#ifndef ONLY_COMPILE_OPEN_SRC + if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { + GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); + return ge::INTERNAL_ERROR; + } +#endif return SUCCESS; } @@ -211,6 +219,9 @@ void MemManager::Finalize() noexcept { // caching and rdma allocator use memory allocator, so finalize them first FinalizeAllocatorMap(caching_allocator_map_); FinalizeAllocatorMap(rdma_allocator_map_); +#ifndef ONLY_COMPILE_OPEN_SRC + FinalizeAllocatorMap(host_allocator_map_); +#endif FinalizeAllocatorMap(memory_allocator_map_); } @@ -239,4 +250,9 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, rdma_allocator_map_); } +#ifndef ONLY_COMPILE_OPEN_SRC +HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { + return Instance().GetAllocator(memory_type, host_allocator_map_); +} +#endif } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 2723ae5c..6cdbd9b4 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -139,7 +139,9 @@ class MemoryAllocator { using 
MemoryAllocatorPtr = std::shared_ptr; class CachingAllocator; class RdmaPoolAllocator; - +#ifndef ONLY_COMPILE_OPEN_SRC +class HostMemAllocator; +#endif class MemManager { public: MemManager(); @@ -148,6 +150,9 @@ class MemManager { static MemoryAllocator *Instance(rtMemType_t memory_type); CachingAllocator &CachingInstance(rtMemType_t memory_type); RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); +#ifndef ONLY_COMPILE_OPEN_SRC + HostMemAllocator &HostMemInstance(rtMemType_t memory_type); +#endif MemManager(const MemManager &) = delete; MemManager &operator=(const MemManager &) = delete; /// @@ -235,6 +240,9 @@ class MemManager { std::map memory_allocator_map_; std::map caching_allocator_map_; std::map rdma_allocator_map_; +#ifndef ONLY_COMPILE_OPEN_SRC + std::map host_allocator_map_; +#endif std::recursive_mutex allocator_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/host_mem_allocator.cc b/ge/graph/manager/host_mem_allocator.cc new file mode 100644 index 00000000..ca2b5124 --- /dev/null +++ b/ge/graph/manager/host_mem_allocator.cc @@ -0,0 +1,69 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/host_mem_allocator.h" +#include "framework/common/debug/ge_log.h" +#include "common/ge/ge_util.h" + +namespace ge { +const void *HostMemAllocator::Malloc(const std::shared_ptr &aligned_ptr, size_t size) { + if (aligned_ptr == nullptr) { + GELOGW("Insert a null aligned_ptr"); + return nullptr; + } + GELOGD("allocate existed host memory succ, size=%zu", size); + allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr }; + return aligned_ptr->Get(); +} + +uint8_t *HostMemAllocator::Malloc(size_t size) { + GELOGD("start to malloc host memory, size=%zu", size); + std::lock_guard lock(mutex_); + std::shared_ptr aligned_ptr = MakeShared(size); + if (aligned_ptr == nullptr) { + GELOGE(INTERNAL_ERROR, "make shared_ptr for AlignedPtr failed"); + return nullptr; + } + allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr }; + GELOGD("allocate host memory succ, size=%zu", size); + return aligned_ptr->MutableGet(); +} + +Status HostMemAllocator::Free(const void *memory_addr) { + if (memory_addr == nullptr) { + GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer"); + return GE_GRAPH_FREE_FAILED; + } + + std::lock_guard lock(mutex_); + auto it = allocated_blocks_.find(memory_addr); + if (it == allocated_blocks_.end()) { + GELOGE(PARAM_INVALID, "Invalid memory pointer"); + return PARAM_INVALID; + } + it->second.second.reset(); + allocated_blocks_.erase(it); + + return SUCCESS; +} + +void HostMemAllocator::Clear() { + for (auto &block : allocated_blocks_) { + block.second.second.reset(); + } + allocated_blocks_.clear(); +} +} // namespace ge diff --git a/ge/graph/manager/host_mem_allocator.h b/ge/graph/manager/host_mem_allocator.h new file mode 100644 index 00000000..b9dbdc4c --- /dev/null +++ b/ge/graph/manager/host_mem_allocator.h @@ -0,0 +1,57 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ + +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/aligned_ptr.h" +#include "runtime/mem.h" + +namespace ge { +class HostMemAllocator { + public: + explicit HostMemAllocator(rtMemType_t) {} + ~HostMemAllocator() = default; + + HostMemAllocator(const HostMemAllocator &) = delete; + HostMemAllocator &operator=(const HostMemAllocator &) = delete; + + Status Initialize() { + Clear(); + return SUCCESS; + } + void Finalize() { Clear(); } + + const void *Malloc(const std::shared_ptr& aligned_ptr, size_t size); + uint8_t *Malloc(size_t size); + Status Free(const void *memory_addr); + + std::pair> GetAlignedPtr(const void *addr) { return allocated_blocks_[addr]; } + + private: + void Clear(); + + std::map>> allocated_blocks_; + // lock around all operations + mutable std::mutex mutex_; +}; +} // namespace ge + +#endif // GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index c99c9e87..c9a33f5c 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -43,16 +43,29 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; +#ifndef ONLY_COMPILE_OPEN_SRC + mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr &ptr) { + ptr.reset(reinterpret_cast(output_para.ptr)); + }, + [](uint8_t 
*ptr) { + ptr = nullptr; + }); +#else mem_info.host_address = reinterpret_cast(output_para.ptr); +#endif mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { GELOGD("SharedMemAllocator::DeAllocate"); +#ifndef ONLY_COMPILE_OPEN_SRC + rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, + mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; +#else rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, mem_info.host_address, mem_info.device_address}; - +#endif rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index 66bd5826..f204c9e4 100644 --- a/ge/graph/manager/host_mem_manager.h +++ b/ge/graph/manager/host_mem_manager.h @@ -42,7 +42,11 @@ struct SharedMemInfo { uint64_t mem_size = 0; int fd = 0; uint8_t *device_address = nullptr; +#ifndef ONLY_COMPILE_OPEN_SRC + std::shared_ptr host_aligned_ptr = nullptr; +#else uint8_t *host_address = nullptr; +#endif SharedMemInfo() = default; SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} }; diff --git a/ge/graph/passes/assign_pass.cc b/ge/graph/passes/assign_pass.cc deleted file mode 100644 index bb7a0f04..00000000 --- a/ge/graph/passes/assign_pass.cc +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/assign_pass.h" - -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "graph/utils/graph_utils.h" -#include "graph/debug/ge_attr_define.h" - -namespace { -const uint32_t kValidInputNodeOutputNum = 1; -const int32_t kAssignRefInputIndex = 0; -const int32_t kAssignValueInputIndex = 1; -} - -namespace ge { -Status AssignPass::Run(NodePtr &node) { - GELOGD("AssignPass running"); - if (node->GetType() != ASSIGN) { - GELOGD("No need run AssignPass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); - return SUCCESS; - } - - const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); - const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); - if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { - GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); - const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); - if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { - GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - - if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { - /// - /// variable not-const not-const - /// \ / | - /// \ / | - /// Assign ----> variable - /// | | - /// | | - /// node node - /// - GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); - if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) 
!= SUCCESS) { - GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); - return FAILED; - } - AddNodeDeleted(node); - - const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); - const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); - if ((ref_input == nullptr) || (value_input == nullptr)) { - GELOGE(FAILED, "value input is null"); - return FAILED; - } - if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, - ref_input->GetName())) { - GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); - return FAILED; - } - - // variable has and only has one input - if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); - return FAILED; - } - if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); - return FAILED; - } - } - - GELOGD("AssignPass success"); - return SUCCESS; -} - -/// -/// @brief Check if need optimize for assign_node -/// @param [in] assign_node -/// @param [in] peer_data_anchor for ref_input of assign_node -/// @param [in] peer_data_anchor for value_input of assign_node -/// @return Status -/// -bool AssignPass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, - const OutDataAnchorPtr &value_peer_anchor) { - GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", - node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), - value_peer_anchor->GetOwnerNode()->GetName().c_str()); - - const std::string &value_type = value_peer_anchor->GetOwnerNode()->GetType(); - if ((value_type == CONSTANTOP) || (value_type == CONSTANT)) { - GELOGD("value input is const"); - 
return false; - } - - const std::string &ref_type = ref_peer_anchor->GetOwnerNode()->GetType(); - if ((ref_type != VARIABLE) && (ref_type != VARIABLEV2)) { - GELOGD("ref input is not var"); - return false; - } - if (!ref_peer_anchor->GetOwnerNode()->GetInDataNodes().empty()) { - GELOGD("ref input has data input"); - return false; - } - - if ((ref_peer_anchor->GetPeerInDataNodesSize() != kValidInputNodeOutputNum) || - (value_peer_anchor->GetPeerInDataNodesSize() != kValidInputNodeOutputNum)) { - GELOGD("ref / value input has other output(s)"); - return false; - } - - GELOGD("Optimization condition matches, assign_node: %s", node->GetName().c_str()); - return true; -} -} // namespace ge diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc new file mode 100644 index 00000000..5029b9c3 --- /dev/null +++ b/ge/graph/passes/assign_remove_pass.cc @@ -0,0 +1,250 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/assign_remove_pass.h" +#include "framework/common/debug/log.h" +#include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" + +namespace { +constexpr uint32_t kValidInputNodeOutputNum = 1; +constexpr int32_t kAssignRefInputIndex = 0; +constexpr int32_t kAssignValueInputIndex = 1; +static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, + ge::CONSTANT, ge::CONSTANTOP, + ge::VARIABLE, ge::VARIABLEV2 }; +} + +namespace ge { +#ifndef ONLY_COMPILE_OPEN_SRC +Status AssignRemovePass::Run(NodePtr &node) { + GELOGD("AssignRemovePass running"); + + if (TransformAttr(node) != SUCCESS) { + GELOGE(FAILED, "Transform assign_var_name attr failed, node=%s", node->GetName().c_str()); + return FAILED; + } + + if (node->GetType() == ASSIGN) { + if (OptimizedAssignNode(node) != SUCCESS) { + GELOGE(FAILED, "Optimize for assign_node %s failed", node->GetName().c_str()); + return FAILED; + } + } + + GELOGD("AssignRemovePass success"); + return SUCCESS; +} + +/// +/// @brief Optimize for assign_node +/// @param [in] assign_node +/// @return Status +/// +Status AssignRemovePass::OptimizedAssignNode(NodePtr &assign_node) { + const auto &ref_in_anchor = assign_node->GetInDataAnchor(kAssignRefInputIndex); + const auto &value_in_anchor = assign_node->GetInDataAnchor(kAssignValueInputIndex); + if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { + GELOGE(FAILED, "In data anchor is null, node:%s", assign_node->GetName().c_str()); + return FAILED; + } + const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); + const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); + if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { + GELOGE(FAILED, "Peer data anchor is null, node:%s", assign_node->GetName().c_str()); + return FAILED; + } + + if (IsCondMatch(assign_node, ref_peer_anchor, value_peer_anchor)) { + /// + /// variable not-const not-const + /// \ / | + /// \ / | + /// Assign 
----> variable + /// | | + /// | | + /// node node + /// + GELOGD("Optimization for assign_node %s start", assign_node->GetName().c_str()); + if (IsolateAndDeleteNode(assign_node, {kAssignRefInputIndex}) != SUCCESS) { + GELOGE(FAILED, "Isolate and delete assign_node %s failed.", assign_node->GetName().c_str()); + return FAILED; + } + + const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); + const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); + if ((ref_input == nullptr) || (value_input == nullptr)) { + GELOGE(FAILED, "value input is null"); + return FAILED; + } + + // variable has and only has one input + if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); + return FAILED; + } + if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); + return FAILED; + } + + GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", + value_input->GetName().c_str(), ref_input->GetName().c_str()); + if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, + ref_input->GetName())) { + GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); + return FAILED; + } + auto value_node = value_peer_anchor->GetOwnerNode(); + AddRePassNode(value_node); + } + return SUCCESS; +} + +/// +/// @brief Transform assign_var_name attr +/// @param [in] node +/// @return Status +/// +Status AssignRemovePass::TransformAttr(NodePtr &node) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) { + int32_t inplace_input_idx = -1; + std::string assign_var_name; + if (AttrUtils::GetInt(output_desc, INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx) && + 
AttrUtils::GetStr(output_desc, ASSIGN_VAR_NAME, assign_var_name)) { + GELOGD("Transform attr ASSIGN_VAR_NAME on node %s, assign_var_name=%s, inplace_input_idx=%d, ", + node->GetName().c_str(), assign_var_name.c_str(), inplace_input_idx); + const auto &in_data_anchor = node->GetInDataAnchor(inplace_input_idx); + GE_CHECK_NOTNULL(in_data_anchor); + const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_data_anchor); + auto in_node = peer_data_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node->GetOpDesc()); + GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", in_node->GetName().c_str(), assign_var_name.c_str()); + if (!AttrUtils::SetStr(in_node->GetOpDesc()->MutableOutputDesc(peer_data_anchor->GetIdx()), + ASSIGN_VAR_NAME, assign_var_name)) { + GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); + return FAILED; + } + AddRePassNode(in_node); + } + } + return SUCCESS; +} +#else +Status AssignRemovePass::Run(NodePtr &node) { + GELOGD("AssignRemovePass running"); + if (node->GetType() != ASSIGN) { + GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); + return SUCCESS; + } + + const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); + const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); + if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { + GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); + return FAILED; + } + const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); + const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); + if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { + GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); + return FAILED; + } + + if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { + /// + /// variable not-const not-const + /// \ / | + /// \ / | + /// Assign ----> variable + /// | | + /// | | + /// 
node node + /// + GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); + if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) { + GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); + return FAILED; + } + AddNodeDeleted(node); + + const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); + const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); + if ((ref_input == nullptr) || (value_input == nullptr)) { + GELOGE(FAILED, "value input is null"); + return FAILED; + } + if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, + ref_input->GetName())) { + GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); + return FAILED; + } + + // variable has and only has one input + if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); + return FAILED; + } + if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); + return FAILED; + } + } + + GELOGD("AssignRemovePass success"); + return SUCCESS; +} +#endif +/// +/// @brief Check if need optimize for assign_node +/// @param [in] assign_node +/// @param [in] peer_data_anchor for ref_input of assign_node +/// @param [in] peer_data_anchor for value_input of assign_node +/// @return Status +/// +bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, + const OutDataAnchorPtr &value_peer_anchor) { + GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", + node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), + value_peer_anchor->GetOwnerNode()->GetName().c_str()); + + if 
(kNoTaskNodeTypes.count(value_peer_anchor->GetOwnerNode()->GetType()) > 0) { + GELOGD("value input is not calculate node"); + return false; + } + + const std::string &ref_type = ref_peer_anchor->GetOwnerNode()->GetType(); + if ((ref_type != VARIABLE) && (ref_type != VARIABLEV2)) { + GELOGD("ref input is not var"); + return false; + } + if (!ref_peer_anchor->GetOwnerNode()->GetInDataNodes().empty()) { + GELOGD("ref input has data input"); + return false; + } + + if ((ref_peer_anchor->GetPeerInDataNodesSize() != kValidInputNodeOutputNum) || + (value_peer_anchor->GetPeerInDataNodesSize() != kValidInputNodeOutputNum)) { + GELOGD("ref / value input has other output(s)"); + return false; + } + + GELOGD("Optimization condition matches, assign_node: %s", node->GetName().c_str()); + return true; +} +} // namespace ge diff --git a/ge/graph/passes/assign_pass.h b/ge/graph/passes/assign_remove_pass.h similarity index 68% rename from ge/graph/passes/assign_pass.h rename to ge/graph/passes/assign_remove_pass.h index 11cf1073..f8ef2e13 100644 --- a/ge/graph/passes/assign_pass.h +++ b/ge/graph/passes/assign_remove_pass.h @@ -14,17 +14,32 @@ * limitations under the License. 
*/ -#ifndef GE_GRAPH_PASSES_ASSIGN_PASS_H_ -#define GE_GRAPH_PASSES_ASSIGN_PASS_H_ +#ifndef GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_ +#define GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_ #include "graph/passes/base_pass.h" namespace ge { -class AssignPass : public BaseNodePass { +class AssignRemovePass : public BaseNodePass { public: Status Run(NodePtr &node) override; private: +#ifndef ONLY_COMPILE_OPEN_SRC + /// + /// @brief Optimize for assign_node + /// @param [in] assign_node + /// @return Status + /// + Status OptimizedAssignNode(NodePtr &assign_node); + + /// + /// @brief Transform assign_var_name attr + /// @param [in] node + /// @return Status + /// + Status TransformAttr(NodePtr &node); +#endif /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node @@ -36,4 +51,4 @@ class AssignPass : public BaseNodePass { const OutDataAnchorPtr &value_peer_anchor); }; } // namespace ge -#endif // GE_GRAPH_PASSES_ASSIGN_PASS_H_ +#endif // GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_ diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index d0970c59..8ee89648 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -19,13 +19,7 @@ #include #include #include -#include #include - -#include "common/ge/ge_util.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/ge_inner_error_codes.h" -#include "graph/debug/ge_attr_define.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/type_utils.h" @@ -121,11 +115,21 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, TypeUtils::DataTypeToSerialString(data_type).c_str()); continue; } +#ifndef ONLY_COMPILE_OPEN_SRC + if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { + GELOGW("aligned_ptr is null while size is not 0"); + continue; + } +#endif ++insert_const_nums; SameConstKey map_key; map_key.data_size = type_size; +#ifndef ONLY_COMPILE_OPEN_SRC + 
map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); +#else map_key.data = weight->GetData().GetData(); +#endif map_key.data_type = data_type; map_key.format = output_tensor->GetFormat(); map_key.shape = output_tensor->GetShape().GetDims(); diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index 4935da84..ae39c707 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -21,14 +21,20 @@ #include #include #include - +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/aligned_ptr.h" +#endif #include "graph/types.h" #include "inc/graph_pass.h" namespace ge { struct SameConstKey { int data_size; +#ifndef ONLY_COMPILE_OPEN_SRC + std::shared_ptr aligned_ptr; +#else const uint8_t *data; +#endif DataType data_type; Format format; std::vector shape; @@ -38,10 +44,19 @@ struct SameConstKey { if (data_size != key.data_size) { return data_size < key.data_size; } +#ifndef ONLY_COMPILE_OPEN_SRC + if (data_size != 0) { + int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); + if (ret != 0) { + return ret < 0; + } + } +#else int ret = memcmp(data, key.data, data_size); if (ret != 0) { return ret < 0; } +#endif if (data_type != key.data_type) { return data_type < key.data_type; } diff --git a/ge/graph/passes/inplace_support_check_pass.cc b/ge/graph/passes/inplace_support_check_pass.cc new file mode 100644 index 00000000..73cc7f3b --- /dev/null +++ b/ge/graph/passes/inplace_support_check_pass.cc @@ -0,0 +1,83 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/inplace_support_check_pass.h" +#include "framework/common/debug/log.h" +#include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" + +namespace { +constexpr uint32_t kInplaceSupportOutputIndex = 0; +constexpr uint32_t kInplaceSupportOutputNum = 1; +static const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, + ge::CONSTANT, ge::CONSTANTOP, + ge::VARIABLE, ge::VARIABLEV2 }; +} + +namespace ge { +Status InplaceSupportCheckPass::Run(NodePtr &node) { + GELOGD("InplaceSupportCheckPass running"); + if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { + GELOGD("output num of node %s is not %u, skip InplaceSupportCheckPass", + node->GetName().c_str(), kInplaceSupportOutputNum); + return SUCCESS; + } + GE_CHECK_NOTNULL(node->GetOpDesc()); + const DataType &output_type = node->GetOpDesc()->GetOutputDesc(kInplaceSupportOutputIndex).GetDataType(); + const GeShape &output_shape = node->GetOpDesc()->GetOutputDesc(kInplaceSupportOutputIndex).GetShape(); + GELOGD("process InplaceSupportCheckPass on node %s", node->GetName().c_str()); + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_data_anchor == nullptr) { + continue; + } + auto in_node = peer_data_anchor->GetOwnerNode(); + if (kSrcNodeTypes.count(in_node->GetType()) > 0) { + GELOGD("meet src_node %s", in_node->GetName().c_str()); + continue; + } + if (peer_data_anchor->GetPeerInDataNodesSize() != kInplaceSupportOutputNum) { + 
GELOGD("peer_data_anchor links with multi in_data_anchors"); + continue; + } + + int32_t inplace_input_idx = in_data_anchor->GetIdx(); + const DataType &input_type = node->GetOpDesc()->GetInputDesc(inplace_input_idx).GetDataType(); + const GeShape &input_shape = node->GetOpDesc()->GetInputDesc(inplace_input_idx).GetShape(); + if (input_type != output_type) { + GELOGW("DataType mismatch, in_idx=%d, input_type=%u, output_type=%u", inplace_input_idx, input_type, output_type); + continue; + } + if (input_shape.GetDims() != output_shape.GetDims()) { + GELOGW("Shape mismatch, in_idx=%d, input_shape=[%s], output_shape=[%s]", + inplace_input_idx, input_shape.ToString().c_str(), output_shape.ToString().c_str()); + continue; + } + + GELOGD("add attr INPLACE_SUPPORT_INPUT_INDEX on node %s, input_idx=%d", node->GetName().c_str(), inplace_input_idx); + if (!AttrUtils::SetInt(node->GetOpDesc()->MutableOutputDesc(kInplaceSupportOutputIndex), + INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx)) { + GELOGE(FAILED, "Set attr INPLACE_SUPPORT_INPUT_INDEX on node %s failed.", node->GetName().c_str()); + return FAILED; + } + AddRePassNode(node); + break; + } + + GELOGD("InplaceSupportCheckPass success"); + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/inplace_support_check_pass.h b/ge/graph/passes/inplace_support_check_pass.h new file mode 100644 index 00000000..be2d6c75 --- /dev/null +++ b/ge/graph/passes/inplace_support_check_pass.h @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ +#define GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ + +#include "graph/passes/base_pass.h" + +namespace ge { +class InplaceSupportCheckPass : public BaseNodePass { + public: + Status Run(NodePtr &node) override; +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 872f94fb..f8451ace 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -22,6 +22,8 @@ #include "graph/preprocess/multi_batch_options.h" #include "graph/utils/node_utils.h" #include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "register/op_registry.h" namespace ge { @@ -478,8 +480,28 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { return SUCCESS; } - (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); + + GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); + std::vector input_dims_str; + for (size_t i = 0; i < batch_shapes_.size(); ++i) { + auto shape = data_shape; + auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); + if (ret != SUCCESS) { + GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str()); + return ret; + } + tensor.SetShape(shape); + int64_t tensor_size = 0; + (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); + string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + + TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" + + std::to_string(tensor_size) + 
":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + + formats::JoinToString(tensor.GetShape().GetDims()); + input_dims_str.emplace_back(input_str); + } + (void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); + size_t max_shape_index = 0; int64_t max_size = 0; for (size_t i = 0; i < batch_shapes_.size(); ++i) { @@ -593,7 +615,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const graph->AddSubgraph(subgraph->GetName(), subgraph); all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT); GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]), - "Update %s failed", all_branch_output_[subgraph]->GetName().c_str()); + "Update %s failed", all_branch_output_[subgraph]->GetName().c_str()); const string key_name = "branches" + std::to_string(i); op_desc->AddSubgraphName(key_name); diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index a7b922e0..392968e7 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -598,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons /// Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, const std::set &same_cond_switch) { - GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), + GELOGD("ModifySwitchInCtlEdges: switch_node=%s, cast_node=%s", switch_node->GetName().c_str(), cast_node->GetName().c_str()); std::string orig_switch_name = switch_node->GetName(); OpDescPtr switch_desc = switch_node->GetOpDesc(); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index da862836..6bb3105c 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -19,7 +19,6 @@ #include #include #include 
"common/formats/format_transfers/format_transfer_fractal_nz.h" -#include "common/formats/format_transfers/format_transfer_fractal_z.h" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_transpose.h" @@ -38,7 +37,9 @@ #include "graph/passes/addn_pass.h" #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" -#include "graph/passes/assign_pass.h" +#ifdef ONLY_COMPILE_OPEN_SRC +#include "graph/passes/assign_remove_pass.h" +#endif #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" #include "graph/passes/cond_remove_pass.h" @@ -1699,7 +1700,9 @@ Status GraphPrepare::PrepareOptimize() { VarIsInitializedOpPass var_is_initialized_pass; ParallelConcatStartOpPass parallel_concat_start_op_pass; IdentityPass identity_pass(false); - AssignPass assign_pass; +#ifdef ONLY_COMPILE_OPEN_SRC + AssignRemovePass assign_remove_pass; +#endif SnapshotPass snapshot_pass; if (!options_.train_graph_flag) { names_to_passes.emplace_back("DropOutPass", &dropout_pass); @@ -1714,9 +1717,11 @@ Status GraphPrepare::PrepareOptimize() { names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); names_to_passes.emplace_back("IdentityPass", &identity_pass); +#ifdef ONLY_COMPILE_OPEN_SRC if (GetContext().GetHostExecFlag()) { - names_to_passes.emplace_back("AssignPass", &assign_pass); + names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); } +#endif GE_TIMESTAMP_START(names_to_passes); ret = ge_passes.Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index d5ed7674..cbd0bd8b 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ 
b/ge/host_cpu_engine/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(host_cpu_engine PRIVATE -Werror + -fno-common ) target_compile_definitions(host_cpu_engine PRIVATE @@ -49,9 +50,7 @@ target_link_libraries(host_cpu_engine PRIVATE ascend_protobuf c_sec graph - register slog - runtime -Wl,--as-needed ) @@ -60,6 +59,7 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(atc_host_cpu_engine PRIVATE -Werror + -fno-common ) target_compile_definitions(atc_host_cpu_engine PRIVATE @@ -90,9 +90,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE ascend_protobuf c_sec graph - register slog - runtime_compile -Wl,--as-needed ) @@ -106,6 +104,7 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder PRIVATE -Werror + -fno-common ) target_compile_definitions(host_cpu_opskernel_builder PRIVATE @@ -145,6 +144,7 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) target_compile_options(atc_host_cpu_opskernel_builder PRIVATE -Werror + -fno-common ) target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE @@ -189,6 +189,7 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror + -fno-common ) target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index 2c38367a..c2602f37 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -20,6 +20,9 @@ #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/rdma_pool_allocator.h" +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/manager/host_mem_allocator.h" +#endif namespace ge { namespace hybrid { @@ -64,7 +67,11 @@ 
void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { if (mem_type == RDMA_HBM) { buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); } else if (mem_type == HOST_DDR) { +#ifndef ONLY_COMPILE_OPEN_SRC + buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); +#else buffer = malloc(allocate_size); +#endif } else { if (allocate_size > kMaxHbmMemorySize) { GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); @@ -101,7 +108,11 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { if (mem_type == RDMA_HBM) { MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } else if (mem_type == HOST_DDR) { +#ifndef ONLY_COMPILE_OPEN_SRC + MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); +#else free(data); +#endif } else { MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index da5218c5..46c9c39b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -25,11 +25,13 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#endif #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" -#include "framework/common/debug/ge_log.h" -#include "graph/utils/attr_utils.h" namespace ge { namespace hybrid { @@ -852,9 +854,24 @@ Status HybridModelBuilder::InitConstantOps() { std::unique_ptr var_tensor; if (GetContext().GetHostExecFlag()) { +#ifndef ONLY_COMPILE_OPEN_SRC + GE_CHECK_NOTNULL(ge_tensor); + // Address for eigen kernel 
should be aligned with 16 bytes + // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned + GeTensor aligned_tensor = ge_tensor->Clone(); + GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize()); + if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(), + aligned_tensor.GetData().size()) == nullptr) { + GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); + return MEMALLOC_FAILED; + } + var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), + aligned_tensor.GetData().size())); +#else auto buffer = ge_tensor->MutableData(); GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); +#endif } else { GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); @@ -909,9 +926,21 @@ Status HybridModelBuilder::InitVariableTensors() { GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); return GE_GRAPH_MALLOC_FAILED; } +#ifndef ONLY_COMPILE_OPEN_SRC + if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, + tensor_size) == nullptr) { + GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); + return MEMALLOC_FAILED; + } + GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size); + + std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), + tensor_size)); +#else GELOGD("Host variable [%s] malloc success.", it.first.c_str()); std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); +#endif GE_CHECK_NOTNULL(tensor); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); } @@ 
-933,7 +962,7 @@ Status HybridModelBuilder::InitWeights() { auto op_desc = constant_node->GetOpDesc(); auto v_weights = ModelUtils::GetWeights(op_desc); if (v_weights.empty()) { - GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", constant_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); return INTERNAL_ERROR; } auto *ge_tensor = const_cast(v_weights[0].get()); diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index a61195b0..32522fe8 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,6 +18,10 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" +#ifndef ONLY_COMPILE_OPEN_SRC +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#endif #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { @@ -50,15 +54,23 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { auto input_desc_ptr = context.GetInputDesc(i); GE_CHECK_NOTNULL(input_desc_ptr); const auto &input_desc = *input_desc_ptr; +#ifndef ONLY_COMPILE_OPEN_SRC + auto tensor = context.GetInput(i); + GE_CHECK_NOTNULL(tensor); + auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); + GE_CHECK_NOTNULL(item.second); + auto in_tensor = MakeShared(input_desc, item.second, item.first); +#else GE_CHECK_NOTNULL(context.GetInput(i)); auto in_tensor = MakeShared(input_desc, reinterpret_cast(context.GetInput(i)->GetData()), context.GetInput(i)->GetSize()); +#endif GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); inputs.emplace_back(in_tensor); - GELOGI("node:%s allocate input %d, 
size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size()); + GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size()); } std::vector outputs; @@ -72,14 +84,20 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); +#ifndef ONLY_COMPILE_OPEN_SRC + auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); + GE_CHECK_NOTNULL(item.second); + auto out_tensor = MakeShared(output_desc, item.second, item.first); +#else auto out_tensor = MakeShared(output_desc, reinterpret_cast(tensor->GetData()), tensor->GetSize()); +#endif GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); outputs.emplace_back(out_tensor); - GELOGI("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size()); + GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size()); } return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index e7cdd7c9..fe89464b 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -243,8 +243,8 @@ Status NoOpTask::UpdateArgs(TaskContext &context) { return SUCCESS; } Status NoOpTask::ExecuteAsync(TaskContext &context, std::function done_callback) { - GELOGD("[%s] Skipping execute for op with empty outputs", context.GetNodeName()); - return SUCCESS; + GELOGD("[%s] Skipping execution for op with empty outputs", context.GetNodeName()); + return context.TryExecuteCallback(done_callback); } } // namespace hybrid } // namespace ge diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 48c1cbe7..d195e06f 100644 --- a/ge/offline/CMakeLists.txt +++ 
b/ge/offline/CMakeLists.txt @@ -22,6 +22,7 @@ target_compile_options(atc PRIVATE -Werror -O2 -Wno-deprecated-declarations + -fno-common ) target_compile_definitions(atc PRIVATE @@ -83,6 +84,7 @@ target_compile_options(atc_atc.bin PRIVATE -Werror -O2 -Wno-deprecated-declarations + -fno-common ) target_compile_definitions(atc_atc.bin PRIVATE @@ -149,6 +151,7 @@ target_compile_options(fwk_atc.bin PRIVATE -Werror -O2 -Wno-deprecated-declarations + -fno-common ) target_compile_definitions(fwk_atc.bin PRIVATE diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 2b5bb41a..ed67b913 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -206,6 +206,8 @@ DEFINE_string(mdl_bank_path, "", "Optional; model bank path"); DEFINE_string(op_bank_path, "", "Optional; op bank path"); +DEFINE_string(display_model_info, "0", "Optional; display model info"); + class GFlagUtils { public: /** @@ -225,7 +227,8 @@ class GFlagUtils { "===== Basic Functionality =====\n" "[General]\n" " --h/help Show this help message\n" - " --mode Run mode. 0(default): generate offline model; 1: convert model to JSON format " + " --mode Run mode. 0(default): generate offline model; 1: convert model to JSON format; " + "6: display model info; " "3: only pre-check; 5: convert ge dump txt file to JSON format\n" "\n[Input]\n" " --model Model file\n" @@ -313,7 +316,8 @@ class GFlagUtils { " --op_compiler_cache_dir Set the save path of operator compilation cache files.\n" "Default value: $HOME/atc_data\n" " --op_compiler_cache_mode Set the operator compilation cache mode." 
- "Options are disable(default), enable and force(force to refresh the cache)"); + "Options are disable(default), enable and force(force to refresh the cache)\n" + " --display_model_info enable for display model info; 0(default): close display, 1: open display"); gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); // Using gflags to analyze input parameters @@ -862,7 +866,7 @@ domi::Status GenerateInfershapeJson() { static Status ConvertModelToJson(int fwk_type, const string &model_file, const string &json_file) { Status ret = ge::SUCCESS; if (fwk_type == -1) { - ret = ge::ConvertOmModelToJson(model_file.c_str(), json_file.c_str()); + ret = ge::ConvertOm(model_file.c_str(), json_file.c_str(), true); return ret; } @@ -1176,6 +1180,8 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::MDL_BANK_PATH_FLAG), FLAGS_mdl_bank_path)); options.insert(std::pair(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); + + options.insert(std::pair(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); // set enable scope fusion passes SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); // print atc option map @@ -1188,6 +1194,11 @@ domi::Status GenerateOmModel() { return domi::FAILED; } + if (FLAGS_display_model_info == "1") { + GELOGI("need to display model info."); + return ge::ConvertOm(FLAGS_output.c_str(), "", false); + } + return domi::SUCCESS; } @@ -1201,6 +1212,26 @@ domi::Status ConvertModelToJson() { return domi::SUCCESS; } +domi::Status DisplayModelInfo() { + // No model path passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"om"}); + return ge::FAILED, + "Input parameter[--om]'s value is empty!!"); + + // Check if the model path is valid + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_om != "" && !ge::CheckInputPathValid(FLAGS_om, "--om"), + return ge::FAILED, + "model file path is invalid: %s.", FLAGS_om.c_str()); + + if (FLAGS_framework == -1) { + 
return ge::ConvertOm(FLAGS_om.c_str(), "", false); + } + + return ge::FAILED; +} + bool CheckRet(domi::Status ret) { if (ret != domi::SUCCESS) { if (FLAGS_mode == ONLY_PRE_CHECK) { @@ -1344,6 +1375,9 @@ int main(int argc, char* argv[]) { } else if (FLAGS_mode == ge::RunMode::PBTXT_TO_JSON) { GE_CHK_BOOL_EXEC(ConvertPbtxtToJson() == domi::SUCCESS, ret = domi::FAILED; break, "ATC convert pbtxt to json execute failed!!"); + } else if (FLAGS_mode == ge::RunMode::DISPLAY_OM_INFO) { + GE_CHK_BOOL_EXEC(DisplayModelInfo() == domi::SUCCESS, ret = domi::FAILED; + break, "ATC DisplayModelInfo failed!!"); } else { ErrorManager::GetInstance().ATCReportErrMessage( "E10001", {"parameter", "value", "reason"}, {"--mode", std::to_string(FLAGS_mode), kModeSupport}); diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index 87a6d682..f6353231 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(engine SHARED ${SRC_LIST}) target_compile_options(engine PRIVATE -Werror + -fno-common ) target_compile_definitions(engine PRIVATE diff --git a/ge/proto/caffe/caffe.proto b/ge/proto/caffe/caffe.proto index 3f45aae2..20615fed 100644 --- a/ge/proto/caffe/caffe.proto +++ b/ge/proto/caffe/caffe.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe + * + * This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto2"; package domi.caffe; diff --git a/ge/proto/tensorflow/attr_value.proto b/ge/proto/tensorflow/attr_value.proto index 1cc67d62..438d7163 100644 --- a/ge/proto/tensorflow/attr_value.proto +++ b/ge/proto/tensorflow/attr_value.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/function.proto b/ge/proto/tensorflow/function.proto index 075897c6..44681e32 100644 --- a/ge/proto/tensorflow/function.proto +++ b/ge/proto/tensorflow/function.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/graph.proto b/ge/proto/tensorflow/graph.proto index d639a7d6..73bfc6ee 100644 --- a/ge/proto/tensorflow/graph.proto +++ b/ge/proto/tensorflow/graph.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. 
+ * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/graph_library.proto b/ge/proto/tensorflow/graph_library.proto index e393d38d..7bca0838 100644 --- a/ge/proto/tensorflow/graph_library.proto +++ b/ge/proto/tensorflow/graph_library.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/node_def.proto b/ge/proto/tensorflow/node_def.proto index b9bc97ee..50cf5cac 100644 --- a/ge/proto/tensorflow/node_def.proto +++ b/ge/proto/tensorflow/node_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/op_def.proto b/ge/proto/tensorflow/op_def.proto index 3485d045..7f0e8ce2 100644 --- a/ge/proto/tensorflow/op_def.proto +++ b/ge/proto/tensorflow/op_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/resource_handle.proto b/ge/proto/tensorflow/resource_handle.proto index a3452351..91c46c9a 100644 --- a/ge/proto/tensorflow/resource_handle.proto +++ b/ge/proto/tensorflow/resource_handle.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/tensor.proto b/ge/proto/tensorflow/tensor.proto index d0a4d024..48eeb6c4 100644 --- a/ge/proto/tensorflow/tensor.proto +++ b/ge/proto/tensorflow/tensor.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. 
+ * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/tensor_shape.proto b/ge/proto/tensorflow/tensor_shape.proto index 4225a2e3..3a6d8c5a 100644 --- a/ge/proto/tensorflow/tensor_shape.proto +++ b/ge/proto/tensorflow/tensor_shape.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + // Protocol buffer representing the shape of tensors. syntax = "proto3"; diff --git a/ge/proto/tensorflow/types.proto b/ge/proto/tensorflow/types.proto index ba7a72b3..f40e49cb 100644 --- a/ge/proto/tensorflow/types.proto +++ b/ge/proto/tensorflow/types.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. 
+ */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/versions.proto b/ge/proto/tensorflow/versions.proto index 48061218..4e81548f 100644 --- a/ge/proto/tensorflow/versions.proto +++ b/ge/proto/tensorflow/versions.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 7ff52e82..11384cfb 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -71,6 +71,7 @@ const char *const kOutputTypeError = "The multiple out nodes set in output_type const size_t kNodeNameIndex = 0; const size_t kIndexStrIndex = 1; const size_t kDTValueIndex = 2; +const size_t kOmInfoSize = 5; } // namespace // When the model is converted to a JSON file, the following operator attributes in the blacklist will be ignored @@ -869,9 +870,78 @@ void GetGroupName(ge::proto::ModelDef &model_def) { }); } -FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, const char *json_file) { +FMK_FUNC_HOST_VISIBILITY void PrintModelInfo(ge::proto::ModelDef *model_def) { + std::cout << "============ Display Model Info start ============" << std::endl; + + auto model_attr_map = model_def->mutable_attr(); + // system info + auto iter = model_attr_map->find(ATTR_MODEL_ATC_VERSION); + auto atc_version = (iter != model_attr_map->end()) ? iter->second.s() : ""; + iter = model_attr_map->find("soc_version"); + auto soc_version = (iter != model_attr_map->end()) ? 
iter->second.s() : ""; + iter = model_attr_map->find("framework_type"); + auto framework_type = (iter != model_attr_map->end()) ? iter->second.s() : ""; + std::cout << "system info: " + << ATTR_MODEL_ATC_VERSION + << "[" << atc_version << "], " + << "soc_version" + << "[" << soc_version << "], " + << "framework_type" + << "[" << framework_type << "]." << std::endl; + + // resource info + iter = model_attr_map->find(ATTR_MODEL_MEMORY_SIZE); + auto memory_size = (iter != model_attr_map->end()) ? iter->second.i() : -1; + iter = model_attr_map->find(ATTR_MODEL_WEIGHT_SIZE); + auto weight_size = (iter != model_attr_map->end()) ? iter->second.i() : -1; + iter = model_attr_map->find(ATTR_MODEL_STREAM_NUM); + auto stream_num = (iter != model_attr_map->end()) ? iter->second.i() : -1; + iter = model_attr_map->find(ATTR_MODEL_EVENT_NUM); + auto event_num = (iter != model_attr_map->end()) ? iter->second.i() : -1; + std::cout << "resource info: " + << ATTR_MODEL_MEMORY_SIZE + << "[" << memory_size << " B], " + << ATTR_MODEL_WEIGHT_SIZE + << "[" << weight_size << " B], " + << ATTR_MODEL_STREAM_NUM + << "[" << stream_num << "], " + << ATTR_MODEL_EVENT_NUM + << "[" << event_num << "]." + << std::endl; + + // om info + iter = model_attr_map->find("om_info_list"); + if (iter == model_attr_map->end()) { + std::cout << "Display Model Info failed, attr \"om_info_list\" is not found in om, check the version is matched." 
+ << std::endl; + std::cout << "============ Display Model Info end ============" << std::endl; + return; + } + auto list_size = iter->second.list().i_size(); + if (list_size == kOmInfoSize) { + std::cout << "om info: " + << "modeldef_size" + << "[" << iter->second.list().i(0) << " B], " + << "weight_data_size" + << "[" << iter->second.list().i(1) << " B], " + << "tbe_kernels_size" + << "[" << iter->second.list().i(2) << " B], " + << "cust_aicpu_kernel_store_size" + << "[" << iter->second.list().i(3) << " B], " + << "task_info_size" + << "[" << iter->second.list().i(4) << " B]." << std::endl; + } else { + std::cout << "Display Model Info error, please check!" << std::endl; + }; + + std::cout << "============ Display Model Info end ============" << std::endl; +} + +FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json) { GE_CHECK_NOTNULL(model_file); - GE_CHECK_NOTNULL(json_file); + if (is_covert_to_json) { + GE_CHECK_NOTNULL(json_file); + } ge::ModelData model; // Mode 2 does not need to verify the priority, and a default value of 0 is passed @@ -917,12 +987,16 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con // De serialization bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); if (flag) { - GetGroupName(model_def); + if (is_covert_to_json) { + GetGroupName(model_def); - json j; - Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); - ret = ModelSaver::SaveJsonToFile(json_file, j); + ret = ModelSaver::SaveJsonToFile(json_file, j); + } else { + PrintModelInfo(&model_def); + } } else { ret = INTERNAL_ERROR; GELOGE(ret, "ReadProtoFromArray failed."); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 9b361b96..4fe0ab1e 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -291,6 +291,9 @@ const std::string 
OP_DEBUG_LEVEL = "ge.opDebugLevel"; // Configure model bank path const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path"; +// Configure display_model_info flag +const std::string DISPLAY_MODEL_INFO = "ge.display_model_info"; + // Configure op bank path const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path"; const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; @@ -397,7 +400,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, // for interface: aclgrphParse const std::set ir_parser_suppported_options = { INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; + OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index bc0444bc..4a169dda 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -84,6 +84,7 @@ class ModelHelper { const uint8_t *data, size_t size, size_t model_index); Status SaveModelDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, Buffer &model_buffer, size_t model_index = 0); + Status SaveSizeToModelDef(const GeModelPtr &ge_model); Status SaveModelWeights(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, size_t model_index = 0); Status SaveModelTbeKernel(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index e7ca05f7..62332b8d 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -73,7 +73,7 @@ Status ParseGraph(ge::Graph &graph, const std::map &atc_params, * @param [key] encrypted key * @return Status result code */ -Status ConvertOmModelToJson(const char *model_file, const char *json_file); +Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json); Status 
ConvertPbtxtToJson(const char *model_file, const char *json_file); /** @@ -103,6 +103,8 @@ void GetOutputNodesNameAndIndex(std::vector> &ou void UpdateOmgCtxWithParserCtx(); void UpdateParserCtxWithOmgCtx(); + +void PrintModelInfo(ge::proto::ModelDef *model_def); } // namespace ge namespace domi { diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 454890aa..dab79053 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -46,7 +46,8 @@ enum RunMode { GEN_OM_MODEL = 0, // generate offline model file MODEL_TO_JSON = 1, // convert to JSON file ONLY_PRE_CHECK = 3, // only for pre-check - PBTXT_TO_JSON = 5 // pbtxt to json + PBTXT_TO_JSON = 5, // pbtxt to json + DISPLAY_OM_INFO = 6 // display model info }; /// diff --git a/metadef b/metadef index c14d2be3..11c6cf29 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit c14d2be38171eed63416e71178774103faf1f5cd +Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db diff --git a/parser b/parser index 34559943..ecde83dc 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 34559943b6cb645042a87d99bc88ead016b15b64 +Subproject commit ecde83dc9da0d58522b4a90c4d90c242c75011fd diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index c209d989..6db99a45 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -49,6 +49,7 @@ include_directories(${GE_CODE_DIR}/metadef) include_directories(${GE_CODE_DIR}/metadef/graph) include_directories(${GE_CODE_DIR}/inc/external) include_directories(${GE_CODE_DIR}/metadef/inc/external) +include_directories(${GE_CODE_DIR}/parser) include_directories(${GE_CODE_DIR}/parser/parser) include_directories(${GE_CODE_DIR}/metadef/inc/external/graph) include_directories(${GE_CODE_DIR}/metadef/inc/graph) @@ -224,7 +225,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/cond_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" 
"${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/assign_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" @@ -302,6 +303,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" + "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" "${GE_CODE_DIR}/ge/common/ge/datatype_util.cc" "${GE_CODE_DIR}/metadef/register/ops_kernel_builder_registry.cc" @@ -309,6 +311,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/utils/tuning_utils.cc" "${GE_CODE_DIR}/metadef/register/op_tiling_registry.cpp" "${GE_CODE_DIR}/ge/ge_local_engine/engine/host_cpu_engine.cc" + "${GE_CODE_DIR}/parser/parser/common/pre_checker.cc" + "${GE_CODE_DIR}/parser/parser/common/convert/pb2json.cc" + "${GE_CODE_DIR}/parser/parser/common/parser_factory.cc" + "${GE_CODE_DIR}/parser/parser/common/model_saver.cc" + "${GE_CODE_DIR}/parser/parser/common/parser_types.cc" + "${GE_CODE_DIR}/parser/parser/common/parser_inner_ctx.cc" + "${GE_CODE_DIR}/ge/session/omg.cc" ) set(COMMON_FORMAT_SRC_FILES @@ -398,7 +407,6 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" - "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" "${GE_CODE_DIR}/ge/executor/ge_executor.cc" @@ -429,7 +437,6 @@ set(GRAPH_BUILD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/build/memory/hybrid_mem_assigner.cc" 
"${GE_CODE_DIR}/ge/graph/build/memory/max_block_mem_assigner.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" - "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" @@ -565,6 +572,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/end_graph_task_unittest.cc" "graph/load/new_model_manager_event_manager_unittest.cc" #"graph/load/output_net_output_unittest.cc" + "graph/load/davinci_model_unittest.cc" "graph/load/tbe_handle_store_unittest.cc" "graph/load/hccl_task_info_unittest.cc" "graph/load/kernel_ex_task_info_unittest.cc" @@ -573,6 +581,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/memcpy_async_task_info_unittest.cc" #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" + "graph/load/model_helper_unittest.cc" ) set(PASS_TEST_FILES @@ -678,6 +687,7 @@ set(MULTI_PARTS_TEST_FILES "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" "graph/build/mem_assigner_unittest.cc" + "session/omg_omg_unittest.cc" ) set(SINGLE_OP_TEST_FILES @@ -687,7 +697,7 @@ set(SINGLE_OP_TEST_FILES ) set(PROFILING_MNG_TEST_FILES - #"profiling/ge_profiling_manager_unittest.cc" + "profiling/ge_profiling_manager_unittest.cc" ) set(OTHERS_TEST_FILES diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc new file mode 100644 index 00000000..3cd0455d --- /dev/null +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -0,0 +1,285 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define private public +#define protected public +#include "graph/utils/graph_utils.h" +#include "common/profiling/profiling_manager.h" +#include "graph/load/new_model_manager/davinci_model.h" + +using namespace std; + +namespace ge { +extern OpDescPtr CreateOpDesc(string name, string type); + +class UtestDavinciModel : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestDavinciModel, init_success) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = make_shared("default"); + ProfilingManager::Instance().is_load_profiling_ = true; + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({1024}); + NodePtr node_input = graph->AddNode(op_input); // op_index = 0 + + OpDescPtr op_kernel = CreateOpDesc("square", "Square"); + op_kernel->AddInputDesc(tensor); + op_kernel->AddOutputDesc(tensor); + op_kernel->SetInputOffset({1024}); + op_kernel->SetOutputOffset({1024}); + NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 + + 
OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); + op_memcpy->AddInputDesc(tensor); + op_memcpy->AddOutputDesc(tensor); + op_memcpy->SetInputOffset({1024}); + op_memcpy->SetOutputOffset({5120}); + NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({5120}); + op_output->SetSrcName( { "memcpy" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); // op_index = 3 + + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel_def = task_def1->mutable_kernel(); + kernel_def->set_stub_func("stub_func"); + kernel_def->set_args_size(64); + string args(64, '1'); + kernel_def->set_args(args.data(), 64); + domi::KernelContext *context = kernel_def->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + domi::TaskDef *task_def2 = model_task_def->add_task(); + task_def2->set_stream_id(0); + task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(2); + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.task_list_.size(), 2); + + ProfilingManager::Instance().is_load_profiling_ = false; +} + +TEST_F(UtestDavinciModel, init_data_op) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + model.runtime_param_.mem_base = (uint8_t 
*)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({5120}); + NodePtr node_input = graph->AddNode(op_input); + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + op_output->SetSrcName( { "data" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); + + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.op_list_.size(), 2); +} + +TEST_F(UtestDavinciModel, init_data_op_subgraph) { + DavinciModel model(0, nullptr); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({5120}); + NodePtr node = graph->AddNode(op_input); + + uint32_t data_op_index = 0; + map data_by_index; + EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 0); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(data_op_index, 0); + EXPECT_TRUE(data_by_index.empty()); +} + +TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { + DavinciModel model(0, nullptr); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr 
op_output = CreateOpDesc("output", NETOUTPUT); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + op_output->SetSrcName( { "data" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node = graph->AddNode(op_output); + + std::vector output_op_list; + EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 0); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_TRUE(output_op_list.empty()); +} + +TEST_F(UtestDavinciModel, init_unknown) { + DavinciModel model(0, nullptr); + model.SetKnownNode(true); + ComputeGraphPtr graph = make_shared("default"); + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({1024}); + NodePtr node_input = graph->AddNode(op_input); // op_index = 0 + + OpDescPtr op_kernel = CreateOpDesc("square", "Square"); + op_kernel->AddInputDesc(tensor); + op_kernel->AddOutputDesc(tensor); + op_kernel->SetInputOffset({1024}); + op_kernel->SetOutputOffset({1024}); + NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 + + OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); + op_memcpy->AddInputDesc(tensor); + op_memcpy->AddOutputDesc(tensor); + op_memcpy->SetInputOffset({1024}); + op_memcpy->SetOutputOffset({5120}); + NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + 
op_output->AddInputDesc(tensor); + op_output->SetInputOffset({5120}); + op_output->SetSrcName( { "memcpy" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); // op_index = 3 + + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel_def = task_def1->mutable_kernel(); + kernel_def->set_stub_func("stub_func"); + kernel_def->set_args_size(64); + string args(64, '1'); + kernel_def->set_args(args.data(), 64); + domi::KernelContext *context = kernel_def->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + domi::TaskDef *task_def2 = model_task_def->add_task(); + task_def2->set_stream_id(0); + task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(2); + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.task_list_.size(), 2); + + EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS); + EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS); + + vector out_shape_info; + model.GetModelAttr(out_shape_info); + + vector input_descs; + vector output_descs; + EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS); + + int32_t virtual_addr = 0; + const vector inputs = { &virtual_addr }; + const vector outputs = { &virtual_addr }; + EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); +} +} // namespace ge diff --git 
a/tests/ut/ge/graph/load/model_helper_unittest.cc b/tests/ut/ge/graph/load/model_helper_unittest.cc new file mode 100644 index 00000000..455285bf --- /dev/null +++ b/tests/ut/ge/graph/load/model_helper_unittest.cc @@ -0,0 +1,52 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define private public +#define protected public +#include "framework/common/helper/model_helper.h" +#include "ge/model/ge_model.h" +#undef private +#undef protected + +#include "proto/task.pb.h" + +using namespace std; + +namespace ge { +class UtestModelHelper : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(UtestModelHelper, save_size_to_modeldef_failed) +{ + GeModelPtr ge_model = ge::MakeShared(); + ModelHelper model_helper; + EXPECT_EQ(ACL_ERROR_GE_MEMORY_ALLOCATION, model_helper.SaveSizeToModelDef(ge_model)); +} + +TEST_F(UtestModelHelper, save_size_to_modeldef) +{ + GeModelPtr ge_model = ge::MakeShared(); + std::shared_ptr task = ge::MakeShared(); + ge_model->SetModelTaskDef(task); + ModelHelper model_helper; + EXPECT_EQ(SUCCESS, model_helper.SaveSizeToModelDef(ge_model)); +} +} // namespace ge diff --git a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc index 5027c988..ef19b516 100644 --- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc +++ 
b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc @@ -37,121 +37,32 @@ class UtestGeProfilinganager : public testing::Test { void TearDown() override {} }; -class TestReporter : public Msprof::Engine::Reporter { - public: - TestReporter() {} - ~TestReporter() {} - - public: - int Report(const Msprof::Engine::ReporterData *data) { return 0; } - - int Flush() { return 0; } -}; - -class TestPluginIntf : public Msprof::Engine::PluginIntf { - public: - TestPluginIntf() {} - ~TestPluginIntf() {} - - public: - int Init(const Msprof::Engine::Reporter *reporter) { return 0; } - - int UnInit() { return 0; } -}; - TEST_F(UtestGeProfilinganager, init_success) { setenv("PROFILING_MODE", "true", true); Options options; options.device_id = 0; options.job_id = "0"; - string profiling_config; - - ProfilingManager::Instance().SetProfilingConfig(profiling_config); - - Status ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); -} - -TEST_F(UtestGeProfilinganager, start_profiling_success) { - int32_t iter_num = 1; - - setenv("PROFILING_MODE", "true", true); - setenv("PROFILING_OPTIONS", "training_trace", true); - Options options; - string profiling_config; - - ProfilingManager::Instance().SetProfilingConfig(profiling_config); - - Status ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); - ret = ProfilingManager::Instance().StartProfiling(iter_num, 0); - EXPECT_EQ(ret, ge::SUCCESS); + options.profiling_mode = "1"; + options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"; - setenv("PROFILING_OPTIONS", "op_trance", true); - ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); - ret = ProfilingManager::Instance().StartProfiling(iter_num, 0); - EXPECT_EQ(ret, ge::SUCCESS); -} - -TEST_F(UtestGeProfilinganager, stop_profiling_success) { - int32_t 
iter_num = 1; - Options options; - TestReporter test_reporter; + struct MsprofGeOptions prof_conf = {{ 0 }}; - string profiling_config; - ProfilingManager::Instance().SetProfilingConfig(profiling_config); - - Status ret = 0; - setenv("PROFILING_OPTIONS", "op_trance", true); - ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); - ret = ProfilingManager::Instance().StartProfiling(iter_num, 0); + Status ret = ProfilingManager::Instance().InitFromOptions(options, prof_conf); EXPECT_EQ(ret, ge::SUCCESS); - ProfilingManager::Instance().StopProfiling(); -} - -TEST_F(UtestGeProfilinganager, plugin_impl_success) { - PluginImpl plugin_Impl("FMK"); - TestReporter test_reporter; - Msprof::Engine::Reporter *reporter_ptr = &test_reporter; - plugin_Impl.Init(reporter_ptr); - plugin_Impl.UnInit(); -} - -TEST_F(UtestGeProfilinganager, profiling_engine_impl_success) { - ProfilingEngineImpl profiling_engine_impl; - - Msprof::Engine::PluginIntf *plugin_ptr = new TestPluginIntf(); - profiling_engine_impl.ReleasePlugin(plugin_ptr); - - Msprof::Engine::PluginIntf *ptr = profiling_engine_impl.CreatePlugin(); - delete ptr; - ptr = nullptr; -} - -TEST_F(UtestGeProfilinganager, set_profilng_cfg_success) { - string profiling_config = "profiling_mode: true"; - ProfilingManager::Instance().SetProfilingConfig(profiling_config); } -TEST_F(UtestGeProfilinganager, init_from_cfg_success0) { - Options options; - string profiling_config = - "{\"startCfg\":[{\"deviceID\":\"0\",\"features\":[{\"name\":\"op_trace\",\"conf\":\"2\"}]}]}"; - ProfilingManager::Instance().SetProfilingConfig(profiling_config); +TEST_F(UtestGeProfilinganager, ParseOptions) { +setenv("PROFILING_MODE", "true", true); +Options options; +options.device_id = 0; +options.job_id = "0"; +options.profiling_mode = "1"; +options.profiling_options = 
R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"; - Status ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); -} -TEST_F(UtestGeProfilinganager, init_from_cfg_success1) { - Options options; - string profiling_config = - "{\"startCfg\":[{\"deviceID\":\"0\",\"features\":[{\"name\":\"test_trace\"}],\"jobID\":\"1231231231\"}]}"; - ProfilingManager::Instance().SetProfilingConfig(profiling_config); +struct MsprofGeOptions prof_conf = {{ 0 }}; - Status ret = ProfilingManager::Instance().Init(options); - EXPECT_EQ(ret, ge::SUCCESS); +Status ret = ProfilingManager::Instance().ParseOptions(options.profiling_options); +EXPECT_EQ(ret, ge::SUCCESS); } diff --git a/tests/ut/ge/session/omg_omg_unittest.cc b/tests/ut/ge/session/omg_omg_unittest.cc new file mode 100644 index 00000000..b9c7f1ec --- /dev/null +++ b/tests/ut/ge/session/omg_omg_unittest.cc @@ -0,0 +1,52 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "common/ge/ge_util.h" +#include "proto/ge_ir.pb.h" +#include "inc/framework/omg/omg.h" + + +using namespace std; + +namespace ge { +class UtestOmg : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(UtestOmg, display_model_info_failed) { + ge::proto::ModelDef model_def; + PrintModelInfo(&model_def); +} + +TEST_F(UtestOmg, display_model_info_success) { + ge::proto::ModelDef model_def; + auto attrs = model_def.mutable_attr(); + ge::proto::AttrDef *attr_def_soc = &(*attrs)["soc_version"]; + attr_def_soc->set_s("Ascend310"); + ge::proto::AttrDef *attr_def = &(*attrs)["om_info_list"]; + attr_def->mutable_list()->add_i(1); + attr_def->mutable_list()->add_i(2); + attr_def->mutable_list()->add_i(3); + attr_def->mutable_list()->add_i(4); + attr_def->mutable_list()->add_i(5); + PrintModelInfo(&model_def); +} +} // namespace ge