Browse Source

Revert broadcast-related conflict fix

pull/819/head
wangxiaotian22 5 years ago
parent
commit
673ba646ee
77 changed files with 1937 additions and 605 deletions
  1. +73
    -0
      Third_Party_Open_Source_Software_Notice
  2. +8
    -4
      ge/CMakeLists.txt
  3. +2
    -1
      ge/common/CMakeLists.txt
  4. +47
    -1
      ge/common/helper/model_helper.cc
  5. +10
    -5
      ge/common/profiling/profiling_manager.cc
  6. +8
    -0
      ge/common/proto/tensorflow/attr_value.proto
  7. +8
    -0
      ge/common/proto/tensorflow/function.proto
  8. +8
    -0
      ge/common/proto/tensorflow/graph.proto
  9. +8
    -0
      ge/common/proto/tensorflow/graph_library.proto
  10. +8
    -0
      ge/common/proto/tensorflow/node_def.proto
  11. +8
    -0
      ge/common/proto/tensorflow/op_def.proto
  12. +8
    -0
      ge/common/proto/tensorflow/resource_handle.proto
  13. +8
    -0
      ge/common/proto/tensorflow/tensor.proto
  14. +8
    -0
      ge/common/proto/tensorflow/tensor_shape.proto
  15. +8
    -0
      ge/common/proto/tensorflow/types.proto
  16. +8
    -0
      ge/common/proto/tensorflow/versions.proto
  17. +2
    -1
      ge/executor/CMakeLists.txt
  18. +0
    -39
      ge/executor/ge_executor.cc
  19. +1
    -0
      ge/executor/module.mk
  20. +3
    -1
      ge/ge_inference.mk
  21. +5
    -4
      ge/ge_local_engine/CMakeLists.txt
  22. +26
    -0
      ge/ge_local_engine/engine/host_cpu_engine.cc
  23. +3
    -1
      ge/ge_runner.mk
  24. +30
    -0
      ge/generator/ge_generator.cc
  25. +1
    -0
      ge/graph/build/memory/CMakeLists.txt
  26. +1
    -0
      ge/graph/build/memory/graph_mem_assigner.cc
  27. +184
    -171
      ge/graph/load/new_model_manager/davinci_model.cc
  28. +113
    -91
      ge/graph/load/new_model_manager/davinci_model.h
  29. +14
    -0
      ge/graph/manager/graph_manager.cc
  30. +17
    -1
      ge/graph/manager/graph_mem_allocator.cc
  31. +9
    -1
      ge/graph/manager/graph_mem_allocator.h
  32. +69
    -0
      ge/graph/manager/host_mem_allocator.cc
  33. +57
    -0
      ge/graph/manager/host_mem_allocator.h
  34. +14
    -1
      ge/graph/manager/host_mem_manager.cc
  35. +4
    -0
      ge/graph/manager/host_mem_manager.h
  36. +0
    -133
      ge/graph/passes/assign_pass.cc
  37. +250
    -0
      ge/graph/passes/assign_remove_pass.cc
  38. +19
    -4
      ge/graph/passes/assign_remove_pass.h
  39. +10
    -6
      ge/graph/passes/constant_fuse_same_pass.cc
  40. +16
    -1
      ge/graph/passes/constant_fuse_same_pass.h
  41. +83
    -0
      ge/graph/passes/inplace_support_check_pass.cc
  42. +28
    -0
      ge/graph/passes/inplace_support_check_pass.h
  43. +24
    -2
      ge/graph/passes/multi_batch_clone_pass.cc
  44. +1
    -1
      ge/graph/passes/switch_to_stream_switch_pass.cc
  45. +9
    -4
      ge/graph/preprocess/graph_preprocess.cc
  46. +5
    -4
      ge/host_cpu_engine/CMakeLists.txt
  47. +11
    -0
      ge/hybrid/common/npu_memory_allocator.cc
  48. +32
    -3
      ge/hybrid/model/hybrid_model_builder.cc
  49. +20
    -2
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc
  50. +2
    -2
      ge/hybrid/node_executor/node_executor.cc
  51. +3
    -0
      ge/offline/CMakeLists.txt
  52. +37
    -3
      ge/offline/main.cc
  53. +1
    -0
      ge/plugin/engine/CMakeLists.txt
  54. +8
    -0
      ge/proto/caffe/caffe.proto
  55. +8
    -0
      ge/proto/tensorflow/attr_value.proto
  56. +8
    -0
      ge/proto/tensorflow/function.proto
  57. +8
    -0
      ge/proto/tensorflow/graph.proto
  58. +8
    -0
      ge/proto/tensorflow/graph_library.proto
  59. +8
    -0
      ge/proto/tensorflow/node_def.proto
  60. +8
    -0
      ge/proto/tensorflow/op_def.proto
  61. +8
    -0
      ge/proto/tensorflow/resource_handle.proto
  62. +8
    -0
      ge/proto/tensorflow/tensor.proto
  63. +8
    -0
      ge/proto/tensorflow/tensor_shape.proto
  64. +8
    -0
      ge/proto/tensorflow/types.proto
  65. +8
    -0
      ge/proto/tensorflow/versions.proto
  66. +80
    -6
      ge/session/omg.cc
  67. +4
    -1
      inc/external/ge/ge_api_types.h
  68. +1
    -0
      inc/framework/common/helper/model_helper.h
  69. +3
    -1
      inc/framework/omg/omg.h
  70. +2
    -1
      inc/framework/omg/omg_inner_types.h
  71. +1
    -1
      metadef
  72. +1
    -1
      parser
  73. +14
    -4
      tests/ut/ge/CMakeLists.txt
  74. +285
    -0
      tests/ut/ge/graph/load/davinci_model_unittest.cc
  75. +52
    -0
      tests/ut/ge/graph/load/model_helper_unittest.cc
  76. +14
    -103
      tests/ut/ge/profiling/ge_profiling_manager_unittest.cc
  77. +52
    -0
      tests/ut/ge/session/omg_omg_unittest.cc

+ 73
- 0
Third_Party_Open_Source_Software_Notice View File

@@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation.

License: MIT License
Please see above.



Software: caffe 1.0

License: BSD 2-Clause License

Open Source Software Licensed Under the BSD 2-Clause License

GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model.
Please see below for the full list of source code files from caffe that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. caffe.proto master
All contributions by the University of California:
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
All rights reserved.


Terms of the BSD 2-Clause License:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Software: tensorflow 1.15.0

License: Apache-2.0 License

Open Source Software Licensed Under the Apache-2.0 License


GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model.
Please see below for the full list of source code files from tensorflow that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. attr_value.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

2. function.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

3. graph.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

4. node_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

5. op_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

6. resource_handle.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

7. tensor.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

8. tensor_shape.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

9. types.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

10. versions.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Terms of the Apache-2.0 License:
Please see above.

+ 8
- 4
ge/CMakeLists.txt View File

@@ -125,6 +125,7 @@ set(TRAIN_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
@@ -165,7 +166,8 @@ set(TRAIN_SRC_LIST
"graph/passes/dropout_pass.cc"
"graph/passes/hccl_group_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/assign_remove_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc"
"host_kernels/transpose_kernel.cc"
@@ -401,6 +403,7 @@ set(INFER_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"model/ge_model.cc"
@@ -521,7 +524,8 @@ set(INFER_SRC_LIST
"graph/passes/cond_remove_pass.cc"
"graph/passes/for_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/assign_remove_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/remove_same_const_pass.cc"
@@ -625,6 +629,7 @@ target_compile_definitions(ge_runner PRIVATE

target_compile_options(ge_runner PRIVATE
-O2
-fno-common
)

target_include_directories(ge_runner PRIVATE
@@ -669,7 +674,6 @@ target_link_libraries(ge_runner PRIVATE
c_sec
slog
runtime
resource
error_manager
ascend_hal_stub
-Wl,--as-needed
@@ -693,6 +697,7 @@ target_compile_definitions(ge_compiler PRIVATE

target_compile_options(ge_compiler PRIVATE
-O2
-fno-common
)

target_include_directories(ge_compiler PRIVATE
@@ -734,7 +739,6 @@ target_link_libraries(ge_compiler PRIVATE
error_manager
slog
runtime_compile
resource
-Wl,--as-needed
json
-lrt


+ 2
- 1
ge/common/CMakeLists.txt View File

@@ -80,6 +80,7 @@ target_compile_options(ge_common PRIVATE
-O2
-Werror
-Wno-deprecated-declarations
-fno-common
)

target_include_directories(ge_common PRIVATE
@@ -134,7 +135,7 @@ target_compile_definitions(ge_common_static PRIVATE
)

target_compile_options(ge_common_static PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations>
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
)


+ 47
- 1
ge/common/helper/model_helper.cc View File

@@ -76,6 +76,48 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr<OmFileSaveHelper> &om_fil
return SUCCESS;
}

// Computes the serialized size of each OM partition (model def, weights, TBE
// kernels, cust-AICPU kernels, task def) and records the five sizes, in that
// order, as the list-int attribute "om_info_list" on ge_model.
// Returns SUCCESS on success; FAILED / ACL_ERROR_GE_MEMORY_ALLOCATION on
// allocation or attribute-set failure.
Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) {
vector<int64_t> om_info;
// Build a throwaway Model mirroring ge_model (graph, version, attrs) purely
// to measure the size of its serialized form.
ModelPtr model_tmp = ge::MakeShared<ge::Model>(ge_model->GetName(), ge_model->GetPlatformVersion());
if (model_tmp == nullptr) {
GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str());
return FAILED;
}
model_tmp->SetGraph(ge_model->GetGraph());
model_tmp->SetVersion(ge_model->GetVersion());
model_tmp->SetAttr(ge_model->MutableAttrMap());
ge::Buffer model_buffer;
// Save() return value deliberately ignored: only the resulting buffer size
// is needed here, and a failed save yields size 0.
(void)model_tmp->Save(model_buffer);
GELOGD("SaveSizeToModelDef modeldef_size is %zu", model_buffer.GetSize());
om_info.push_back(model_buffer.GetSize());

// Weight partition size.
auto ge_model_weight = ge_model->GetWeight();
GELOGD("SaveSizeToModelDef weight_data_size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
om_info.push_back(ge_model_weight.GetSize());

// TBE kernel-store partition size.
TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore();
GELOGD("SaveSizeToModelDef tbe_kernels_size is %zu", tbe_kernel_store.DataSize());
om_info.push_back(tbe_kernel_store.DataSize());

// Custom AICPU kernel-store partition size.
CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore();
GELOGD("SaveSizeToModelDef cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize());
om_info.push_back(cust_aicpu_kernel_store.DataSize());

// Task-def partition size, taken from the protobuf's own byte count.
std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
if (model_task_def == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
size_t partition_task_size = model_task_def->ByteSizeLong();
GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
om_info.push_back(partition_task_size);

// Persist the collected sizes on the model itself so later stages can read
// partition sizes without re-serializing.
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info),
GELOGE(FAILED, "SetListInt of om_info_list failed.");
return FAILED);

return SUCCESS;
}

Status ModelHelper::SaveModelDef(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, ge::Buffer &model_buffer, size_t model_index) {
@@ -87,7 +129,11 @@ Status ModelHelper::SaveModelDef(std::shared_ptr<OmFileSaveHelper> &om_file_save
model_tmp->SetGraph(ge_model->GetGraph());
model_tmp->SetVersion(ge_model->GetVersion());
model_tmp->SetAttr(ge_model->MutableAttrMap());

Status ret = SaveSizeToModelDef(ge_model);
if (ret != SUCCESS) {
GELOGE(ret, "SaveSizeToModelDef failed");
return ret;
}

(void)model_tmp->Save(model_buffer);
GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize());


+ 10
- 5
ge/common/profiling/profiling_manager.cc View File

@@ -89,13 +89,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling by env
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 };
is_load_profiling_ = false; // Change in ProfInit
is_execute_profiling_ = false;

if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
// enable profiling by ge option
if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
options.profiling_options.size()) != EOK) {
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy profiling_options failed.");
return INTERNAL_ERROR;
}
@@ -125,11 +124,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return ge::PARAM_INVALID;
}

if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(),
sizeof(options.job_id.c_str())) != EOK) {
if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy job_id failed.");
return INTERNAL_ERROR;
}
GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
#endif
return ge::SUCCESS;
}
@@ -159,6 +159,7 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
is_training_trace_ = true;
} catch (...) {
GELOGE(FAILED, "Json prof_conf options is invalid.");
return ge::PARAM_INVALID;
@@ -632,6 +633,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK;
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {


+ 8
- 0
ge/common/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/common/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 2
- 1
ge/executor/CMakeLists.txt View File

@@ -28,6 +28,7 @@ set(SRC_LIST
"../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc>
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
@@ -162,7 +163,7 @@ set(SRC_LIST
add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_executor PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations>
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
)


+ 0
- 39
ge/executor/ge_executor.cc View File

@@ -39,8 +39,6 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/opsproto_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"

using std::string;
using std::vector;
@@ -210,33 +208,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
namespace ge {
bool GeExecutor::isInit_ = false;

// Initializes the OpsProtoManager singleton with the op-proto library search
// path. The path is taken from the ASCEND_OPP_PATH environment variable when
// set (custom/ then built-in/ under op_proto/), otherwise derived from the
// plugin manager's own install location. Best-effort: failures only log and
// return; note the original spelling "Manger" is preserved as-is.
static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
// Reject env paths that do not resolve to a real location.
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
}
// Colon-separated list: custom protos take precedence over built-in ones.
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
// No env override: walk two directory levels up from the plugin path and
// look for the ops/op_proto tree there.
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
// Initialize() result intentionally discarded — this setup is best-effort.
(void)manager->Initialize(option_tmp);
}

GeExecutor::GeExecutor() {}

Status GeExecutor::Initialize() {
@@ -246,16 +217,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);


+ 1
- 0
ge/executor/module.mk View File

@@ -15,6 +15,7 @@ local_ge_executor_src_files := \
../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \
../graph/manager/rdma_pool_allocator.cc \
../graph/manager/host_mem_allocator.cc \
../graph/manager/graph_mem_allocator.cc \
../graph/manager/graph_caching_allocator.cc \
../graph/manager/trans_var_data_utils.cc \


+ 3
- 1
ge/ge_inference.mk View File

@@ -64,6 +64,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \

@@ -195,7 +196,8 @@ OMG_HOST_SRC_FILES := \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \
graph/passes/assign_remove_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \


+ 5
- 4
ge/ge_local_engine/CMakeLists.txt View File

@@ -26,6 +26,7 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_engine PRIVATE
@@ -55,10 +56,8 @@ target_link_libraries(ge_local_engine PRIVATE
-Wl,--no-as-needed
graph
ascend_protobuf
register
c_sec
slog
runtime
-Wl,--as-needed
)

@@ -67,6 +66,7 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_ge_local_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_ge_local_engine PRIVATE
@@ -97,10 +97,8 @@ target_link_libraries(atc_ge_local_engine PRIVATE
-Wl,--no-as-needed
graph
ascend_protobuf
register
c_sec
slog
runtime_compile
-Wl,--as-needed
)

@@ -114,6 +112,7 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR

target_compile_options(ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_opskernel_builder PRIVATE
@@ -154,6 +153,7 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO

target_compile_options(atc_ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
@@ -199,6 +199,7 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR

target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_opskernel_builder_static PRIVATE


+ 26
- 0
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -26,6 +26,31 @@
#include "common/math/math_util.h"

namespace {
#ifndef ONLY_COMPILE_OPEN_SRC
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \
if (need_create_flag) { \
uint64_t size = data_num * sizeof(TYPE); \
ge_tensor = MakeShared<GeTensor>(out_desc, size); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \
ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
outputs.emplace_back(ge_tensor); \
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \
GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
op_desc->GetName().c_str(), i); \
named_outputs.emplace(tensor_name, tensor); \
break; \
}
#else
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \
@@ -61,6 +86,7 @@ namespace {
named_outputs.emplace(tensor_name, tensor); \
break; \
}
#endif
}

namespace ge {


+ 3
- 1
ge/ge_runner.mk View File

@@ -94,6 +94,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/memory_api.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/trans_var_data_utils.cc \
@@ -134,7 +135,8 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/dropout_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \
graph/passes/assign_remove_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/global_step_insert_pass.cc \
host_kernels/transpose_kernel.cc \


+ 30
- 0
ge/generator/ge_generator.cc View File

@@ -336,6 +336,7 @@ class GeGenerator::Impl {
bool GetVersionFromPath(const std::string &file_path, std::string &version);
bool SetAtcVersionInfo(AttrHolder &obj);
bool SetOppVersionInfo(AttrHolder &obj);
bool SetOmSystemInfo(AttrHolder &obj);
};

Status GeGenerator::Initialize(const map<string, string> &options) {
@@ -546,6 +547,32 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
return true;
}

// Stamps build-environment info onto obj as string attributes:
//   - "soc_version":     from the ge.SOC_VERSION context option;
//   - "framework_type":  human-readable name mapped from the ge.FRAMEWORK_TYPE
//                        option code (0=Caffe, 1=MindSpore, 3=TensorFlow, 5=Onnx).
// Returns false if either SetStr call fails, true otherwise. Note: an unknown
// framework code maps through operator[] to an empty string attribute.
bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) {
std::string soc_version;
// GetOption result ignored; soc_version stays empty if the option is unset.
(void)ge::GetContext().GetOption(ge::SOC_VERSION, soc_version);
GELOGI("SetOmSystemInfo soc_version: %s", soc_version.c_str());
if (!ge::AttrUtils::SetStr(obj, "soc_version", soc_version)) {
GELOGW("SetStr of soc_version failed.");
return false;
}

// 0(Caffe) 1(MindSpore) 3(TensorFlow) 5(Onnx)
std::map<string, string> framework_type_to_string = {
{"0", "Caffe"},
{"1", "MindSpore"},
{"3", "TensorFlow"},
{"5", "Onnx"}
};
std::string framework_type;
(void)ge::GetContext().GetOption(ge::FRAMEWORK_TYPE, framework_type);
GELOGI("SetOmSystemInfo framework_type: %s", framework_type.c_str());
if (!ge::AttrUtils::SetStr(obj, "framework_type", framework_type_to_string[framework_type.c_str()])) {
GELOGW("SetStr of framework_type failed.");
return false;
}
return true;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -842,6 +869,9 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo
if (!SetOppVersionInfo(*(model_root.get()))) {
GELOGW("SetPackageVersionInfo of ops failed!");
}
if (!SetOmSystemInfo(*(model_root.get()))) {
GELOGW("SetOmsystemInfo failed!");
}
ModelHelper model_helper;
model_helper.SetSaveMode(is_offline_);
ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape);


+ 1
- 0
ge/graph/build/memory/CMakeLists.txt View File

@@ -14,6 +14,7 @@ add_library(ge_memory STATIC ${SRC_LIST})
target_compile_options(ge_memory PRIVATE
-Werror
-O2
-fno-common
)

target_compile_definitions(ge_memory PRIVATE


+ 1
- 0
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -402,6 +402,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return FAILED;
}
continuous_mem_start = iter->second.mem_offset_;
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);


+ 184
- 171
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -163,7 +163,6 @@ DavinciModel::~DavinciModel() {

op_list_.clear();
data_op_list_.clear();
output_op_list_.clear();
tensor_name_to_fixed_addr_size_.clear();
tensor_name_to_peer_output_index_.clear();
GE_DELETE_NEW_SINGLE(data_inputer_);
@@ -830,12 +829,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
{CASE, &DavinciModel::InitCase},
};

GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed.");

vector<OpDescPtr> output_op_list;
map<uint32_t, OpDescPtr> data_by_index;
auto nodes = compute_graph->GetAllNodes();
const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore();
for (size_t i = 0; i < nodes.size(); i++) {
for (size_t i = 0; i < nodes.size(); ++i) {
auto node = nodes.at(i);
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
@@ -850,7 +848,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc);

if (IsDataOp(op_desc->GetType())) {
if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) {
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) {
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -859,7 +857,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}

if (op_desc->GetType() == NETOUTPUT) {
if (InitNetOutput(node) != SUCCESS) {
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) {
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -919,33 +917,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}
GE_TIMESTAMP_ADD(InitTbeHandle);
}
AdjustDataOpList(data_by_index);
GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc.");
GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle.");
return SUCCESS;
}

Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) {
if (!known_node_) return SUCCESS;
// for dynamic shape
auto direct_nodes = compute_graph->GetDirectNode();
for (size_t i = 0; i < direct_nodes.size(); i++) {
auto node = direct_nodes.at(i);
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
GELOGE(PARAM_INVALID, "op_desc is null.");
return PARAM_INVALID;
}
if (IsDataOp(op_desc->GetType())) {
GELOGD("init data op %s", op_desc->GetName().c_str());
data_op_list_.push_back(op_desc);
}
if (op_desc->GetType() == NETOUTPUT) {
GELOGD("init netouput op %s", op_desc->GetName().c_str());
output_op_list_.push_back(op_desc);
}
}
return SUCCESS;
return OptInputOutputInfo(data_by_index, output_op_list);
}

void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
@@ -963,24 +938,35 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
}
}

///
/// @ingroup ge
/// @brief Data Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: Data Op.
/// @param [in/out] data_op_index: index of the current Data node.
/// @param [in/out] data_by_index: Data ordered by index.
/// @return Status
Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index) {
///
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index) {
// op_desc Checked by Init: Data, valid.
auto op_desc = node->GetOpDesc();
if (known_node_) {
if (node->GetOwnerComputeGraph() != graph) {
GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str());
return SUCCESS;
}
uint32_t parent_index = 0; // Ignore subgraph Data Node.
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str());
return SUCCESS;

GELOGI("Init Data node: %s.", op_desc->GetName().c_str());
auto data_index = data_op_index++;
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGD("Get new index %u, old %u", data_index, data_op_index - 1);
}

data_by_index[data_index] = op_desc;
data_op_list_.push_back(op_desc);
if (known_node_) {
return SUCCESS;
}

// Make information for copy input data.
const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc);
@@ -992,10 +978,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size());
return PARAM_INVALID;
}
auto data_index = data_op_index;
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index);
}

bool fusion_flag = false;
ZeroCopyOffset zero_copy_offset;
int64_t data_size = output_size_list[kDataIndex];
@@ -1006,7 +989,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
return PARAM_INVALID;
}
new_input_data_info_[data_index] = zero_copy_offset;
data_by_index[data_index] = op_desc;

for (size_t index = 0; index < virtual_addr_list.size(); ++index) {
void *addr = virtual_addr_list.at(index);
@@ -1017,7 +999,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
new_input_outside_addrs_[addr] = zero_copy_offset;
}

data_op_index++;
return SUCCESS;
}

@@ -1025,18 +1006,52 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
/// @ingroup ge
/// @brief Sort Data op list by index.
/// @param [in] data_by_index: map of Data Op.
/// @return
/// @param [in] output_op_list: list of NetOutput op.
/// @return Status
///
void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index) {
Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index,
const vector<OpDescPtr> &output_op_list) {
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size());
if (data_by_index.size() != data_op_list_.size()) {
GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
return;
GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
return INTERNAL_ERROR;
}

data_op_list_.clear();
for (auto &item : data_by_index) {
data_op_list_.emplace_back(item.second);
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
input_addrs_list_.emplace_back(output_addrs);

if (item.second->GetType() == AIPP_DATA_TYPE) {
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
is_dynamic_aipp_ = true;
}
}

for (const auto &op_desc : output_op_list) {
auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc);
GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size());
output_addrs_list_.emplace_back(input_addrs);

bool getnext_sink_dynamic = false;
if (AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) {
GELOGI("ATTR_GETNEXT_SINK_DYNMAIC has been set and is true, node: %s", op_desc->GetName().c_str());
is_getnext_sink_dynamic_ = true;
}

vector<string> shape_info;
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, shape_info)) {
dynamic_output_shape_info_.insert(dynamic_output_shape_info_.end(), shape_info.begin(), shape_info.end());
}

if (InitOutputTensorInfo(op_desc) != SUCCESS) {
return INTERNAL_ERROR;
}
}

return InitOutputDescInfo(output_op_list, output_descs_, output_formats_);
}

bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1050,24 +1065,27 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {

/// @ingroup ge
/// @brief NetOutput Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: NetOutput Op.
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
Status DavinciModel::InitNetOutput(const NodePtr &node) {
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node,
vector<OpDescPtr> &output_op_list) {
// node->GetOpDesc Checked by Init: NetOutput, valid.
auto op_desc = node->GetOpDesc();
// excludes the function op sub graph, e.g. case,if
if (known_node_) {
if (node->GetOwnerComputeGraph() != graph) {
GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str());
op_list_.erase(op_desc->GetId());
return SUCCESS;
}
ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph();
GE_CHECK_NOTNULL(owner_graph);
if (owner_graph->GetParentGraph() != nullptr) {
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str());
op_list_.erase(op_desc->GetId());

GELOGI("Init NetOutput node: %s.", op_desc->GetName().c_str());
output_op_list.push_back(op_desc);
if (known_node_) {
return SUCCESS;
}

output_op_list_.push_back(op_desc);
// Make information for copy output data.
const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc);
const vector<void *> virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc);
@@ -1665,32 +1683,30 @@ Status DavinciModel::CpuModelRepeat() {

Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc) {
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) {
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) {
GELOGI("data_op_list_ is empty or input_desc size is not 1.");
} else {
std::vector<uint32_t> input_formats;
vector<uint32_t> input_formats;
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed.");
}

std::vector<uint32_t> outputFormats;
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get output desc info failed.");

vector<uint32_t> output_formats;
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed");
return SUCCESS;
}

Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &outputFormats) {
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) {
vector<uint32_t> &input_formats,
vector<uint32_t> &output_formats) {
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) {
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!");
return FAILED;
}

GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");

GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed");

GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed");
return SUCCESS;
}

@@ -1828,29 +1844,22 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami
dynamic_type = dynamic_type_;
}

void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
for (auto &op : output_op_list_) {
if (op->GetType() != NETOUTPUT) {
continue;
}
if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
GELOGD("Can not get dynamic output dims attr");
}
}
///
/// @ingroup ge
/// @brief Report the dynamic output shape strings collected during model init.
/// @param [out] out_shape_info: receives the cached dynamic output dims.
/// @return None
///
void DavinciModel::GetModelAttr(vector<string> &out_shape_info) {
  // Shapes were gathered from NetOutput nodes at init; just append the cache.
  for (const auto &shape : dynamic_output_shape_info_) {
    out_shape_info.push_back(shape);
  }
}

Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &outputFormats) {
if ((data_op_list_.empty()) || (1 != data_op_list_[0]->GetInputsSize())) {
std::vector<uint32_t> &output_formats) {
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) {
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!");
return FAILED;
}

GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");

GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed");
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed");

GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR,
"output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(),
@@ -1939,7 +1948,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s
return SUCCESS;
}

void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output,
void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output,
uint32_t &format_result) {
/// netoutput input tensor desc
GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr");
@@ -1992,10 +2001,10 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType();
}

Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) {
GELOGD("Output node size: %zu", output_op_list_.size());
for (size_t i = 0; i < output_op_list_.size(); i++) {
auto &op_desc = output_op_list_[i];
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
GELOGD("Output node size: %zu", output_op_list.size());
for (const auto &op_desc : output_op_list) {
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
for (uint32_t index = 0; index < out_size; index++) {
string output_name;
@@ -2018,13 +2027,19 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,
std::to_string(src_index[index]);
}
output.name = output_name;
output_desc.push_back(output);
formats.push_back(format_result);
output_descs.push_back(output);
output_formats.push_back(format_result);
}
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Return the output descriptors and formats cached at init time.
/// @param [out] output_descs: appended with cached InputOutputDescInfo entries.
/// @param [out] output_formats: appended with cached format values.
/// @return SUCCESS always.
///
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
  // Descriptors were built once by InitOutputDescInfo; append the caches here.
  for (const auto &desc : output_descs_) {
    output_descs.push_back(desc);
  }
  for (const auto &format : output_formats_) {
    output_formats.push_back(format);
  }
  return SUCCESS;
}

ge::Format DavinciModel::GetFormat() {
if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) {
GELOGW("OP List Pointer is null or input_desc size is not 1!");
@@ -2368,7 +2383,7 @@ void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
/// @author
///
Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) {
if (output_op_list_.empty()) {
if (output_addrs_list_.empty()) {
Status ret = SyncVarData();
return ret;
}
@@ -2427,20 +2442,12 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r
return SUCCESS;
}

Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs) {
GE_CHECK_NOTNULL(op_desc);
GE_CHECK_NOTNULL(output_data);
if (output_data->blobs.size() > data_index) {
GELOGI("No need to generate output tensor info, model id:%u", model_id_);
return SUCCESS;
}
std::vector<int64_t> out_buffer_size_vec;
std::vector<std::vector<int64_t>> shape_info_vec;
Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) {
size_t input_num = op_desc->GetInputsSize();
if (is_getnext_sink_dynamic_) {
input_num = input_num - kGetDynamicDimsCount;
}

for (size_t i = 0; i < input_num; ++i) {
int64_t size = 0;
auto input_desc = op_desc->GetInputDescPtr(i);
@@ -2460,25 +2467,37 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data
}
}
GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str());
out_buffer_size_vec.push_back(size);
shape_info_vec.push_back(output_shape);
output_buffer_size_.push_back(size);
output_shape_info_.push_back(output_shape);
}

return SUCCESS;
}

Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs) {
GE_CHECK_NOTNULL(output_data);
if (!output_data->blobs.empty()) {
GELOGI("No need to generate output tensor info, model id:%u", model_id_);
return SUCCESS;
}
GELOGI("Output blobs size:%zu, data index:%u, model id:%u", out_buffer_size_vec.size(), data_index, model_id_);
for (size_t i = 0; i < out_buffer_size_vec.size(); ++i) {
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[out_buffer_size_vec[i]]);

GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_);
for (size_t i = 0; i < output_buffer_size_.size(); ++i) {
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]);
if (data_buf == nullptr) {
GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed.");
return GE_GRAPH_MALLOC_FAILED;
}
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(out_buffer_size_vec[i]), false});
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(output_buffer_size_[i]), false});
ge::OutputTensorInfo output;
output.dims = shape_info_vec[i];
output.dims = output_shape_info_[i];
output.data = std::move(data_buf);
output.length = out_buffer_size_vec[i];
output.length = output_buffer_size_[i];
outputs.emplace_back(std::move(output));
GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i,
formats::JoinToString(output.dims).c_str(), output.length);
}

return SUCCESS;
}

@@ -2513,36 +2532,28 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
return INTERNAL_ERROR;
}

if (output_op_list_.empty()) {
if (output_addrs_list_.empty()) {
GELOGW("Output tensor list is empty, model id: %u", model_id_);
GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed.");
return INTERNAL_ERROR;
}

GE_CHECK_NOTNULL(output_data);
// index of data in output_data
uint32_t data_index = 0;

output_data->index = data_id;
output_data->model_id = model_id_;

is_getnext_sink_dynamic_ = false;
// copy output data from op to designated position
for (auto &op_desc : output_op_list_) {
if (IsGetNextSinkDynamic(op_desc)) {
GELOGD("Reinit cur dynamic dims when getnext sink dynamic.");
is_getnext_sink_dynamic_ = true;
cur_dynamic_dims_.clear();
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_);
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t),
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST);
GE_CHK_RT_RET(ret);
}
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str());
if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) {
return INTERNAL_ERROR;
}
data_index += op_desc->GetInputsSize();
if (is_getnext_sink_dynamic_) {
GELOGD("Reinit cur dynamic dims when getnext sink dynamic.");
cur_dynamic_dims_.clear();
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_);
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t),
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST);
GE_CHK_RT_RET(ret);
}

GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str());
if (GenOutputTensorInfo(output_data, outputs) != SUCCESS) {
return INTERNAL_ERROR;
}

if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) {
@@ -2680,10 +2691,10 @@ void *DavinciModel::Run(DavinciModel *model) {
model->SetProfileTime(MODEL_AFTER_PROC_START));
GE_TIMESTAMP_START(ReturnResult3);
// copy output data from device to host
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(),
GE_IF_BOOL_EXEC(!model->output_addrs_list_.empty(),
(void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput()))
// copy output data from device to host for variable graph
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index));
GE_IF_BOOL_EXEC(model->output_addrs_list_.empty(), (void)model->ReturnNoOutput(current_data.index));
GE_IF_BOOL_EXEC(model->is_first_execute_,
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost"));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
@@ -2803,30 +2814,49 @@ void DavinciModel::UnbindTaskSinkStream() {
}
}

///
/// @ingroup ge
/// @brief Translate a logic (fixed-base) address into its runtime address.
/// @param [in] addr: address recorded against fixed_mem_base_.
/// @return relocated address based on mem_base_ when addr lies inside the
///         fixed range; otherwise addr unchanged.
///
void *DavinciModel::GetRunAddress(void *addr) const {
  // No relocation needed when the runtime base equals the fixed base.
  if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) {
    return addr;
  }

  const uintptr_t logic_addr = reinterpret_cast<uintptr_t>(addr);
  const bool in_fixed_range =
      (logic_addr >= fixed_mem_base_) && (logic_addr < fixed_mem_base_ + runtime_param_.mem_size);
  return in_fixed_range ? static_cast<void *>(mem_base_ + (logic_addr - fixed_mem_base_)) : addr;
}

Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) {
GELOGI("DavinciModel::CreateKnownZeroCopyMap in.");
if (inputs.size() > data_op_list_.size()) {
GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size());
GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: %zu",
inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size());
if (inputs.size() > input_addrs_list_.size()) {
GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size());
return FAILED;
}
// remove zero copy addr in last iteration
knonw_input_data_info_.clear();
knonw_output_data_info_.clear();
known_input_data_info_.clear();
known_output_data_info_.clear();
for (size_t i = 0; i < inputs.size(); ++i) {
const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]);
knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i];
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]);
const vector<void *> &addr_list = input_addrs_list_[i];
void *addr = GetRunAddress(addr_list[kDataIndex]);
known_input_data_info_[addr] = inputs[i];
GELOGI("input %zu, v addr %p, r addr %p, p addr %p", i, addr_list[kDataIndex], addr, inputs[i]);
}
if (output_op_list_.size() < kOutputNum) {
GELOGW("output op num in graph is %zu.", output_op_list_.size());

if (output_addrs_list_.empty()) {
GELOGW("output op num in graph is %zu", output_addrs_list_.size());
return SUCCESS;
}
const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]);
const vector<void *> &addr_list = output_addrs_list_.front();
for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) {
knonw_output_data_info_[addr_list[i]] = outputs[i];
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]);
void *addr = GetRunAddress(addr_list[i]);
known_output_data_info_[addr] = outputs[i];
GELOGI("output %zu, v addr %p, r addr %p, p addr %p", i, addr_list[i], addr, outputs[i]);
}
GELOGI("DavinciModel::CreateKnownZeroCopyMap success.");

GELOGI("success, known input data info size: %zu, known output data info size: %zu",
known_input_data_info_.size(), known_output_data_info_.size());
return SUCCESS;
}

@@ -2837,40 +2867,30 @@ void DavinciModel::SetTotalIOAddrs(const vector<void *> &io_addrs) {
}

for (size_t i = 0; i < io_addrs.size(); ++i) {
uintptr_t addr = reinterpret_cast<uintptr_t>(io_addrs[i]);
if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) {
total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_));
} else {
total_io_addrs_.emplace_back(io_addrs[i]);
}
total_io_addrs_.emplace_back(GetRunAddress(io_addrs[i]));
}
}

Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_)) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
uintptr_t addr = reinterpret_cast<uintptr_t>(total_io_addrs[i]);
if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) {
total_io_addrs[i] = mem_base_ + (addr - fixed_mem_base_);
}
total_io_addrs[i] = GetRunAddress(total_io_addrs[i]);
}
}

for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
auto it_in = known_input_data_info_.find(total_io_addrs[i]);
if (it_in != known_input_data_info_.end()) {
GELOGI("input %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = known_input_data_info_.at(total_io_addrs[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
auto it_out = known_output_data_info_.find(total_io_addrs[i]);
if (it_out != known_output_data_info_.end()) {
GELOGI("output %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = known_output_data_info_.at(total_io_addrs[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
GELOGI("success, total io addrs size: %zu", total_io_addrs.size());
return SUCCESS;
}

@@ -3171,15 +3191,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
"MAY cause inference result ERROR, please check model input",
input_size, op_size);
}
bool is_dynamic_aipp = false;
for (const auto &op_desc : data_op_list_) {
if (op_desc->GetType() == AIPP_DATA_TYPE) {
GELOGI("This is dynamic aipp model.");
is_dynamic_aipp = true;
break;
}
}
if (is_dynamic_aipp) {

if (is_dynamic_aipp_) {
GELOGI("This is dynamic aipp model, no need to judge smaller input size");
return true;
}


+ 113
- 91
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -49,6 +49,10 @@
#include "task_info/task_info.h"
#include "graph/common/local_context.h"

using std::mutex;
using std::thread;
using std::multimap;

namespace ge {
// op debug need 2048 bits buffer
const size_t kOpDebugMemorySize = 2048UL;
@@ -84,11 +88,11 @@ struct SuperKernelTaskInfo {
uint32_t last_stream_id;
void *last_stream;
void *last_sm_desc;
std::vector<void *> kernel_list;
std::vector<void *> arg_list;
std::vector<uint32_t> dump_flag_list;
std::vector<OpDescPtr> op_desc_list;
std::vector<uintptr_t> dump_args_list;
vector<void *> kernel_list;
vector<void *> arg_list;
vector<uint32_t> dump_flag_list;
vector<OpDescPtr> op_desc_list;
vector<uintptr_t> dump_args_list;
uint32_t last_dump_flag;
int64_t last_group_key;
uintptr_t last_dump_args;
@@ -123,7 +127,7 @@ class DavinciModel {
/// @brief DavinciModel constructor
/// @author
///
DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener);
DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener);

///
/// @ingroup ge
@@ -153,7 +157,7 @@ class DavinciModel {
/// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op.
/// @return: 0 for success / others for fail
///
Status SetQueIds(const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids);
Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids);

///
/// @ingroup ge
@@ -223,13 +227,14 @@ class DavinciModel {
// get total mem size
size_t TotalMemSize() const { return runtime_param_.mem_size; }

const std::map<uint32_t, MemInfo> &P2PMemInfos() const {return runtime_param_.memory_infos;}
const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; }

// model name
string Name() const { return name_; }

// om_name
string OmName() const { return om_name_; }

// version
uint32_t Version() const { return version_; }

@@ -255,9 +260,6 @@ class DavinciModel {

Status DestroyThread();

// Get Data Op.
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
@@ -274,11 +276,12 @@ class DavinciModel {
}
return nullptr;
}

// get task info for profiling
const std::vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }
const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }

// get updated task info list
std::vector<TaskInfoPtr> GetTaskList() { return task_list_; }
vector<TaskInfoPtr> GetTaskList() { return task_list_; }

// Modified from KernelTaskInfo.
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }
@@ -323,7 +326,7 @@ class DavinciModel {
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats);

///
/// @ingroup ge
@@ -332,7 +335,7 @@ class DavinciModel {
/// @param [out] dynamic_type
/// @return execute result
///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const;

///
/// @ingroup ge
@@ -340,13 +343,13 @@ class DavinciModel {
/// @param [out] batch_info
/// @return None
///
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const;
void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const;

void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const;
void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const;

void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type);
void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);
void GetModelAttr(vector<string> &dynamic_output_shape_info);

///
/// @ingroup ge
@@ -373,7 +376,7 @@ class DavinciModel {
/// @param [in] string identification: unique identification for current op.
/// @return None
///
void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification);
void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);

///
/// @ingroup ge
@@ -384,7 +387,7 @@ class DavinciModel {
///
Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats);

Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

@@ -406,8 +409,6 @@ class DavinciModel {
///
bool RunFlag() const { return run_flg_; }

Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats);

///
/// @ingroup ge
/// @brief Set Session Id
@@ -453,14 +454,14 @@ class DavinciModel {
/// @ingroup ge
/// @brief Save outside address of Data or NetOutput used info for ZeroCopy.
/// @param [in] const OpDescPtr &op_desc: current op desc
/// @param [in] const std::vector<void *> &outside_addrs: address of task
/// @param [in] const vector<void *> &outside_addrs: address of task
/// @param [in] const void *args_offset: arguments address save the address.
/// @return None.
///
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args,
void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args,
size_t size, size_t offset);

void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type);
void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type);

bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

@@ -476,7 +477,7 @@ class DavinciModel {
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
}

void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) {
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
}

@@ -485,7 +486,7 @@ class DavinciModel {

DavinciModel(const DavinciModel &model) = delete;

const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() {
const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() {
return main_follow_stream_mapping_;
}
void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream);
@@ -534,8 +535,8 @@ class DavinciModel {
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
vector<InputOutputDims> &output_dims);
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }
// om file name
void SetOmName(string om_name) { om_name_ = om_name; }
@@ -546,7 +547,6 @@ class DavinciModel {
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
}
Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph);

private:
// memory address of weights
@@ -566,6 +566,8 @@ class DavinciModel {
struct timeInfo time_info_;
int32_t dataInputTid;

void *GetRunAddress(void *addr) const;

///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@@ -603,7 +605,7 @@ class DavinciModel {
/// @param [in] batch_label: batch label for multi-batch scenes
/// @return SUCCESS handle successfully / others handle failed
///
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);

Status CopyInputData(const InputData &input_data, bool device_data = false);
@@ -619,7 +621,8 @@ class DavinciModel {

void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input);

Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats);
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats);

Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);

@@ -631,7 +634,7 @@ class DavinciModel {

uint8_t *MallocWeightsMem(size_t weights_size);

uint8_t* MallocP2PMem(size_t p2p_data_size);
uint8_t *MallocP2PMem(size_t p2p_data_size);

void FreeFeatureMapMem();

@@ -663,27 +666,33 @@ class DavinciModel {
///
/// @ingroup ge
/// @brief Data Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: Data Op.
/// @param [in/out] data_op_index: NetOutput addr size info.
/// @param [in/out] data_op_index: index of the current Data op.
/// @param [in/out] data_by_index: Data ordered by index.
/// @return Status
///
Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index);
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index);

///
/// @ingroup ge
/// @brief Sort Data op list by index.
/// @param [in] data_by_index: map of Data Op.
/// @return
/// @param [in] output_op_list: list of NetOutput op.
/// @return Status
///
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);
Status OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);

///
/// @ingroup ge
/// @brief NetOutput Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: NetOutput Op.
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
///
Status InitNetOutput(const NodePtr &node);
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);

///
/// @ingroup ge
@@ -722,7 +731,7 @@ class DavinciModel {
///
Status InitTbeHandle(const OpDescPtr &op_desc);

void StoreTbeHandle(const std::string &handle_key);
void StoreTbeHandle(const string &handle_key);
void CleanTbeHandle();

///
@@ -753,7 +762,7 @@ class DavinciModel {
///
Status BindInputQueue();

Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);

///
/// @ingroup ge
@@ -824,7 +833,7 @@ class DavinciModel {

Status DoTaskSink();

void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);
void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);

Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

@@ -838,13 +847,16 @@ class DavinciModel {

Status SinkTimeProfile(const InputData &current_data);

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);
Status InitOutputTensorInfo(const OpDescPtr &op_desc);
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);

void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &formats);

void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info);
void SetLabelForDynamic(const NodePtr &node);

void ParseDynamicOutShape(const std::vector<std::string> &str_info, std::vector<vector<int64_t>> &vec_info);
void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info);
bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);
void GetAllGearsInfo(const NodePtr &node);
Status GetGetDynamicDimsNodeInfo(const NodePtr &node);
@@ -866,56 +878,54 @@ class DavinciModel {
GeModelPtr ge_model_;

bool need_destroy_aicpu_kernel_{false};
vector<std::string> out_node_name_;
vector<string> out_node_name_;

map<uint32_t, OpDescPtr> op_list_;

// data op_desc
vector<OpDescPtr> data_op_list_;

vector<OpDescPtr> output_op_list_;

vector<OpDescPtr> variable_op_list_;

std::map<uint32_t, ZeroCopyOffset> new_input_data_info_;
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_;
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
map<uint32_t, ZeroCopyOffset> new_input_data_info_;
map<uint32_t, ZeroCopyOffset> new_output_data_info_;
map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
map<const void *, ZeroCopyOffset> new_output_outside_addrs_;

std::set<const void *> real_virtual_addrs_;
set<const void *> real_virtual_addrs_;

// output op: save cce op actual needed memory size
vector<int64_t> output_memory_size_list_;

std::thread thread_id_;
thread thread_id_;

std::shared_ptr<ModelListener> listener_;
shared_ptr<ModelListener> listener_;

bool run_flg_;

std::mutex mux_run_flg_;
mutex mux_run_flg_;

int32_t priority_;

vector<rtStream_t> stream_list_;

std::mutex all_hccl_stream_list_mutex_;
mutex all_hccl_stream_list_mutex_;
vector<rtStream_t> all_hccl_stream_list_;

// for reuse hccl_follow_stream
std::mutex capacity_of_stream_mutex_;
std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_;
mutex capacity_of_stream_mutex_;
map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_;

vector<rtEvent_t> event_list_;

vector<rtLabel_t> label_list_;
set<uint32_t> label_id_indication_;

std::mutex outside_addrs_mutex_;
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.
mutex outside_addrs_mutex_;
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
set<const void *> copy_only_addrs_; // Address need copy to original place.

std::vector<TaskInfoPtr> task_list_;
vector<TaskInfoPtr> task_list_;
// rt_model_handle
rtModel_t rt_model_handle_;

@@ -933,39 +943,39 @@ class DavinciModel {
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};

// ACL queue schedule, save queue ids for Init.
std::vector<TaskInfoPtr> cpu_task_list_;
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
std::vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
std::vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
std::vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.
vector<TaskInfoPtr> cpu_task_list_;
vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.

uint64_t session_id_;

uint32_t device_id_;

std::mutex flowctrl_op_index_internal_map_mutex_;
std::map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;
mutex flowctrl_op_index_internal_map_mutex_;
map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;

std::vector<rtStream_t> active_stream_list_;
std::set<uint32_t> active_stream_indication_;
vector<rtStream_t> active_stream_list_;
set<uint32_t> active_stream_indication_;

std::set<uint32_t> hcom_streams_;
set<uint32_t> hcom_streams_;
RuntimeParam runtime_param_;

static std::mutex tvm_bin_mutex_;
std::set<std::string> tvm_bin_kernel_;
static mutex tvm_bin_mutex_;
set<string> tvm_bin_kernel_;

std::map<std::string, uint32_t> used_tbe_handle_map_;
map<string, uint32_t> used_tbe_handle_map_;

// for profiling task and graph info
std::vector<TaskDescInfo> task_desc_info_;
vector<TaskDescInfo> task_desc_info_;

int64_t maxDumpOpNum_;
// for data dump
DataDumper data_dumper_;
uint64_t iterator_count_;
bool is_l1_fusion_enable_;
std::map<OpDescPtr, void *> saved_task_addrs_;
map<OpDescPtr, void *> saved_task_addrs_;
void *l1_fusion_addr_ = nullptr;

bool known_node_ = false;
@@ -976,14 +986,14 @@ class DavinciModel {
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
map<const void *, void *> known_input_data_info_;
map<const void *, void *> known_output_data_info_;
vector<void *> total_io_addrs_;
vector<void *> orig_total_io_addrs_;
bool base_addr_not_changed_ = false;

vector<vector<int64_t>> batch_info_;
std::vector<std::vector<int64_t>> combined_batch_info_;
vector<vector<int64_t>> combined_batch_info_;
vector<string> user_designate_shape_order_;
int32_t dynamic_type_ = 0;
bool is_dynamic_ = false;
@@ -991,35 +1001,47 @@ class DavinciModel {
vector<uint64_t> batch_size_;
// key: input tensor name, generally rts op;
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
std::map<string, int64_t> tensor_name_to_fixed_addr_size_;
map<string, int64_t> tensor_name_to_fixed_addr_size_;

// key: input tensor name, generally rts op; value: the peer output anchor of the peer op
std::map<string, int64_t> tensor_name_to_peer_output_index_;
map<string, int64_t> tensor_name_to_peer_output_index_;
// if model is first execute
bool is_first_execute_;
// for op debug
std::mutex debug_reg_mutex_;
mutex debug_reg_mutex_;
bool is_op_debug_reg_ = false;
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
bool is_new_model_desc_{false};
bool is_online_infer_dynamic_ = false;
bool is_getnext_sink_dynamic_ = false;
std::vector<int64_t> cur_dynamic_dims_;
vector<int64_t> cur_dynamic_dims_;
void *netoutput_last_input_addr_ = nullptr;
int64_t netoutput_last_input_size_ = 0;
size_t shape_of_cur_dynamic_dims_ = 0;
// key: input_index: input is merge node; value: each gear info and each output size
std::map<size_t, std::map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
map<size_t, map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;
map<size_t, map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
vector<vector<int64_t>> all_gears_info_;

std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
multimap<uint32_t, uint32_t> op_id_map_;
vector<ProfileInfo> profile_list_;

// For super kernel.
SuperKernelTaskInfo skt_info_;

bool is_dynamic_aipp_ = false;
vector<string> dynamic_output_shape_info_;

vector<vector<void *>> input_addrs_list_;
vector<vector<void *>> output_addrs_list_;

vector<int64_t> output_buffer_size_;
vector<vector<int64_t>> output_shape_info_;

vector<InputOutputDescInfo> output_descs_;
vector<uint32_t> output_formats_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 14
- 0
ge/graph/manager/graph_manager.cc View File

@@ -38,6 +38,10 @@
#include "graph/partition/stage_partition.h"
#include "graph/passes/addn_pass.h"
#include "graph/passes/bitcast_pass.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_remove_pass.h"
#include "graph/passes/inplace_support_check_pass.h"
#endif
#include "graph/passes/atomic_addr_clean_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/cast_remove_pass.h"
@@ -2247,10 +2251,20 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
ReshapeRemovePass reshape_remove_pass;
CondRemovePass condition_remove_pass;
BitcastPass bitcast_pass;
#ifndef ONLY_COMPILE_OPEN_SRC
AssignRemovePass assign_remove_pass;
InplaceSupportCheckPass inplace_support_check_pass;
#endif
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass);
names_to_passes.emplace_back("BitcastPass", &bitcast_pass);
#ifndef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass);
names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass);
}
#endif
GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses");


+ 17
- 1
ge/graph/manager/graph_mem_allocator.cc View File

@@ -19,7 +19,9 @@
#include <string>
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"

#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif
namespace ge {
void MemoryAllocator::Initialize(uint32_t device_id) {
GELOGI("MemoryAllocator::Initialize");
@@ -190,6 +192,12 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed.");
return ge::INTERNAL_ERROR;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) {
GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed.");
return ge::INTERNAL_ERROR;
}
#endif
return SUCCESS;
}

@@ -211,6 +219,9 @@ void MemManager::Finalize() noexcept {
// caching and rdma allocator use memory allocator, so finalize them first
FinalizeAllocatorMap(caching_allocator_map_);
FinalizeAllocatorMap(rdma_allocator_map_);
#ifndef ONLY_COMPILE_OPEN_SRC
FinalizeAllocatorMap(host_allocator_map_);
#endif
FinalizeAllocatorMap(memory_allocator_map_);
}

@@ -239,4 +250,9 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, rdma_allocator_map_);
}
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, host_allocator_map_);
}
#endif
} // namespace ge

+ 9
- 1
ge/graph/manager/graph_mem_allocator.h View File

@@ -139,7 +139,9 @@ class MemoryAllocator {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator;
class RdmaPoolAllocator;

#ifndef ONLY_COMPILE_OPEN_SRC
class HostMemAllocator;
#endif
class MemManager {
public:
MemManager();
@@ -148,6 +150,9 @@ class MemManager {
static MemoryAllocator *Instance(rtMemType_t memory_type);
CachingAllocator &CachingInstance(rtMemType_t memory_type);
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type);
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &HostMemInstance(rtMemType_t memory_type);
#endif
MemManager(const MemManager &) = delete;
MemManager &operator=(const MemManager &) = delete;
///
@@ -235,6 +240,9 @@ class MemManager {
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_;
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_;
#ifndef ONLY_COMPILE_OPEN_SRC
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_;
#endif
std::recursive_mutex allocator_mutex_;
};
} // namespace ge


+ 69
- 0
ge/graph/manager/host_mem_allocator.cc View File

@@ -0,0 +1,69 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/manager/host_mem_allocator.h"
#include "framework/common/debug/ge_log.h"
#include "common/ge/ge_util.h"

namespace ge {
const void *HostMemAllocator::Malloc(const std::shared_ptr<AlignedPtr> &aligned_ptr, size_t size) {
if (aligned_ptr == nullptr) {
GELOGW("Insert a null aligned_ptr");
return nullptr;
}
GELOGD("allocate existed host memory succ, size=%zu", size);
allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr };
return aligned_ptr->Get();
}

// Allocate a fresh aligned host-memory block of `size` bytes and record it.
// @return writable base address, or nullptr when the allocation fails.
uint8_t *HostMemAllocator::Malloc(size_t size) {
  GELOGD("start to malloc host memory, size=%zu", size);
  std::lock_guard<std::mutex> lock(mutex_);
  auto block = MakeShared<AlignedPtr>(size);
  if (block == nullptr) {
    GELOGE(INTERNAL_ERROR, "make shared_ptr for AlignedPtr failed");
    return nullptr;
  }
  // Track the block by its base address so Free/GetAlignedPtr can find it.
  allocated_blocks_[block->Get()] = std::make_pair(size, block);
  GELOGD("allocate host memory succ, size=%zu", size);
  return block->MutableGet();
}

// Release the block previously registered at `memory_addr`.
// @return SUCCESS, GE_GRAPH_FREE_FAILED for a null pointer,
//         PARAM_INVALID for an address that was never registered.
Status HostMemAllocator::Free(const void *memory_addr) {
  if (memory_addr == nullptr) {
    GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer");
    return GE_GRAPH_FREE_FAILED;
  }

  std::lock_guard<std::mutex> lock(mutex_);
  const auto iter = allocated_blocks_.find(memory_addr);
  if (iter == allocated_blocks_.end()) {
    GELOGE(PARAM_INVALID, "Invalid memory pointer");
    return PARAM_INVALID;
  }
  // Drop the owning AlignedPtr first, then remove the bookkeeping entry.
  iter->second.second.reset();
  allocated_blocks_.erase(iter);

  return SUCCESS;
}

// Drop every owning AlignedPtr reference, then discard the whole table.
void HostMemAllocator::Clear() {
  for (auto iter = allocated_blocks_.begin(); iter != allocated_blocks_.end(); ++iter) {
    iter->second.second.reset();
  }
  allocated_blocks_.clear();
}
} // namespace ge

+ 57
- 0
ge/graph/manager/host_mem_allocator.h View File

@@ -0,0 +1,57 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_
#define GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_

#include <mutex>
#include <map>

#include "framework/common/ge_inner_error_codes.h"
#include "graph/aligned_ptr.h"
#include "runtime/mem.h"

namespace ge {
class HostMemAllocator {
public:
explicit HostMemAllocator(rtMemType_t) {}
~HostMemAllocator() = default;

HostMemAllocator(const HostMemAllocator &) = delete;
HostMemAllocator &operator=(const HostMemAllocator &) = delete;

Status Initialize() {
Clear();
return SUCCESS;
}
void Finalize() { Clear(); }

const void *Malloc(const std::shared_ptr<AlignedPtr>& aligned_ptr, size_t size);
uint8_t *Malloc(size_t size);
Status Free(const void *memory_addr);

std::pair<size_t, std::shared_ptr<AlignedPtr>> GetAlignedPtr(const void *addr) { return allocated_blocks_[addr]; }

private:
void Clear();

std::map<const void *, std::pair<size_t, std::shared_ptr<AlignedPtr>>> allocated_blocks_;
// lock around all operations
mutable std::mutex mutex_;
};
} // namespace ge

#endif // GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_

+ 14
- 1
ge/graph/manager/host_mem_manager.cc View File

@@ -43,16 +43,29 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) {
return GE_GRAPH_MEMORY_ALLOC_FAILED;
}
mem_info.fd = output_para.fd;
#ifndef ONLY_COMPILE_OPEN_SRC
mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) {
ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr));
},
[](uint8_t *ptr) {
ptr = nullptr;
});
#else
mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr);
#endif
mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr);
return SUCCESS;
}

Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) {
GELOGD("SharedMemAllocator::DeAllocate");
#ifndef ONLY_COMPILE_OPEN_SRC
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address};
#else
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_address, mem_info.device_address};

#endif
rtError_t rt_ret = rtFreeHostSharedMemory(&free_para);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret);


+ 4
- 0
ge/graph/manager/host_mem_manager.h View File

@@ -42,7 +42,11 @@ struct SharedMemInfo {
uint64_t mem_size = 0;
int fd = 0;
uint8_t *device_address = nullptr;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr;
#else
uint8_t *host_address = nullptr;
#endif
SharedMemInfo() = default;
SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {}
};


+ 0
- 133
ge/graph/passes/assign_pass.cc View File

@@ -1,133 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/assign_pass.h"

#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace {
const uint32_t kValidInputNodeOutputNum = 1;
const int32_t kAssignRefInputIndex = 0;
const int32_t kAssignValueInputIndex = 1;
}

namespace ge {
// Remove a redundant Assign node: when Assign(ref=variable, value=non-const)
// matches IsCondMatch, delete the Assign and wire the value producer directly
// into the variable node, tagging the producer's output with ASSIGN_VAR_NAME.
Status AssignPass::Run(NodePtr &node) {
  GELOGD("AssignPass running");
  // The pass is invoked for every node; only Assign nodes are handled.
  if (node->GetType() != ASSIGN) {
    GELOGD("No need run AssignPass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str());
    return SUCCESS;
  }

  // Resolve both Assign inputs: ref (index 0) and value (index 1).
  const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex);
  const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex);
  if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) {
    GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str());
    return FAILED;
  }
  const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor();
  const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor();
  if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) {
    GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str());
    return FAILED;
  }

  if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) {
    ///
    /// variable   not-const        not-const
    ///       \   /                     |
    ///        \ /                      |
    ///      Assign       ---->     variable
    ///         |                       |
    ///         |                       |
    ///       node                    node
    ///
    GELOGI("Optimization for assign_node %s start", node->GetName().c_str());
    // Delete the Assign node, keeping only the ref-input side of its edges.
    if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) {
      GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str());
      return FAILED;
    }
    AddNodeDeleted(node);

    const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc();
    const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc();
    if ((ref_input == nullptr) || (value_input == nullptr)) {
      GELOGE(FAILED, "value input is null");
      return FAILED;
    }
    // Record the variable's name on the value producer's output desc so later
    // stages can still associate that output with the variable.
    if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME,
                           ref_input->GetName())) {
      GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
      return FAILED;
    }

    // variable has and only has one input
    if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str());
      return FAILED;
    }
    // Connect the value producer straight into the variable node.
    if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str());
      return FAILED;
    }
  }

  GELOGD("AssignPass success");
  return SUCCESS;
}

///
/// @brief Check if need optimize for assign_node
/// @param [in] assign_node
/// @param [in] peer_data_anchor for ref_input of assign_node
/// @param [in] peer_data_anchor for value_input of assign_node
/// @return Status
///
///
/// @brief Decide whether an Assign node can be optimized away: the value
///        producer must not be a constant, the ref producer must be a
///        variable with no data inputs, and both producers must feed
///        exactly one consumer.
/// @param [in] node               the Assign node under inspection
/// @param [in] ref_peer_anchor    peer output anchor of the ref input
/// @param [in] value_peer_anchor  peer output anchor of the value input
/// @return true when every condition holds
///
bool AssignPass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor,
                             const OutDataAnchorPtr &value_peer_anchor) {
  const auto &ref_node = ref_peer_anchor->GetOwnerNode();
  const auto &value_node = value_peer_anchor->GetOwnerNode();
  GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s",
         node->GetName().c_str(), ref_node->GetName().c_str(), value_node->GetName().c_str());

  // A constant value producer cannot be rewired into the variable.
  const std::string &value_type = value_node->GetType();
  if (value_type == CONSTANTOP || value_type == CONSTANT) {
    GELOGD("value input is const");
    return false;
  }

  // The ref producer must be a variable node ...
  const std::string &ref_type = ref_node->GetType();
  if (ref_type != VARIABLE && ref_type != VARIABLEV2) {
    GELOGD("ref input is not var");
    return false;
  }
  // ... and one without any existing data input.
  if (!ref_node->GetInDataNodes().empty()) {
    GELOGD("ref input has data input");
    return false;
  }

  // Either producer feeding more than one consumer blocks the rewrite.
  const bool single_ref_consumer = (ref_peer_anchor->GetPeerInDataNodesSize() == kValidInputNodeOutputNum);
  const bool single_value_consumer = (value_peer_anchor->GetPeerInDataNodesSize() == kValidInputNodeOutputNum);
  if (!single_ref_consumer || !single_value_consumer) {
    GELOGD("ref / value input has other output(s)");
    return false;
  }

  GELOGD("Optimization condition matches, assign_node: %s", node->GetName().c_str());
  return true;
}
} // namespace ge

+ 250
- 0
ge/graph/passes/assign_remove_pass.cc View File

@@ -0,0 +1,250 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/assign_remove_pass.h"
#include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace {
constexpr uint32_t kValidInputNodeOutputNum = 1;
constexpr int32_t kAssignRefInputIndex = 0;
constexpr int32_t kAssignValueInputIndex = 1;
static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}

namespace ge {
#ifndef ONLY_COMPILE_OPEN_SRC
// Entry point of the pass: first migrate any assign_var_name attr carried by
// this node, then — for Assign nodes only — try to remove the node itself.
Status AssignRemovePass::Run(NodePtr &node) {
  GELOGD("AssignRemovePass running");

  if (TransformAttr(node) != SUCCESS) {
    GELOGE(FAILED, "Transform assign_var_name attr failed, node=%s", node->GetName().c_str());
    return FAILED;
  }

  // Only Assign nodes are candidates for removal.
  const bool is_assign = (node->GetType() == ASSIGN);
  if (is_assign && (OptimizedAssignNode(node) != SUCCESS)) {
    GELOGE(FAILED, "Optimize for assign_node %s failed", node->GetName().c_str());
    return FAILED;
  }

  GELOGD("AssignRemovePass success");
  return SUCCESS;
}

///
/// @brief Optimize for assign_node
/// @param [in] assign_node
/// @return Status
///
// Remove a redundant Assign node when IsCondMatch holds: delete the Assign,
// wire the value producer directly into the variable node, tag the producer's
// output desc with ASSIGN_VAR_NAME, and re-pass the producer node.
Status AssignRemovePass::OptimizedAssignNode(NodePtr &assign_node) {
  // Resolve both Assign inputs: ref (index 0) and value (index 1).
  const auto &ref_in_anchor = assign_node->GetInDataAnchor(kAssignRefInputIndex);
  const auto &value_in_anchor = assign_node->GetInDataAnchor(kAssignValueInputIndex);
  if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) {
    GELOGE(FAILED, "In data anchor is null, node:%s", assign_node->GetName().c_str());
    return FAILED;
  }
  const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor();
  const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor();
  if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) {
    GELOGE(FAILED, "Peer data anchor is null, node:%s", assign_node->GetName().c_str());
    return FAILED;
  }

  if (IsCondMatch(assign_node, ref_peer_anchor, value_peer_anchor)) {
    ///
    /// variable   not-const        not-const
    ///       \   /                     |
    ///        \ /                      |
    ///      Assign       ---->     variable
    ///         |                       |
    ///         |                       |
    ///       node                    node
    ///
    GELOGD("Optimization for assign_node %s start", assign_node->GetName().c_str());
    // Delete the Assign node, keeping only the ref-input side of its edges.
    if (IsolateAndDeleteNode(assign_node, {kAssignRefInputIndex}) != SUCCESS) {
      GELOGE(FAILED, "Isolate and delete assign_node %s failed.", assign_node->GetName().c_str());
      return FAILED;
    }

    const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc();
    const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc();
    if ((ref_input == nullptr) || (value_input == nullptr)) {
      GELOGE(FAILED, "value input is null");
      return FAILED;
    }

    // variable has and only has one input
    if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str());
      return FAILED;
    }
    // Connect the value producer straight into the variable node.
    if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str());
      return FAILED;
    }

    // Record the variable's name on the value producer's output desc so later
    // stages can still associate that output with the variable.
    GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s",
           value_input->GetName().c_str(), ref_input->GetName().c_str());
    if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME,
                           ref_input->GetName())) {
      GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
      return FAILED;
    }
    // Re-run passes on the producer so the new attr/edges are processed.
    auto value_node = value_peer_anchor->GetOwnerNode();
    AddRePassNode(value_node);
  }
  return SUCCESS;
}

///
/// @brief Transform assign_var_name attr: for every output desc that carries both
///        INPLACE_SUPPORT_INPUT_INDEX and ASSIGN_VAR_NAME, propagate ASSIGN_VAR_NAME
///        upstream to the producer feeding the in-place-capable input.
/// @param [in] node
/// @return Status
///
Status AssignRemovePass::TransformAttr(NodePtr &node) {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  for (const auto &out_desc : node->GetOpDesc()->GetAllOutputsDesc()) {
    int32_t inplace_idx = -1;
    std::string var_name;
    // Both attrs must be present; GetStr is only attempted when GetInt succeeds.
    const bool has_attrs = AttrUtils::GetInt(out_desc, INPLACE_SUPPORT_INPUT_INDEX, inplace_idx) &&
                           AttrUtils::GetStr(out_desc, ASSIGN_VAR_NAME, var_name);
    if (!has_attrs) {
      continue;
    }
    GELOGD("Transform attr ASSIGN_VAR_NAME on node %s, assign_var_name=%s, inplace_input_idx=%d, ",
           node->GetName().c_str(), var_name.c_str(), inplace_idx);
    const auto &in_anchor = node->GetInDataAnchor(inplace_idx);
    GE_CHECK_NOTNULL(in_anchor);
    const auto &peer_anchor = in_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_anchor);
    auto src_node = peer_anchor->GetOwnerNode();
    GE_CHECK_NOTNULL(src_node->GetOpDesc());
    GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", src_node->GetName().c_str(), var_name.c_str());
    if (!AttrUtils::SetStr(src_node->GetOpDesc()->MutableOutputDesc(peer_anchor->GetIdx()),
                           ASSIGN_VAR_NAME, var_name)) {
      GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
      return FAILED;
    }
    // Producer changed: schedule it for another pass round.
    AddRePassNode(src_node);
  }
  return SUCCESS;
}
#else
// Legacy (#else / ONLY_COMPILE_OPEN_SRC) variant: no attr propagation step; the
// Assign removal is inlined directly into Run.
Status AssignRemovePass::Run(NodePtr &node) {
  GELOGD("AssignRemovePass running");
  if (node->GetType() != ASSIGN) {
    GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str());
    return SUCCESS;
  }

  const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex);
  const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex);
  if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) {
    GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str());
    return FAILED;
  }
  const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor();
  const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor();
  if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) {
    GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str());
    return FAILED;
  }

  if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) {
    ///
    ///    variable  not-const               not-const
    ///         \     /                          |
    ///          \   /                           |
    ///          Assign           ---->       variable
    ///            |                             |
    ///            |                             |
    ///           node                          node
    ///
    GELOGI("Optimization for assign_node %s start", node->GetName().c_str());
    // Delete the Assign, splicing its ref input (index 0) through to its consumers.
    // The peer anchors captured above belong to the surviving producers and stay valid.
    if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) {
      GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str());
      return FAILED;
    }
    // Inform the pass framework that this node is gone.
    AddNodeDeleted(node);

    const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc();
    const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc();
    if ((ref_input == nullptr) || (value_input == nullptr)) {
      GELOGE(FAILED, "value input is null");
      return FAILED;
    }
    // Tag the value producer's output with the variable name so the write happens in place.
    if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME,
                           ref_input->GetName())) {
      GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
      return FAILED;
    }

    // variable has and only has one input: mirror the producer's output desc on it,
    // then connect the producer straight to the variable.
    if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str());
      return FAILED;
    }
    if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str());
      return FAILED;
    }
  }

  GELOGD("AssignRemovePass success");
  return SUCCESS;
}
#endif
///
/// @brief Check if need optimize for assign_node
/// @param [in] node               the Assign node under inspection
/// @param [in] ref_peer_anchor    peer out-anchor feeding the ref input
/// @param [in] value_peer_anchor  peer out-anchor feeding the value input
/// @return true when the Assign can be removed safely
///
bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor,
                                   const OutDataAnchorPtr &value_peer_anchor) {
  const auto &ref_node = ref_peer_anchor->GetOwnerNode();
  const auto &value_node = value_peer_anchor->GetOwnerNode();
  GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s",
         node->GetName().c_str(), ref_node->GetName().c_str(), value_node->GetName().c_str());

  // The value must come from a real computation node, otherwise nothing writes in place.
  if (kNoTaskNodeTypes.count(value_node->GetType()) > 0) {
    GELOGD("value input is not calculate node");
    return false;
  }

  // The ref must be a source variable (no upstream data inputs).
  const std::string &ref_type = ref_node->GetType();
  if ((ref_type != VARIABLE) && (ref_type != VARIABLEV2)) {
    GELOGD("ref input is not var");
    return false;
  }
  if (!ref_node->GetInDataNodes().empty()) {
    GELOGD("ref input has data input");
    return false;
  }

  // Both producers may feed only this Assign; extra consumers would observe the rewiring.
  const bool exclusive = (ref_peer_anchor->GetPeerInDataNodesSize() == kValidInputNodeOutputNum) &&
                         (value_peer_anchor->GetPeerInDataNodesSize() == kValidInputNodeOutputNum);
  if (!exclusive) {
    GELOGD("ref / value input has other output(s)");
    return false;
  }

  GELOGD("Optimization condition matches, assign_node: %s", node->GetName().c_str());
  return true;
}
} // namespace ge

ge/graph/passes/assign_pass.h → ge/graph/passes/assign_remove_pass.h View File

@@ -14,17 +14,32 @@
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_ASSIGN_PASS_H_
#define GE_GRAPH_PASSES_ASSIGN_PASS_H_
#ifndef GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_
#define GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_

#include "graph/passes/base_pass.h"

namespace ge {
class AssignPass : public BaseNodePass {
class AssignRemovePass : public BaseNodePass {
public:
Status Run(NodePtr &node) override;

private:
#ifndef ONLY_COMPILE_OPEN_SRC
///
/// @brief Optimize for assign_node
/// @param [in] assign_node
/// @return Status
///
Status OptimizedAssignNode(NodePtr &assign_node);

///
/// @brief Transform assign_var_name attr
/// @param [in] node
/// @return Status
///
Status TransformAttr(NodePtr &node);
#endif
///
/// @brief Check if need optimize for assign_node
/// @param [in] assign_node
@@ -36,4 +51,4 @@ class AssignPass : public BaseNodePass {
const OutDataAnchorPtr &value_peer_anchor);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_ASSIGN_PASS_H_
#endif // GE_GRAPH_PASSES_ASSIGN_REMOVE_PASS_H_

+ 10
- 6
ge/graph/passes/constant_fuse_same_pass.cc View File

@@ -19,13 +19,7 @@
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/type_utils.h"

@@ -121,11 +115,21 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph,
TypeUtils::DataTypeToSerialString(data_type).c_str());
continue;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) {
GELOGW("aligned_ptr is null while size is not 0");
continue;
}
#endif
++insert_const_nums;

SameConstKey map_key;
map_key.data_size = type_size;
#ifndef ONLY_COMPILE_OPEN_SRC
map_key.aligned_ptr = weight->MutableData().GetAlignedPtr();
#else
map_key.data = weight->GetData().GetData();
#endif
map_key.data_type = data_type;
map_key.format = output_tensor->GetFormat();
map_key.shape = output_tensor->GetShape().GetDims();


+ 16
- 1
ge/graph/passes/constant_fuse_same_pass.h View File

@@ -21,14 +21,20 @@
#include <set>
#include <utility>
#include <vector>

#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/aligned_ptr.h"
#endif
#include "graph/types.h"
#include "inc/graph_pass.h"

namespace ge {
struct SameConstKey {
int data_size;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> aligned_ptr;
#else
const uint8_t *data;
#endif
DataType data_type;
Format format;
std::vector<int64_t> shape;
@@ -38,10 +44,19 @@ struct SameConstKey {
if (data_size != key.data_size) {
return data_size < key.data_size;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (data_size != 0) {
int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size);
if (ret != 0) {
return ret < 0;
}
}
#else
int ret = memcmp(data, key.data, data_size);
if (ret != 0) {
return ret < 0;
}
#endif
if (data_type != key.data_type) {
return data_type < key.data_type;
}


+ 83
- 0
ge/graph/passes/inplace_support_check_pass.cc View File

@@ -0,0 +1,83 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/inplace_support_check_pass.h"
#include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace {
constexpr uint32_t kInplaceSupportOutputIndex = 0;
constexpr uint32_t kInplaceSupportOutputNum = 1;
static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}

namespace ge {
// For single-output nodes, find an input whose producer's output feeds only this node
// and whose dtype/shape match the output exactly; mark that input index on the output
// desc (INPLACE_SUPPORT_INPUT_INDEX) so the output may reuse the input's buffer.
Status InplaceSupportCheckPass::Run(NodePtr &node) {
  GELOGD("InplaceSupportCheckPass running");
  // Only nodes with exactly one data output are candidates.
  if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) {
    GELOGD("output num of node %s is not %u, skip InplaceSupportCheckPass",
           node->GetName().c_str(), kInplaceSupportOutputNum);
    return SUCCESS;
  }
  GE_CHECK_NOTNULL(node->GetOpDesc());
  const DataType &output_type = node->GetOpDesc()->GetOutputDesc(kInplaceSupportOutputIndex).GetDataType();
  const GeShape &output_shape = node->GetOpDesc()->GetOutputDesc(kInplaceSupportOutputIndex).GetShape();
  GELOGD("process InplaceSupportCheckPass on node %s", node->GetName().c_str());
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_data_anchor == nullptr) {
      continue;
    }
    auto in_node = peer_data_anchor->GetOwnerNode();
    // Inputs coming from data/const/variable sources must not be overwritten.
    if (kSrcNodeTypes.count(in_node->GetType()) > 0) {
      GELOGD("meet src_node %s", in_node->GetName().c_str());
      continue;
    }
    // If the producer output has other consumers, they would see the clobbered buffer.
    if (peer_data_anchor->GetPeerInDataNodesSize() != kInplaceSupportOutputNum) {
      GELOGD("peer_data_anchor links with multi in_data_anchors");
      continue;
    }

    // In-place reuse requires identical dtype and dims between this input and the output.
    int32_t inplace_input_idx = in_data_anchor->GetIdx();
    const DataType &input_type = node->GetOpDesc()->GetInputDesc(inplace_input_idx).GetDataType();
    const GeShape &input_shape = node->GetOpDesc()->GetInputDesc(inplace_input_idx).GetShape();
    if (input_type != output_type) {
      GELOGW("DataType mismatch, in_idx=%d, input_type=%u, output_type=%u", inplace_input_idx, input_type, output_type);
      continue;
    }
    if (input_shape.GetDims() != output_shape.GetDims()) {
      GELOGW("Shape mismatch, in_idx=%d, input_shape=[%s], output_shape=[%s]",
             inplace_input_idx, input_shape.ToString().c_str(), output_shape.ToString().c_str());
      continue;
    }

    GELOGD("add attr INPLACE_SUPPORT_INPUT_INDEX on node %s, input_idx=%d", node->GetName().c_str(), inplace_input_idx);
    if (!AttrUtils::SetInt(node->GetOpDesc()->MutableOutputDesc(kInplaceSupportOutputIndex),
                           INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx)) {
      GELOGE(FAILED, "Set attr INPLACE_SUPPORT_INPUT_INDEX on node %s failed.", node->GetName().c_str());
      return FAILED;
    }
    AddRePassNode(node);
    // Only one input can be reused in place; stop at the first match.
    break;
  }

  GELOGD("InplaceSupportCheckPass success");
  return SUCCESS;
}
} // namespace ge

+ 28
- 0
ge/graph/passes/inplace_support_check_pass.h View File

@@ -0,0 +1,28 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_
#define GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_

#include "graph/passes/base_pass.h"

namespace ge {
// Pass that marks single-output nodes whose output can reuse an input buffer in place
// (sets INPLACE_SUPPORT_INPUT_INDEX on the output tensor desc).
class InplaceSupportCheckPass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_

+ 24
- 2
ge/graph/passes/multi_batch_clone_pass.cc View File

@@ -22,6 +22,8 @@
#include "graph/preprocess/multi_batch_options.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "register/op_registry.h"

namespace ge {
@@ -478,8 +480,28 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
return SUCCESS;
}

(void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());

GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex));
std::vector<std::string> input_dims_str;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
auto shape = data_shape;
auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str());
return ret;
}
tensor.SetShape(shape);
int64_t tensor_size = 0;
(void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size);
string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" +
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" +
std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" +
formats::JoinToString(tensor.GetShape().GetDims());
input_dims_str.emplace_back(input_str);
}
(void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);

size_t max_shape_index = 0;
int64_t max_size = 0;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
@@ -593,7 +615,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
graph->AddSubgraph(subgraph->GetName(), subgraph);
all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]),
"Update %s failed", all_branch_output_[subgraph]->GetName().c_str());
"Update %s failed", all_branch_output_[subgraph]->GetName().c_str());

const string key_name = "branches" + std::to_string(i);
op_desc->AddSubgraphName(key_name);


+ 1
- 1
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -598,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
///
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
const std::set<NodePtr> &same_cond_switch) {
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, cast_node=%s", switch_node->GetName().c_str(),
cast_node->GetName().c_str());
std::string orig_switch_name = switch_node->GetName();
OpDescPtr switch_desc = switch_node->GetOpDesc();


+ 9
- 4
ge/graph/preprocess/graph_preprocess.cc View File

@@ -19,7 +19,6 @@
#include <set>
#include <string>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h"
#include "common/formats/format_transfers/format_transfer_transpose.h"
@@ -38,7 +37,9 @@
#include "graph/passes/addn_pass.h"
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
#ifdef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_remove_pass.h"
#endif
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
@@ -1699,7 +1700,9 @@ Status GraphPrepare::PrepareOptimize() {
VarIsInitializedOpPass var_is_initialized_pass;
ParallelConcatStartOpPass parallel_concat_start_op_pass;
IdentityPass identity_pass(false);
AssignPass assign_pass;
#ifdef ONLY_COMPILE_OPEN_SRC
AssignRemovePass assign_remove_pass;
#endif
SnapshotPass snapshot_pass;
if (!options_.train_graph_flag) {
names_to_passes.emplace_back("DropOutPass", &dropout_pass);
@@ -1714,9 +1717,11 @@ Status GraphPrepare::PrepareOptimize() {
names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass);
names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass);
names_to_passes.emplace_back("IdentityPass", &identity_pass);
#ifdef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignPass", &assign_pass);
names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass);
}
#endif
GE_TIMESTAMP_START(names_to_passes);
ret = ge_passes.Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses");


+ 5
- 4
ge/host_cpu_engine/CMakeLists.txt View File

@@ -20,6 +20,7 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(host_cpu_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(host_cpu_engine PRIVATE
@@ -49,9 +50,7 @@ target_link_libraries(host_cpu_engine PRIVATE
ascend_protobuf
c_sec
graph
register
slog
runtime
-Wl,--as-needed
)

@@ -60,6 +59,7 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_host_cpu_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_host_cpu_engine PRIVATE
@@ -90,9 +90,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE
ascend_protobuf
c_sec
graph
register
slog
runtime_compile
-Wl,--as-needed
)

@@ -106,6 +104,7 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

target_compile_options(host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(host_cpu_opskernel_builder PRIVATE
@@ -145,6 +144,7 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
@@ -189,6 +189,7 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})

target_compile_options(host_cpu_opskernel_builder_static PRIVATE
-Werror
-fno-common
)

target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE


+ 11
- 0
ge/hybrid/common/npu_memory_allocator.cc View File

@@ -20,6 +20,9 @@
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif

namespace ge {
namespace hybrid {
@@ -64,7 +67,11 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
if (mem_type == RDMA_HBM) {
buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_);
} else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size);
#else
buffer = malloc(allocate_size);
#endif
} else {
if (allocate_size > kMaxHbmMemorySize) {
GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size);
@@ -101,7 +108,11 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) {
if (mem_type == RDMA_HBM) {
MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
} else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data);
#else
free(data);
#endif
} else {
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
}


+ 32
- 3
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -25,11 +25,13 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "graph/utils/graph_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h"
#include "framework/common/debug/ge_log.h"
#include "graph/utils/attr_utils.h"

namespace ge {
namespace hybrid {
@@ -852,9 +854,24 @@ Status HybridModelBuilder::InitConstantOps() {

std::unique_ptr<TensorValue> var_tensor;
if (GetContext().GetHostExecFlag()) {
#ifndef ONLY_COMPILE_OPEN_SRC
GE_CHECK_NOTNULL(ge_tensor);
// Address for eigen kernel should be aligned with 16 bytes
// Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned
GeTensor aligned_tensor = ge_tensor->Clone();
GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize());
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(),
aligned_tensor.GetData().size()) == nullptr) {
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
return MEMALLOC_FAILED;
}
var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
aligned_tensor.GetData().size()));
#else
auto buffer = ge_tensor->MutableData();
GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize());
var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize()));
#endif
} else {
GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor));
GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize());
@@ -909,9 +926,21 @@ Status HybridModelBuilder::InitVariableTensors() {
GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str());
return GE_GRAPH_MALLOC_FAILED;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
tensor_size) == nullptr) {
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
return MEMALLOC_FAILED;
}
GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size);

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
tensor_size));
#else
GELOGD("Host variable [%s] malloc success.", it.first.c_str());

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size));
#endif
GE_CHECK_NOTNULL(tensor);
hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
}
@@ -933,7 +962,7 @@ Status HybridModelBuilder::InitWeights() {
auto op_desc = constant_node->GetOpDesc();
auto v_weights = ModelUtils::GetWeights(op_desc);
if (v_weights.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", constant_node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str());
return INTERNAL_ERROR;
}
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());


+ 20
- 2
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc View File

@@ -18,6 +18,10 @@
#include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "graph/passes/folding_pass.h"
#include "hybrid/model/hybrid_model.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "ge_local_engine/engine/host_cpu_engine.h"

namespace ge {
@@ -50,15 +54,23 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
auto input_desc_ptr = context.GetInputDesc(i);
GE_CHECK_NOTNULL(input_desc_ptr);
const auto &input_desc = *input_desc_ptr;
#ifndef ONLY_COMPILE_OPEN_SRC
auto tensor = context.GetInput(i);
GE_CHECK_NOTNULL(tensor);
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first);
#else
GE_CHECK_NOTNULL(context.GetInput(i));
auto in_tensor = MakeShared<GeTensor>(input_desc,
reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()),
context.GetInput(i)->GetSize());
#endif
GE_CHECK_NOTNULL(in_tensor);
in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType());
in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape());
inputs.emplace_back(in_tensor);
GELOGI("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
}

std::vector<GeTensorPtr> outputs;
@@ -72,14 +84,20 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
}
auto tensor = context.GetOutput(i);
GE_CHECK_NOTNULL(tensor);
#ifndef ONLY_COMPILE_OPEN_SRC
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first);
#else
auto out_tensor = MakeShared<GeTensor>(output_desc,
reinterpret_cast<const uint8_t *>(tensor->GetData()),
tensor->GetSize());
#endif
GE_CHECK_NOTNULL(out_tensor);
out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType());
out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape());
outputs.emplace_back(out_tensor);
GELOGI("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
}

return HostCpuEngine::GetInstance().Run(node_, inputs, outputs);


+ 2
- 2
ge/hybrid/node_executor/node_executor.cc View File

@@ -243,8 +243,8 @@ Status NoOpTask::UpdateArgs(TaskContext &context) {
return SUCCESS;
}
Status NoOpTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
GELOGD("[%s] Skipping execute for op with empty outputs", context.GetNodeName());
return SUCCESS;
GELOGD("[%s] Skipping execution for op with empty outputs", context.GetNodeName());
return context.TryExecuteCallback(done_callback);
}
} // namespace hybrid
} // namespace ge

+ 3
- 0
ge/offline/CMakeLists.txt View File

@@ -22,6 +22,7 @@ target_compile_options(atc PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fno-common
)

target_compile_definitions(atc PRIVATE
@@ -83,6 +84,7 @@ target_compile_options(atc_atc.bin PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fno-common
)

target_compile_definitions(atc_atc.bin PRIVATE
@@ -149,6 +151,7 @@ target_compile_options(fwk_atc.bin PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fno-common
)

target_compile_definitions(fwk_atc.bin PRIVATE


+ 37
- 3
ge/offline/main.cc View File

@@ -206,6 +206,8 @@ DEFINE_string(mdl_bank_path, "", "Optional; model bank path");

DEFINE_string(op_bank_path, "", "Optional; op bank path");

DEFINE_string(display_model_info, "0", "Optional; display model info");

class GFlagUtils {
public:
/**
@@ -225,7 +227,8 @@ class GFlagUtils {
"===== Basic Functionality =====\n"
"[General]\n"
" --h/help Show this help message\n"
" --mode Run mode. 0(default): generate offline model; 1: convert model to JSON format "
" --mode Run mode. 0(default): generate offline model; 1: convert model to JSON format; "
"6: display model info"
"3: only pre-check; 5: convert ge dump txt file to JSON format\n"
"\n[Input]\n"
" --model Model file\n"
@@ -313,7 +316,8 @@ class GFlagUtils {
" --op_compiler_cache_dir Set the save path of operator compilation cache files.\n"
"Default value: $HOME/atc_data\n"
" --op_compiler_cache_mode Set the operator compilation cache mode."
"Options are disable(default), enable and force(force to refresh the cache)");
"Options are disable(default), enable and force(force to refresh the cache)\n"
" --display_model_info enable for display model info; 0(default): close display, 1: open display");

gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
// Using gflags to analyze input parameters
@@ -862,7 +866,7 @@ domi::Status GenerateInfershapeJson() {
static Status ConvertModelToJson(int fwk_type, const string &model_file, const string &json_file) {
Status ret = ge::SUCCESS;
if (fwk_type == -1) {
ret = ge::ConvertOmModelToJson(model_file.c_str(), json_file.c_str());
ret = ge::ConvertOm(model_file.c_str(), json_file.c_str(), true);
return ret;
}

@@ -1176,6 +1180,8 @@ domi::Status GenerateOmModel() {
options.insert(std::pair<string, string>(string(ge::MDL_BANK_PATH_FLAG), FLAGS_mdl_bank_path));

options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path));

options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info));
// set enable scope fusion passes
SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes);
// print atc option map
@@ -1188,6 +1194,11 @@ domi::Status GenerateOmModel() {
return domi::FAILED;
}

if (FLAGS_display_model_info == "1") {
GELOGI("need to display model info.");
return ge::ConvertOm(FLAGS_output.c_str(), "", false);
}

return domi::SUCCESS;
}

@@ -1201,6 +1212,26 @@ domi::Status ConvertModelToJson() {
return domi::SUCCESS;
}

domi::Status DisplayModelInfo() {
// No model path passed in
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"om"});
return ge::FAILED,
"Input parameter[--om]'s value is empty!!");

// Check if the model path is valid
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_om != "" && !ge::CheckInputPathValid(FLAGS_om, "--om"),
return ge::FAILED,
"model file path is invalid: %s.", FLAGS_om.c_str());

if (FLAGS_framework == -1) {
return ge::ConvertOm(FLAGS_om.c_str(), "", false);
}

return ge::FAILED;
}

bool CheckRet(domi::Status ret) {
if (ret != domi::SUCCESS) {
if (FLAGS_mode == ONLY_PRE_CHECK) {
@@ -1344,6 +1375,9 @@ int main(int argc, char* argv[]) {
} else if (FLAGS_mode == ge::RunMode::PBTXT_TO_JSON) {
GE_CHK_BOOL_EXEC(ConvertPbtxtToJson() == domi::SUCCESS, ret = domi::FAILED;
break, "ATC convert pbtxt to json execute failed!!");
} else if (FLAGS_mode == ge::RunMode::DISPLAY_OM_INFO) {
GE_CHK_BOOL_EXEC(DisplayModelInfo() == domi::SUCCESS, ret = domi::FAILED;
break, "ATC DisplayModelInfo failed!!");
} else {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--mode", std::to_string(FLAGS_mode), kModeSupport});


+ 1
- 0
ge/plugin/engine/CMakeLists.txt View File

@@ -8,6 +8,7 @@ add_library(engine SHARED ${SRC_LIST})

target_compile_options(engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(engine PRIVATE


+ 8
- 0
ge/proto/caffe/caffe.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe
*
* This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto2";

package domi.caffe;


+ 8
- 0
ge/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 80
- 6
ge/session/omg.cc View File

@@ -71,6 +71,7 @@ const char *const kOutputTypeError = "The multiple out nodes set in output_type
const size_t kNodeNameIndex = 0;
const size_t kIndexStrIndex = 1;
const size_t kDTValueIndex = 2;
const size_t kOmInfoSize = 5;
} // namespace

// When the model is converted to a JSON file, the following operator attributes in the blacklist will be ignored
@@ -869,9 +870,78 @@ void GetGroupName(ge::proto::ModelDef &model_def) {
});
}

FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, const char *json_file) {
/// @brief Dump system / resource / om-section information stored in the
///        model's attribute map to stdout (backs atc --display_model_info).
/// @param model_def deserialized offline model definition; caller guarantees non-null.
/// Missing string attributes print as empty strings, missing integer
/// attributes as -1; the "om_info_list" attribute must hold exactly
/// kOmInfoSize entries or an error message is printed instead.
FMK_FUNC_HOST_VISIBILITY void PrintModelInfo(ge::proto::ModelDef *model_def) {
  std::cout << "============ Display Model Info start ============" << std::endl;

  auto model_attr_map = model_def->mutable_attr();
  // system info
  auto iter = model_attr_map->find(ATTR_MODEL_ATC_VERSION);
  auto atc_version = (iter != model_attr_map->end()) ? iter->second.s() : "";
  iter = model_attr_map->find("soc_version");
  auto soc_version = (iter != model_attr_map->end()) ? iter->second.s() : "";
  iter = model_attr_map->find("framework_type");
  auto framework_type = (iter != model_attr_map->end()) ? iter->second.s() : "";
  std::cout << "system info: "
            << ATTR_MODEL_ATC_VERSION
            << "[" << atc_version << "], "
            << "soc_version"
            << "[" << soc_version << "], "
            << "framework_type"
            << "[" << framework_type << "]." << std::endl;

  // resource info (sizes in bytes; -1 marks an absent attribute)
  iter = model_attr_map->find(ATTR_MODEL_MEMORY_SIZE);
  auto memory_size = (iter != model_attr_map->end()) ? iter->second.i() : -1;
  iter = model_attr_map->find(ATTR_MODEL_WEIGHT_SIZE);
  auto weight_size = (iter != model_attr_map->end()) ? iter->second.i() : -1;
  iter = model_attr_map->find(ATTR_MODEL_STREAM_NUM);
  auto stream_num = (iter != model_attr_map->end()) ? iter->second.i() : -1;
  iter = model_attr_map->find(ATTR_MODEL_EVENT_NUM);
  auto event_num = (iter != model_attr_map->end()) ? iter->second.i() : -1;
  std::cout << "resource info: "
            << ATTR_MODEL_MEMORY_SIZE
            << "[" << memory_size << " B], "
            << ATTR_MODEL_WEIGHT_SIZE
            << "[" << weight_size << " B], "
            << ATTR_MODEL_STREAM_NUM
            << "[" << stream_num << "], "
            << ATTR_MODEL_EVENT_NUM
            << "[" << event_num << "]."
            << std::endl;

  // om info: the attribute may be absent when the om was built by an older
  // tool version, so its absence is reported rather than treated as fatal.
  iter = model_attr_map->find("om_info_list");
  if (iter == model_attr_map->end()) {
    std::cout << "Display Model Info failed, attr \"om_info_list\" is not found in om, check the version is matched."
              << std::endl;
    std::cout << "============ Display Model Info end ============" << std::endl;
    return;
  }
  auto list_size = iter->second.list().i_size();
  if (list_size == kOmInfoSize) {
    std::cout << "om info: "
              << "modeldef_size"
              << "[" << iter->second.list().i(0) << " B], "
              << "weight_data_size"
              << "[" << iter->second.list().i(1) << " B], "
              << "tbe_kernels_size"
              << "[" << iter->second.list().i(2) << " B], "
              << "cust_aicpu_kernel_store_size"
              << "[" << iter->second.list().i(3) << " B], "
              << "task_info_size"
              << "[" << iter->second.list().i(4) << " B]." << std::endl;
  } else {
    std::cout << "Display Model Info error, please check!" << std::endl;
  }  // stray ';' after this block removed

  std::cout << "============ Display Model Info end ============" << std::endl;
}

FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json) {
GE_CHECK_NOTNULL(model_file);
GE_CHECK_NOTNULL(json_file);
if (is_covert_to_json) {
GE_CHECK_NOTNULL(json_file);
}
ge::ModelData model;

// Mode 2 does not need to verify the priority, and a default value of 0 is passed
@@ -917,12 +987,16 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con
// De serialization
bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def);
if (flag) {
GetGroupName(model_def);
if (is_covert_to_json) {
GetGroupName(model_def);

json j;
Pb2Json::Message2Json(model_def, kOmBlackFields, j, true);
json j;
Pb2Json::Message2Json(model_def, kOmBlackFields, j, true);

ret = ModelSaver::SaveJsonToFile(json_file, j);
ret = ModelSaver::SaveJsonToFile(json_file, j);
} else {
PrintModelInfo(&model_def);
}
} else {
ret = INTERNAL_ERROR;
GELOGE(ret, "ReadProtoFromArray failed.");


+ 4
- 1
inc/external/ge/ge_api_types.h View File

@@ -291,6 +291,9 @@ const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel";
// Configure model bank path
const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path";

// Configure display_model_info flag
const std::string DISPLAY_MODEL_INFO = "ge.display_model_info";

// Configure op bank path
const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path";
const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update";
@@ -397,7 +400,7 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
// for interface: aclgrphParse
const std::set<std::string> ir_parser_suppported_options = {
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,
OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES};
OUT_NODES, ENABLE_SCOPE_FUSION_PASSES};

// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,


+ 1
- 0
inc/framework/common/helper/model_helper.h View File

@@ -84,6 +84,7 @@ class ModelHelper {
const uint8_t *data, size_t size, size_t model_index);
Status SaveModelDef(shared_ptr<OmFileSaveHelper> &om_file_save_helper, const GeModelPtr &ge_model,
Buffer &model_buffer, size_t model_index = 0);
Status SaveSizeToModelDef(const GeModelPtr &ge_model);
Status SaveModelWeights(shared_ptr<OmFileSaveHelper> &om_file_save_helper, const GeModelPtr &ge_model,
size_t model_index = 0);
Status SaveModelTbeKernel(shared_ptr<OmFileSaveHelper> &om_file_save_helper, const GeModelPtr &ge_model,


+ 3
- 1
inc/framework/omg/omg.h View File

@@ -73,7 +73,7 @@ Status ParseGraph(ge::Graph &graph, const std::map<string, string> &atc_params,
* @param [key] encrypted key
* @return Status result code
*/
Status ConvertOmModelToJson(const char *model_file, const char *json_file);
Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json);

Status ConvertPbtxtToJson(const char *model_file, const char *json_file);
/**
@@ -103,6 +103,8 @@ void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &ou
void UpdateOmgCtxWithParserCtx();

void UpdateParserCtxWithOmgCtx();

void PrintModelInfo(ge::proto::ModelDef *model_def);
} // namespace ge

namespace domi {


+ 2
- 1
inc/framework/omg/omg_inner_types.h View File

@@ -46,7 +46,8 @@ enum RunMode {
GEN_OM_MODEL = 0, // generate offline model file
MODEL_TO_JSON = 1, // convert to JSON file
ONLY_PRE_CHECK = 3, // only for pre-check
PBTXT_TO_JSON = 5 // pbtxt to json
PBTXT_TO_JSON = 5, // pbtxt to json
DISPLAY_OM_INFO = 6 // display model info
};

///


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit c14d2be38171eed63416e71178774103faf1f5cd
Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 34559943b6cb645042a87d99bc88ead016b15b64
Subproject commit ecde83dc9da0d58522b4a90c4d90c242c75011fd

+ 14
- 4
tests/ut/ge/CMakeLists.txt View File

@@ -49,6 +49,7 @@ include_directories(${GE_CODE_DIR}/metadef)
include_directories(${GE_CODE_DIR}/metadef/graph)
include_directories(${GE_CODE_DIR}/inc/external)
include_directories(${GE_CODE_DIR}/metadef/inc/external)
include_directories(${GE_CODE_DIR}/parser)
include_directories(${GE_CODE_DIR}/parser/parser)
include_directories(${GE_CODE_DIR}/metadef/inc/external/graph)
include_directories(${GE_CODE_DIR}/metadef/inc/graph)
@@ -224,7 +225,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/cond_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/for_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/assign_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc"
@@ -302,6 +303,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
"${GE_CODE_DIR}/ge/common/model_saver.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"${GE_CODE_DIR}/ge/common/ge/datatype_util.cc"
"${GE_CODE_DIR}/metadef/register/ops_kernel_builder_registry.cc"
@@ -309,6 +311,13 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/metadef/graph/utils/tuning_utils.cc"
"${GE_CODE_DIR}/metadef/register/op_tiling_registry.cpp"
"${GE_CODE_DIR}/ge/ge_local_engine/engine/host_cpu_engine.cc"
"${GE_CODE_DIR}/parser/parser/common/pre_checker.cc"
"${GE_CODE_DIR}/parser/parser/common/convert/pb2json.cc"
"${GE_CODE_DIR}/parser/parser/common/parser_factory.cc"
"${GE_CODE_DIR}/parser/parser/common/model_saver.cc"
"${GE_CODE_DIR}/parser/parser/common/parser_types.cc"
"${GE_CODE_DIR}/parser/parser/common/parser_inner_ctx.cc"
"${GE_CODE_DIR}/ge/session/omg.cc"
)

set(COMMON_FORMAT_SRC_FILES
@@ -398,7 +407,6 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES
"${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"${GE_CODE_DIR}/ge/model/ge_model.cc"
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc"
"${GE_CODE_DIR}/ge/executor/ge_executor.cc"
@@ -429,7 +437,6 @@ set(GRAPH_BUILD_COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/build/memory/hybrid_mem_assigner.cc"
"${GE_CODE_DIR}/ge/graph/build/memory/max_block_mem_assigner.cc"
"${GE_CODE_DIR}/ge/model/ge_model.cc"
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc"
"${GE_CODE_DIR}/ge/common/thread_pool.cc"
@@ -565,6 +572,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/end_graph_task_unittest.cc"
"graph/load/new_model_manager_event_manager_unittest.cc"
#"graph/load/output_net_output_unittest.cc"
"graph/load/davinci_model_unittest.cc"
"graph/load/tbe_handle_store_unittest.cc"
"graph/load/hccl_task_info_unittest.cc"
"graph/load/kernel_ex_task_info_unittest.cc"
@@ -573,6 +581,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/memcpy_async_task_info_unittest.cc"
#"graph/graph_load_unittest.cc"
"graph/ge_executor_unittest.cc"
"graph/load/model_helper_unittest.cc"
)

set(PASS_TEST_FILES
@@ -678,6 +687,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/variable_accelerate_ctrl_unittest.cc"
"graph/build/logical_stream_allocator_unittest.cc"
"graph/build/mem_assigner_unittest.cc"
"session/omg_omg_unittest.cc"
)

set(SINGLE_OP_TEST_FILES
@@ -687,7 +697,7 @@ set(SINGLE_OP_TEST_FILES
)

set(PROFILING_MNG_TEST_FILES
#"profiling/ge_profiling_manager_unittest.cc"
"profiling/ge_profiling_manager_unittest.cc"
)

set(OTHERS_TEST_FILES


+ 285
- 0
tests/ut/ge/graph/load/davinci_model_unittest.cc View File

@@ -0,0 +1,285 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "graph/utils/graph_utils.h"
#include "common/profiling/profiling_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"

using namespace std;

namespace ge {
extern OpDescPtr CreateOpDesc(string name, string type);

class UtestDavinciModel : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

// Builds a minimal 4-node graph (data -> square -> memcpy -> netoutput) with
// one TE kernel task and one memcpy-async task, then checks that
// Assign/Init succeed and populate the IO address lists and the task list.
TEST_F(UtestDavinciModel, init_success) {
  DavinciModel model(0, nullptr);
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  // Force the load-profiling path so Init also exercises profiling reporting;
  // restored to false at the end to avoid leaking state into other tests.
  ProfilingManager::Instance().is_load_profiling_ = true;

  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);

  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  ge_model->SetModelTaskDef(model_task_def);

  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor, 512);

  // NOTE: node insertion order is load-bearing — the task defs below address
  // nodes by op_index (0..3) in exactly this order.
  OpDescPtr op_input = CreateOpDesc("data", DATA);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({1024});
  NodePtr node_input = graph->AddNode(op_input); // op_index = 0

  OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  op_kernel->AddInputDesc(tensor);
  op_kernel->AddOutputDesc(tensor);
  op_kernel->SetInputOffset({1024});
  op_kernel->SetOutputOffset({1024});
  NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1

  OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  op_memcpy->AddInputDesc(tensor);
  op_memcpy->AddOutputDesc(tensor);
  op_memcpy->SetInputOffset({1024});
  op_memcpy->SetOutputOffset({5120});
  NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({5120});
  op_output->SetSrcName( { "memcpy" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node_output = graph->AddNode(op_output); // op_index = 3


  // Task 1: TE kernel bound to the "square" node (op_index 1).
  domi::TaskDef *task_def1 = model_task_def->add_task();
  task_def1->set_stream_id(0);
  task_def1->set_type(RT_MODEL_TASK_KERNEL);
  domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  kernel_def->set_stub_func("stub_func");
  kernel_def->set_args_size(64);
  string args(64, '1');
  kernel_def->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_def->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  // Task 2: device-to-device memcpy bound to the "memcpy" node (op_index 2);
  // src/dst match the offsets configured on the nodes above.
  domi::TaskDef *task_def2 = model_task_def->add_task();
  task_def2->set_stream_id(0);
  task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  memcpy_async->set_src(1024);
  memcpy_async->set_dst(5120);
  memcpy_async->set_dst_max(512);
  memcpy_async->set_count(1);
  memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  memcpy_async->set_op_index(2);

  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  EXPECT_EQ(model.Init(), SUCCESS);

  // One data input, one netoutput, and both tasks materialized.
  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.task_list_.size(), 2);

  ProfilingManager::Instance().is_load_profiling_ = false;
}

// InitNodes on a two-node graph (data -> netoutput) should register one
// input, one output, and both ops. Private runtime_param_ fields are set
// directly so address assignment works without a real memory base.
TEST_F(UtestDavinciModel, init_data_op) {
  DavinciModel model(0, nullptr);
  model.ge_model_ = make_shared<GeModel>();
  // Fake device base address/size — never dereferenced by InitNodes.
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

  OpDescPtr op_input = CreateOpDesc("data", DATA);
  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor, 512);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({5120});
  NodePtr node_input = graph->AddNode(op_input);

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({1024});
  op_output->SetSrcName( { "data" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node_output = graph->AddNode(op_output);

  EXPECT_EQ(model.InitNodes(graph), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.op_list_.size(), 2);
}

// InitDataOp called with a null root graph (subgraph case) must succeed
// without registering any input/output addresses or advancing the data index.
TEST_F(UtestDavinciModel, init_data_op_subgraph) {
  DavinciModel model(0, nullptr);
  // Fake device base address/size — never dereferenced here.
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

  OpDescPtr op_input = CreateOpDesc("data", DATA);
  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({5120});
  NodePtr node = graph->AddNode(op_input);

  uint32_t data_op_index = 0;
  map<uint32_t, OpDescPtr> data_by_index;
  // nullptr root graph marks this data op as belonging to a subgraph.
  EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS);

  // Subgraph data ops must not affect top-level IO bookkeeping.
  EXPECT_EQ(model.input_addrs_list_.size(), 0);
  EXPECT_EQ(model.output_addrs_list_.size(), 0);
  EXPECT_EQ(data_op_index, 0);
  EXPECT_TRUE(data_by_index.empty());
}

// InitNetOutput called with a null root graph (subgraph case) must succeed
// without registering any output addresses or output ops.
TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
  DavinciModel model(0, nullptr);
  // Fake device base address/size — never dereferenced here.
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({1024});
  op_output->SetSrcName( { "data" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node = graph->AddNode(op_output);

  std::vector<OpDescPtr> output_op_list;
  // nullptr root graph marks this netoutput as belonging to a subgraph.
  EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 0);
  EXPECT_EQ(model.output_addrs_list_.size(), 0);
  EXPECT_TRUE(output_op_list.empty());
}

// Same 4-node graph and task setup as init_success, but with the model put in
// known-node mode via SetKnownNode(true), then exercises the known-node-only
// paths: task UpdateArgs, attr/IO-desc queries and UpdateKnownNodeArgs.
// NOTE(review): the test name says "unknown" while SetKnownNode(true) is set —
// presumably it covers the known-shape execution path; confirm the naming.
TEST_F(UtestDavinciModel, init_unknown) {
  DavinciModel model(0, nullptr);
  model.SetKnownNode(true);
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);

  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  ge_model->SetModelTaskDef(model_task_def);

  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor, 512);

  // NOTE: node insertion order is load-bearing — the task defs below address
  // nodes by op_index (0..3) in exactly this order.
  OpDescPtr op_input = CreateOpDesc("data", DATA);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({1024});
  NodePtr node_input = graph->AddNode(op_input); // op_index = 0

  OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  op_kernel->AddInputDesc(tensor);
  op_kernel->AddOutputDesc(tensor);
  op_kernel->SetInputOffset({1024});
  op_kernel->SetOutputOffset({1024});
  NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1

  OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  op_memcpy->AddInputDesc(tensor);
  op_memcpy->AddOutputDesc(tensor);
  op_memcpy->SetInputOffset({1024});
  op_memcpy->SetOutputOffset({5120});
  NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({5120});
  op_output->SetSrcName( { "memcpy" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node_output = graph->AddNode(op_output); // op_index = 3


  // Task 1: TE kernel bound to the "square" node (op_index 1).
  domi::TaskDef *task_def1 = model_task_def->add_task();
  task_def1->set_stream_id(0);
  task_def1->set_type(RT_MODEL_TASK_KERNEL);
  domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  kernel_def->set_stub_func("stub_func");
  kernel_def->set_args_size(64);
  string args(64, '1');
  kernel_def->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_def->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  // Task 2: device-to-device memcpy bound to the "memcpy" node (op_index 2).
  domi::TaskDef *task_def2 = model_task_def->add_task();
  task_def2->set_stream_id(0);
  task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  memcpy_async->set_src(1024);
  memcpy_async->set_dst(5120);
  memcpy_async->set_dst_max(512);
  memcpy_async->set_count(1);
  memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  memcpy_async->set_op_index(2);

  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  EXPECT_EQ(model.Init(), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.task_list_.size(), 2);

  // Known-node path: args of each task can be refreshed in place.
  EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS);
  EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS);

  vector<string> out_shape_info;
  model.GetModelAttr(out_shape_info);

  vector<InputOutputDescInfo> input_descs;
  vector<InputOutputDescInfo> output_descs;
  EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS);

  // Known-node IO rebinding with caller-supplied host addresses.
  int32_t virtual_addr = 0;
  const vector<void *> inputs = { &virtual_addr };
  const vector<void *> outputs = { &virtual_addr };
  EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS);
}
} // namespace ge

+ 52
- 0
tests/ut/ge/graph/load/model_helper_unittest.cc View File

@@ -0,0 +1,52 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#define private public
#define protected public
#include "framework/common/helper/model_helper.h"
#include "ge/model/ge_model.h"
#undef private
#undef protected
#include "proto/task.pb.h"
using namespace std;
namespace ge {
// Fixture for ModelHelper unit tests; no shared per-test state is required.
class UtestModelHelper : public testing::Test {
 protected:
  void SetUp() override {}
  void TearDown() override {}
};
// SaveSizeToModelDef must report a memory-allocation error when the GeModel
// carries no ModelTaskDef to serialize.
TEST_F(UtestModelHelper, save_size_to_modeldef_failed)
{
  ModelHelper helper;
  GeModelPtr model_without_task = ge::MakeShared<ge::GeModel>();
  const auto ret = helper.SaveSizeToModelDef(model_without_task);
  EXPECT_EQ(ACL_ERROR_GE_MEMORY_ALLOCATION, ret);
}
// SaveSizeToModelDef succeeds once the GeModel has a ModelTaskDef attached.
TEST_F(UtestModelHelper, save_size_to_modeldef)
{
  GeModelPtr model = ge::MakeShared<ge::GeModel>();
  auto task_def = ge::MakeShared<domi::ModelTaskDef>();
  model->SetModelTaskDef(task_def);

  ModelHelper helper;
  EXPECT_EQ(SUCCESS, helper.SaveSizeToModelDef(model));
}
} // namespace ge

+ 14
- 103
tests/ut/ge/profiling/ge_profiling_manager_unittest.cc View File

@@ -37,121 +37,32 @@ class UtestGeProfilinganager : public testing::Test {
void TearDown() override {}
};

class TestReporter : public Msprof::Engine::Reporter {
public:
TestReporter() {}
~TestReporter() {}

public:
int Report(const Msprof::Engine::ReporterData *data) { return 0; }

int Flush() { return 0; }
};

class TestPluginIntf : public Msprof::Engine::PluginIntf {
public:
TestPluginIntf() {}
~TestPluginIntf() {}

public:
int Init(const Msprof::Engine::Reporter *reporter) { return 0; }

int UnInit() { return 0; }
};

// ProfilingManager::Init should succeed when profiling is enabled through the
// PROFILING_MODE environment variable and the stored config is empty.
// NOTE(review): this block sits inside an interleaved diff hunk — confirm the
// Init/SetProfilingConfig API still exists in the current ProfilingManager.
TEST_F(UtestGeProfilinganager, init_success) {
  setenv("PROFILING_MODE", "true", true);
  Options options;
  options.device_id = 0;
  options.job_id = "0";
  string profiling_config;

  // Empty config: profiling settings are taken from the environment instead.
  ProfilingManager::Instance().SetProfilingConfig(profiling_config);

  Status ret = ProfilingManager::Instance().Init(options);
  EXPECT_EQ(ret, ge::SUCCESS);
}

TEST_F(UtestGeProfilinganager, start_profiling_success) {
int32_t iter_num = 1;

setenv("PROFILING_MODE", "true", true);
setenv("PROFILING_OPTIONS", "training_trace", true);
Options options;
string profiling_config;

ProfilingManager::Instance().SetProfilingConfig(profiling_config);

Status ret = ProfilingManager::Instance().Init(options);
EXPECT_EQ(ret, ge::SUCCESS);
ret = ProfilingManager::Instance().StartProfiling(iter_num, 0);
EXPECT_EQ(ret, ge::SUCCESS);
options.profiling_mode = "1";
options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})";

setenv("PROFILING_OPTIONS", "op_trance", true);
ret = ProfilingManager::Instance().Init(options);
EXPECT_EQ(ret, ge::SUCCESS);
ret = ProfilingManager::Instance().StartProfiling(iter_num, 0);
EXPECT_EQ(ret, ge::SUCCESS);
}

TEST_F(UtestGeProfilinganager, stop_profiling_success) {
int32_t iter_num = 1;
Options options;

TestReporter test_reporter;
struct MsprofGeOptions prof_conf = {{ 0 }};

string profiling_config;
ProfilingManager::Instance().SetProfilingConfig(profiling_config);

Status ret = 0;
setenv("PROFILING_OPTIONS", "op_trance", true);
ret = ProfilingManager::Instance().Init(options);
EXPECT_EQ(ret, ge::SUCCESS);
ret = ProfilingManager::Instance().StartProfiling(iter_num, 0);
Status ret = ProfilingManager::Instance().InitFromOptions(options, prof_conf);
EXPECT_EQ(ret, ge::SUCCESS);
ProfilingManager::Instance().StopProfiling();
}

TEST_F(UtestGeProfilinganager, plugin_impl_success) {
PluginImpl plugin_Impl("FMK");
TestReporter test_reporter;
Msprof::Engine::Reporter *reporter_ptr = &test_reporter;
plugin_Impl.Init(reporter_ptr);
plugin_Impl.UnInit();
}

TEST_F(UtestGeProfilinganager, profiling_engine_impl_success) {
ProfilingEngineImpl profiling_engine_impl;

Msprof::Engine::PluginIntf *plugin_ptr = new TestPluginIntf();
profiling_engine_impl.ReleasePlugin(plugin_ptr);

Msprof::Engine::PluginIntf *ptr = profiling_engine_impl.CreatePlugin();
delete ptr;
ptr = nullptr;
}

TEST_F(UtestGeProfilinganager, set_profilng_cfg_success) {
string profiling_config = "profiling_mode: true";
ProfilingManager::Instance().SetProfilingConfig(profiling_config);
}

TEST_F(UtestGeProfilinganager, init_from_cfg_success0) {
Options options;
string profiling_config =
"{\"startCfg\":[{\"deviceID\":\"0\",\"features\":[{\"name\":\"op_trace\",\"conf\":\"2\"}]}]}";
ProfilingManager::Instance().SetProfilingConfig(profiling_config);
TEST_F(UtestGeProfilinganager, ParseOptions) {
setenv("PROFILING_MODE", "true", true);
Options options;
options.device_id = 0;
options.job_id = "0";
options.profiling_mode = "1";
options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})";

Status ret = ProfilingManager::Instance().Init(options);
EXPECT_EQ(ret, ge::SUCCESS);
}

TEST_F(UtestGeProfilinganager, init_from_cfg_success1) {
Options options;
string profiling_config =
"{\"startCfg\":[{\"deviceID\":\"0\",\"features\":[{\"name\":\"test_trace\"}],\"jobID\":\"1231231231\"}]}";
ProfilingManager::Instance().SetProfilingConfig(profiling_config);
struct MsprofGeOptions prof_conf = {{ 0 }};

Status ret = ProfilingManager::Instance().Init(options);
EXPECT_EQ(ret, ge::SUCCESS);
Status ret = ProfilingManager::Instance().ParseOptions(options.profiling_options);
EXPECT_EQ(ret, ge::SUCCESS);
}

+ 52
- 0
tests/ut/ge/session/omg_omg_unittest.cc View File

@@ -0,0 +1,52 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include "common/ge/ge_util.h"
#include "proto/ge_ir.pb.h"
#include "inc/framework/omg/omg.h"
using namespace std;
namespace ge {
class UtestOmg : public testing::Test {
protected:
void SetUp() override {}
void TearDown() override {}
};
TEST_F(UtestOmg, display_model_info_failed) {
ge::proto::ModelDef model_def;
PrintModelInfo(&model_def);
}
TEST_F(UtestOmg, display_model_info_success) {
ge::proto::ModelDef model_def;
auto attrs = model_def.mutable_attr();
ge::proto::AttrDef *attr_def_soc = &(*attrs)["soc_version"];
attr_def_soc->set_s("Ascend310");
ge::proto::AttrDef *attr_def = &(*attrs)["om_info_list"];
attr_def->mutable_list()->add_i(1);
attr_def->mutable_list()->add_i(2);
attr_def->mutable_list()->add_i(3);
attr_def->mutable_list()->add_i(4);
attr_def->mutable_list()->add_i(5);
PrintModelInfo(&model_def);
}
} // namespace ge

Loading…
Cancel
Save