Browse Source

Merge remote-tracking branch 'upstream/development' into development

pull/668/head
zhengyuanhua 5 years ago
parent
commit
aa33501462
48 changed files with 640 additions and 788 deletions
  1. +2
    -0
      ge/CMakeLists.txt
  2. +1
    -0
      ge/common/CMakeLists.txt
  3. +14
    -1
      ge/common/dump/dump_op.cc
  4. +10
    -3
      ge/common/ge/tbe_plugin_manager.cc
  5. +3
    -4
      ge/common/profiling/ge_profiling.cc
  6. +32
    -20
      ge/common/profiling/profiling_manager.cc
  7. +17
    -16
      ge/common/profiling/profiling_manager.h
  8. +2
    -0
      ge/common/proto/op_mapping_info.proto
  9. +1
    -0
      ge/executor/CMakeLists.txt
  10. +0
    -82
      ge/executor/ge_executor.cc
  11. +2
    -0
      ge/executor/proto/op_mapping_info.proto
  12. +1
    -0
      ge/ge_inference.mk
  13. +1
    -0
      ge/ge_local_engine/CMakeLists.txt
  14. +1
    -0
      ge/ge_runner.mk
  15. +51
    -1
      ge/generator/ge_generator.cc
  16. +1
    -0
      ge/graph/build/memory/CMakeLists.txt
  17. +1
    -1
      ge/graph/build/memory/graph_mem_assigner.cc
  18. +27
    -10
      ge/graph/build/stream_graph_optimizer.cc
  19. +1
    -1
      ge/graph/build/stream_graph_optimizer.h
  20. +1
    -1
      ge/graph/build/task_generator.cc
  21. +10
    -67
      ge/graph/load/graph_loader.cc
  22. +0
    -6
      ge/graph/load/graph_loader.h
  23. +6
    -0
      ge/graph/load/new_model_manager/data_dumper.cc
  24. +130
    -328
      ge/graph/load/new_model_manager/davinci_model.cc
  25. +22
    -52
      ge/graph/load/new_model_manager/davinci_model.h
  26. +31
    -35
      ge/graph/load/new_model_manager/model_manager.cc
  27. +1
    -2
      ge/graph/load/new_model_manager/model_manager.h
  28. +6
    -8
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  29. +3
    -7
      ge/graph/load/new_model_manager/zero_copy_offset.cc
  30. +1
    -1
      ge/graph/load/new_model_manager/zero_copy_offset.h
  31. +2
    -49
      ge/graph/load/new_model_manager/zero_copy_task.cc
  32. +1
    -7
      ge/graph/load/new_model_manager/zero_copy_task.h
  33. +13
    -1
      ge/graph/manager/graph_manager.cc
  34. +1
    -1
      ge/graph/manager/memory_api.cc
  35. +159
    -0
      ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
  36. +36
    -0
      ge/graph/passes/dynamic_single_op_reset_shape_pass.h
  37. +7
    -5
      ge/graph/preprocess/multi_batch_copy_graph.cc
  38. +1
    -0
      ge/host_cpu_engine/CMakeLists.txt
  39. +25
    -25
      ge/hybrid/node_executor/hccl/hccl_node_executor.cc
  40. +3
    -0
      ge/offline/CMakeLists.txt
  41. +2
    -0
      ge/proto/op_mapping_info.proto
  42. +3
    -13
      inc/external/ge/ge_api_types.h
  43. +3
    -0
      inc/framework/common/ge_types.h
  44. +3
    -2
      inc/framework/common/profiling/ge_profiling.h
  45. +0
    -13
      inc/framework/executor/ge_executor.h
  46. +1
    -1
      inc/framework/omg/parser/parser_inner_ctx.h
  47. +1
    -0
      tests/ut/ge/CMakeLists.txt
  48. +0
    -25
      third_party/fwkacllib/inc/toolchain/slog.h

+ 2
- 0
ge/CMakeLists.txt View File

@@ -144,6 +144,7 @@ set(TRAIN_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/dynamic_single_op_reset_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/partition/dynamic_shape_partition.cc"
"graph/partition/stage_partition.cc"
@@ -434,6 +435,7 @@ set(INFER_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/dynamic_single_op_reset_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/common/omg_util.cc"
"graph/common/bcast.cc"


+ 1
- 0
ge/common/CMakeLists.txt View File

@@ -130,6 +130,7 @@ target_compile_definitions(ge_common_static PRIVATE
google=ascend_private
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
)

target_compile_options(ge_common_static PRIVATE


+ 14
- 1
ge/common/dump/dump_op.cc View File

@@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());

uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}
aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {


+ 10
- 3
ge/common/ge/tbe_plugin_manager.cc View File

@@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo(options_);

std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
for (OpRegistrationData reg_data : registration_datas) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
domi::OpRegistry::Instance()->Register(reg_data);
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
}
}
}



+ 3
- 4
ge/common/profiling/ge_profiling.cc View File

@@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof ctrl callback is exist, just ignore it.");
} else {
GELOGI("GE register Msprof ctrl callback.");
ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
}
return ge::SUCCESS;
@@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
return ge::PARAM_INVALID;
}
// Pass MsprofSetDeviceCallback to runtime
GELOGI("GE pass setdevice callback to runtime.");
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!");
@@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
if (type != kProfCommandhandleFinalize) {
GE_CHECK_NOTNULL(data);
}
ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data;
ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data);
auto iter = kProfCommandTypeMap.find(type);
if (iter == kProfCommandTypeMap.end()) {
GELOGW("The prof comand type is invalid.");
@@ -183,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
if (type != kProfCommandhandleFinalize) {
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index);
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}


+ 32
- 20
ge/common/profiling/profiling_manager.cc View File

@@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
} // namespace

namespace ge {
ProfilingManager::ProfilingManager() : is_load_profiling_(false),
is_execute_profiling_(false),
is_training_trace_(false),
subscribe_count_(0) {
ProfilingManager::ProfilingManager()
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
prof_cb_.msprofCtrlCallback = nullptr;
prof_cb_.msprofReporterCallback = nullptr;
}
@@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return INTERNAL_ERROR;
}
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(),
prof_conf.options, options.profiling_options.c_str());
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
options.profiling_options.c_str());
} else {
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
@@ -215,7 +213,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
uint64_t cur_iter_num = task.cur_iter_num;
int64_t cur_iter_num = task.cur_iter_num;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
@@ -809,32 +807,46 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
if (!fp_point_.empty() && !bp_point_.empty()) {
fp_point = fp_point_;
bp_point = bp_point_;
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str());
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(),
fp_point.c_str());
return;
}
// ProfApi mode and training trace is set
try {
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
// Parse options first
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
bool is_profiling_valid = false;
std::string profiling_options;
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS &&
!profiling_options.empty()) {
is_profiling_valid = true;
} else {
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX);
if (ret != EN_OK) {
GELOGI("PROFILING_OPTIONS env is not exist.");
return;
}
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
Json prof_options = Json::parse(env_profiling_options);
profiling_options = env_profiling_options;
is_profiling_valid = true;
}
if (is_profiling_valid) {
try {
Json prof_options = Json::parse(profiling_options);

fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];
fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];

fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGW("Json prof options is invalid.");
return;
}
} catch (...) {
GELOGE(FAILED, "Json prof options is invalid.");
return;
}
return;
}



+ 17
- 16
ge/common/profiling/profiling_manager.h View File

@@ -36,21 +36,21 @@ using Json = nlohmann::json;
namespace {
const std::string GE_PROFILING_MODULE = "Framework";
// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x0001
#define PROF_TASK_TIME_MASK 0x0002
#define PROF_AICORE_METRICS_MASK 0x0004
#define PROF_AICPU_TRACE_MASK 0x0008
#define PROF_MODEL_EXECUTE_MASK 0x0010
#define PROF_RUNTIME_API_MASK 0x0020
#define PROF_RUNTIME_TRACE_MASK 0x0040
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
#define PROF_SCHEDULE_TRACE_MASK 0x0100
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
#define PROF_SUBTASK_TIME_MASK 0x0400
#define PROF_TRAINING_TRACE_MASK 0x0800
#define PROF_HCCL_TRACE_MASK 0x1000
#define PROF_DATA_PROCESS_MASK 0x2000
#define PROF_MODEL_LOAD_MASK 0x8000000000000000
const uint64_t PROF_ACL_API_MASK = 0x0001;
const uint64_t PROF_TASK_TIME_MASK = 0x0002;
const uint64_t PROF_AICORE_METRICS_MASK = 0x0004;
const uint64_t PROF_AICPU_TRACE_MASK = 0x0008;
const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010;
const uint64_t PROF_RUNTIME_API_MASK = 0x0020;
const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040;
const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080;
const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100;
const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200;
const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400;
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000;

} // namespace
namespace ge {
@@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env
// is_execute_profiling_ only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,


+ 2
- 0
ge/common/proto/op_mapping_info.proto View File

@@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 1
- 0
ge/executor/CMakeLists.txt View File

@@ -173,6 +173,7 @@ target_compile_definitions(ge_executor PRIVATE
google=ascend_private
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
)

target_include_directories(ge_executor PRIVATE


+ 0
- 82
ge/executor/ge_executor.cc View File

@@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

namespace ge {
bool GeExecutor::isInit_ = false;
// Adapter that relays compute-done notifications to the ge::ModelListener stored in `listener`.
class ModelListenerAdapter : public ModelListener {
 public:
  // Forwards the notification unchanged to `listener` and returns its result.
  // When no listener has been set, logs an error and returns FAILED.
  domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
                             std::vector<ge::OutputTensorInfo> &outputs) {
    if (listener != nullptr) {
      return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
    }
    GELOGE(ge::FAILED, "listener is null.");
    return FAILED;
  }

  // Wrapped listener; may be null, in which case OnComputeDone fails.
  std::shared_ptr<ge::ModelListener> listener;
};

static void InitOpsProtoManger() {
string opsproto_path;
@@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}

// Load model
// Loads an offline model from `path` and reports the new id through `model_id`.
// `listener` is wrapped in a ModelListenerAdapter before being handed to GraphLoader.
// Returns:
//   ACL_ERROR_GE_EXEC_NOT_INIT            - the executor has not been initialized
//   ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID  - RealPath() produced an empty string for `path`
//   ACL_ERROR_GE_MEMORY_ALLOCATION        - the adapter could not be created
//   ACL_ERROR_GE_LOAD_MODEL               - GraphLoader::LoadModelFromFile failed
//   SUCCESS                               - otherwise
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
                                    int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
  GELOGI("load model offline begin.");
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  // The resolved path is only used to validate the input; the loader below still receives `path`.
  string resolved_path = RealPath(path.c_str());
  if (resolved_path.empty()) {
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
           "File path is invalid. please check your text file '%s'.", path.c_str());
    return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
  }

  std::shared_ptr<ModelListenerAdapter> adapter = MakeShared<ModelListenerAdapter>();
  if (adapter == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  adapter->listener = listener;

  Status load_status = GraphLoader::LoadModelFromFile(path, key, priority, adapter, model_id);
  if (load_status == SUCCESS) {
    return SUCCESS;
  }
  // The loader's own status goes to the log; callers always see the generic load error.
  GELOGE(load_status, "[GeExecutor] LoadModelFromFile failed");
  return ACL_ERROR_GE_LOAD_MODEL;
}

// Loads a model from in-memory `model_data` and reports the new id through `model_id`.
// `listener` is wrapped in a ModelListenerAdapter before being handed to GraphLoader.
// Returns ACL_ERROR_GE_EXEC_NOT_INIT when the executor is not initialized,
// ACL_ERROR_GE_MEMORY_ALLOCATION when the adapter cannot be created,
// ACL_ERROR_GE_LOAD_MODEL when GraphLoader::LoadModel fails, and SUCCESS otherwise.
Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
                             std::shared_ptr<ge::ModelListener> listener) {
  GELOGI("Load model begin.");
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  std::shared_ptr<ModelListenerAdapter> adapter = MakeShared<ModelListenerAdapter>();
  if (adapter == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  adapter->listener = listener;

  Status load_status = GraphLoader::LoadModel(model_data, adapter, model_id);
  if (load_status != SUCCESS) {
    // The loader's own status goes to the log; callers always see the generic load error.
    GELOGE(load_status, "[GeExecutor] LoadModel failed.");
    return ACL_ERROR_GE_LOAD_MODEL;
  }
  return SUCCESS;
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
return SUCCESS;
}

// Runs a model: converts the caller's RunModelData pair via GetDomiInputData / GetDomiOutputData
// and passes the converted objects to GraphExecutor::DataInput, whose status is returned.
// Returns ACL_ERROR_GE_EXEC_NOT_INIT when the executor has not been initialized.
Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
  GELOGI("run model begin.");
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  InputData domi_inputs;
  GetDomiInputData(input_data, domi_inputs);

  OutputData domi_outputs;
  GetDomiOutputData(output_data, domi_outputs);

  Status run_status = GraphExecutor::DataInput(domi_inputs, domi_outputs);
  return run_status;
}

// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {


+ 2
- 0
ge/executor/proto/op_mapping_info.proto View File

@@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 1
- 0
ge/ge_inference.mk View File

@@ -109,6 +109,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/dynamic_single_op_reset_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \


+ 1
- 0
ge/ge_local_engine/CMakeLists.txt View File

@@ -203,6 +203,7 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE

target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
)

target_include_directories(ge_local_opskernel_builder_static PRIVATE


+ 1
- 0
ge/ge_runner.mk View File

@@ -113,6 +113,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/dynamic_single_op_reset_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/partition/stage_partition.cc \


+ 51
- 1
ge/generator/ge_generator.cc View File

@@ -47,6 +47,8 @@ const char *const kEngineNameDefault = "default";
const char *const kVectorEngine = "VectorEngine";
const char *const kAIcoreEngine = "AIcoreEngine";
const char *const kFileNameSuffix = "online";
const size_t kDynamicDimSize = 1;
const int64_t kDynamicDimValue = -2;

std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};
@@ -248,6 +250,43 @@ static void GetOpsProtoPath(string &opsproto_path) {
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

// Reports through `change_shape_flag` whether any input desc of `op_desc` has exactly
// kDynamicDimSize dims with the first one equal to kDynamicDimValue.
// The flag is reset to false before scanning. All inputs are visited even after a match,
// so a null input desc anywhere is still rejected by GE_CHECK_NOTNULL.
// Returns PARAM_INVALID when `op_desc` is null and SUCCESS when the scan completes.
static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag) {
  GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
  change_shape_flag = false;
  for (size_t idx = 0; idx < op_desc->GetAllInputsDesc().size(); ++idx) {
    auto tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(idx));
    GE_CHECK_NOTNULL(tensor_desc);
    // Descs whose dim count differs from kDynamicDimSize (e.g. zero-dim ones) never match.
    auto shape_dims = tensor_desc->GetShape().GetDims();
    const bool has_expected_rank = (shape_dims.size() == kDynamicDimSize);
    if (has_expected_rank && shape_dims[0] == kDynamicDimValue) {
      change_shape_flag = true;
    }
  }
  return SUCCESS;
}

static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
for (auto input : inputs) {
auto input_desc = input.GetTensorDesc();
GeShape shape_ori = input_desc.GetShape();

std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
GeShape dynamic_shape(dynamic_shape_dims);

ge::GeTensor inputTensor;
ge::GeTensorDesc desc(input_desc);

bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
desc.SetShape(dynamic_shape);
}

inputTensor.SetTensorDesc(desc);
inputs_dynamic.push_back(inputTensor);
}
}

class GeGenerator::Impl {
public:
Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
@@ -638,7 +677,18 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
}
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));

bool dynamic_flag = false;
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
ResetTensorVecShape(inputs, inputs_dynamic);
ResetTensorVecShape(outputs, outputs_dynamic);
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
}


+ 1
- 0
ge/graph/build/memory/CMakeLists.txt View File

@@ -18,6 +18,7 @@ target_compile_options(ge_memory PRIVATE

target_compile_definitions(ge_memory PRIVATE
google=ascend_private
LOG_CPP
)

target_link_libraries(ge_memory PRIVATE


+ 1
- 1
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

if (mem_assigner->GetP2PMemOffset() > 0) {
if (mem_assigner->GetP2PMemOffset() >= 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}


+ 27
- 10
ge/graph/build/stream_graph_optimizer.cc View File

@@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap
}
}

bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) {
bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) {
if (comp_graph == nullptr) {
return false;
}
std::set<int64_t> stream_set;
std::set<std::string> label_set;
for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) {
GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue);
int64_t stream_id = cur_node->GetOpDesc()->GetStreamId();
if (stream_id == kInvalidStream) {
continue;
}
GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
stream_set.insert(stream_id);

std::string batch_label;
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}

GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.",
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size());
return false;
}

if (!label_set.empty()) {
(void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin());
}
return true;
}

@@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
continue;
}

if (!IsSameStreamId(subgraph)) {
GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str());
if (!IsSameStreamIdOrBatchLabel(subgraph)) {
GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str());
continue;
}
OpDescPtr op_desc = nodes.at(0)->GetOpDesc();
@@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
return FAILED;
}
run_context.stream = run_context.graphStreamList[stream_id];
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.",
subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)));
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);


+ 1
- 1
ge/graph/build/stream_graph_optimizer.h View File

@@ -41,7 +41,7 @@ class StreamGraphOptimizer {
private:
void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map);

bool IsSameStreamId(const ComputeGraphPtr &comp_graph);
bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph);
};
} // namespace ge
#endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_

+ 1
- 1
ge/graph/build/task_generator.cc View File

@@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continue;
}
string op_type = op_desc->GetType();
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);


+ 10
- 67
ge/graph/load/graph_loader.cc View File

@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}

// Reads the model at `path` (with optional `key_path`) into a temporary ModelData via
// LoadDataFromFile, loads it through LoadModel, and releases the temporary buffer before
// returning on every path. Returns the status of the first step that fails, or the status
// of LoadModel when both steps run.
Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
                                      const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
  ModelData model_data;
  // Frees the buffer held by model_data, if any, and clears the pointer so a repeated call is a no-op.
  auto release_buffer = [&model_data]() {
    if (model_data.model_data != nullptr) {
      delete[] static_cast<char *>(model_data.model_data);
      model_data.model_data = nullptr;
    }
  };

  Status ret = LoadDataFromFile(path, key_path, priority, model_data);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
    release_buffer();
    return ret;
  }

  ret = LoadModel(model_data, listener, model_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
  }

  release_buffer();
  return ret;
}

// Loads `model_data` through ModelManager::LoadModelOffline and then starts the model.
// Device 0 is set before loading. If loading fails, device 0 is reset and the load status
// is returned. If starting fails, the model is unloaded (an unload failure is only logged)
// and the start status is returned.
// NOTE(review): the start-failure path does not reset device 0 - confirm this is intended.
Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
                              uint32_t &model_id) {
  GELOGI("Load model begin, model_id:%u.", model_id);

  // For GeOp, Open Device 0 here.
  GE_CHK_RT_RET(rtSetDevice(0));
  auto manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);

  Status load_status = manager->LoadModelOffline(model_id, model_data, listener);
  if (load_status != SUCCESS) {
    GE_CHK_RT(rtDeviceReset(0));
    GELOGE(load_status, "LoadModel: Load failed.");
    return load_status;
  }

  Status start_status = manager->Start(model_id);
  if (start_status != SUCCESS) {
    Status unload_status = manager->Unload(model_id);
    if (unload_status != SUCCESS) {
      GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
    }
    GELOGE(start_status, "LoadModel: Start failed.");
    return start_status;
  }

  GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
  return SUCCESS;
}

Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@@ -225,16 +168,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
}

Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t memsize, void *weight_ptr, size_t weightsize) {
size_t mem_size, void *weight_ptr, size_t weight_size) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
if (ret != SUCCESS) {
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
}
GELOGI("Load model success, model_id:%u.", model_id);
return SUCCESS;
@@ -259,8 +202,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
return ret;
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
}

GELOGI("Load model with queue success, model_id:%u.", model_id);


+ 0
- 6
ge/graph/load/graph_loader.h View File

@@ -44,12 +44,6 @@ class GraphLoader {

static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);

static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

static Status CommandHandle(const Command &command);

static Status GetMemoryInfo(int64_t &free);


+ 6
- 0
ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);


+ 130
- 328
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
is_inner_weight_base_ = true;
}
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh

Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (is_feature_map_mem_has_inited_) {
GELOGE(FAILED, "call InitFeatureMapMem more than once .");
return FAILED;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
is_feature_map_mem_has_inited_ = true;

@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return FAILED;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (TotalMemSize() && mem_base_ == nullptr) {
mem_base_ = MallocFeatureMapMem(data_size);
if (mem_base_ == nullptr) {
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (p2p_data_size != 0) {
p2p_mem_base_ = MallocP2PMem(p2p_data_size);
if (p2p_mem_base_ == nullptr) {
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return GE_EXEC_ALLOC_P2P_MEM_FAILED;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
@@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

// collect profiling for ge
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
auto &profiling_manager = ProfilingManager::Instance();
if (profiling_manager.ProfilingModelLoadOn()) {
Status p_ret = ReportProfilingData();
@@ -970,7 +971,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
uint32_t parent_index = 0; // Ignore subgraph Data Node.
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str());
return InitInputBatchLabel(node);
return SUCCESS;
}

data_op_list_.push_back(op_desc);
@@ -1011,10 +1012,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
}

data_op_index++;
if (InitInputZeroCopyNodes(node) != SUCCESS) {
GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!");
return PARAM_INVALID;
}
return SUCCESS;
}

@@ -1036,39 +1033,6 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde
}
}

///
/// @ingroup ge
/// @brief Record a batch label for every op fed by the given Data node's output anchor.
///        Ops without a (non-empty) batch label attribute get the default label.
/// @param [in] node: Data Op.
/// @return SUCCESS, or FAILED when the output anchor or a consumer's op desc is null.
///
Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) {
  auto data_out_anchor = node->GetOutDataAnchor(kDataIndex);
  if (data_out_anchor == nullptr) {
    GELOGE(FAILED, "Out data anchor is nullptr");
    return FAILED;
  }

  for (auto &consumer_in_anchor : data_out_anchor->GetPeerInDataAnchors()) {
    auto consumer_desc = consumer_in_anchor->GetOwnerNode()->GetOpDesc();
    if (consumer_desc == nullptr) {
      GELOGE(FAILED, "Op desc is nullptr");
      return FAILED;
    }

    // A missing attribute leaves the label empty, which falls back to the default label below.
    string label;
    (void)ge::AttrUtils::GetStr(consumer_desc, ATTR_NAME_BATCH_LABEL, label);
    if (label.empty()) {
      label = kDefaultBatchLable;
    }

    // The first label registered for an op id wins; later sightings are ignored.
    if (zero_copy_op_id_batch_label_.count(consumer_desc->GetId()) > 0) {
      continue;
    }
    zero_copy_op_id_batch_label_.emplace(consumer_desc->GetId(), label);
    GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
           consumer_desc->GetName().c_str(), consumer_desc->GetId(), label.c_str());
  }
  return SUCCESS;
}

bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
bool getnext_sink_dynamic = false;
if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) {
@@ -1094,7 +1058,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
if (owner_graph->GetParentGraph() != nullptr) {
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str());
op_list_.erase(op_desc->GetId());
return InitOutputBatchLabel(node);
return SUCCESS;
}

output_op_list_.push_back(op_desc);
@@ -1146,8 +1110,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
}
}

GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS,
GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;);
GetAllGearsInfo(node);
if (is_getnext_sink_dynamic_) {
GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
@@ -1343,121 +1305,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
}
}

///
/// @ingroup ge
/// @brief Record a batch label for every op related to the inputs of the given NetOutput node.
///        For each connected input the producer is recorded; if the producer is a Merge node its
///        own producers are recorded too, otherwise the other non-NetOutput consumers of the same
///        output anchor are recorded.
/// @param [in] node: netoutput Op.
/// @return SUCCESS (a null op desc is handled by GE_CHECK_NOTNULL).
///
Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) {
  set<NodePtr> nodes_to_label;
  for (auto &input_anchor : node->GetAllInDataAnchors()) {
    auto producer_out_anchor = input_anchor->GetPeerOutAnchor();
    if (producer_out_anchor == nullptr) {
      continue;
    }
    auto producer = producer_out_anchor->GetOwnerNode();
    nodes_to_label.insert(producer);

    if (producer->GetType() != MERGE) {
      // Other readers of the same output, except NetOutput nodes.
      for (const auto &sibling_in_anchor : producer_out_anchor->GetPeerInDataAnchors()) {
        auto sibling = sibling_in_anchor->GetOwnerNode();
        if (sibling->GetType() != NETOUTPUT) {
          nodes_to_label.insert(sibling);
        }
      }
      continue;
    }

    // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios
    for (const auto &merge_in_anchor : producer->GetAllInDataAnchors()) {
      auto merge_src_anchor = merge_in_anchor->GetPeerOutAnchor();
      if (merge_src_anchor != nullptr) {
        nodes_to_label.insert(merge_src_anchor->GetOwnerNode());
      }
    }
  }

  for (const auto &labelled_node : nodes_to_label) {
    auto op_desc = labelled_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // A missing attribute leaves the label empty, which falls back to the default label below.
    string label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, label);
    if (label.empty()) {
      label = kDefaultBatchLable;
    }

    // The first label registered for an op id wins.
    if (zero_copy_op_id_batch_label_.count(op_desc->GetId()) > 0) {
      continue;
    }
    zero_copy_op_id_batch_label_.emplace(op_desc->GetId(), label);
    GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
           op_desc->GetName().c_str(), op_desc->GetId(), label.c_str());
  }
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief For a Data node carrying a batch label attribute, register that label for every op
///        reading the node's output anchor (ops already registered keep their label).
/// @param [in] node: Data Op.
/// @return SUCCESS; nothing is recorded when the node has no batch label attribute.
///
Status DavinciModel::InitInputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_batch_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_batch_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  const auto out_data_anchor = node->GetOutDataAnchor(kDataIndex);
  GE_CHECK_NOTNULL(out_data_anchor);

  for (const auto &consumer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    const auto op_desc = consumer_in_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // emplace only inserts when the op id is not registered yet, so an existing label is kept.
    const bool newly_added = zero_copy_op_id_batch_label_.emplace(op_desc->GetId(), batch_label).second;
    if (newly_added) {
      GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(),
             op_desc->GetId(), batch_label.c_str());
    }
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief For a NetOutput node carrying a batch label attribute, register that label for every
///        op producing one of its inputs (ops already registered keep their label).
/// @param [in] node: netoutput Op.
/// @return SUCCESS; nothing is recorded when the node has no batch label attribute.
///
Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_batch_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_batch_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  for (const auto &input_anchor : node->GetAllInDataAnchors()) {
    const auto producer_out_anchor = input_anchor->GetPeerOutAnchor();
    if (producer_out_anchor == nullptr) {
      continue;  // Unconnected input.
    }

    const auto op_desc = producer_out_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // emplace only inserts when the op id is not registered yet, so an existing label is kept.
    const bool newly_added = zero_copy_op_id_batch_label_.emplace(op_desc->GetId(), batch_label).second;
    if (newly_added) {
      GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s",
             op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
    }
  }

  return SUCCESS;
}

/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -2240,12 +2087,61 @@ Status DavinciModel::SyncVarData() {
return ret;
}

inline int64_t SumSize(const vector<int64_t> &size_list) {
int64_t sum_size = 0;
for (const int64_t &size : size_list) {
sum_size += size;
Status DavinciModel::InitModelProfile() {
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size());
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID()));
}

std::set<uint32_t> task_id_set;
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

if (task_id_set.count(task->GetTaskID()) > 0) {
continue;
}

const auto &op_desc = GetOpByIndex(fusion_op_info->op_index);
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index);

ProfileInfo profile;
profile.fusion_info = *fusion_op_info;
Range range = op_id_map_.equal_range(fusion_op_info->op_index);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
profile.task_count++;
task_id_set.insert(range_idx->second);
}

// memory info
TaskMemInfo &mem_info = profile.memory_info;
const auto input_size = ModelUtils::GetInputSize(op_desc);
const auto output_size = ModelUtils::GetOutputSize(op_desc);
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc);
const auto weight_size = ModelUtils::GetWeightSize(op_desc);
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0);
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0);
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0);
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0);
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size;

profile_list_.emplace_back(profile);
}
return sum_size;

GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size());
return SUCCESS;
}

Status DavinciModel::SinkModelProfile() {
@@ -2253,18 +2149,12 @@ Status DavinciModel::SinkModelProfile() {
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name;
tag_name.append("model_load_info_").append(std::to_string(this->Id()));
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
std::string name = om_name_.empty() ? name_ : om_name_;
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
@@ -2296,128 +2186,71 @@ Status DavinciModel::SinkModelProfile() {
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int32_t task_num = task_list_.size();
std::multimap<uint32_t, uint32_t> op_id_map;
std::set<uint32_t> task_id_set;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if (fusion_op_info != nullptr) {
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_id = task->GetTaskID();
if (op_num > 0) {
GELOGI("task.id = %u, opNum = %u", task_id, op_num);
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
}
}
}

struct memoryInfo {
int64_t input_size;
int64_t output_size;
int64_t weight_size;
int64_t workspace_size;
int64_t total_size;

memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {}
};

using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) {
uint32_t task_id = task->GetTaskID();
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_count = 0;
if (task_id_set.count(task_id) != 0) {
continue;
}

uint32_t op_id = fusion_op_info->op_index;
Range range = op_id_map.equal_range(op_id);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
task_count++;
uint32_t task_id = range_idx->second;
task_id_set.insert(task_id);
}

// op name after fusion
string fusion_op_name = fusion_op_info->op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = fusion_op_info->original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = task->GetStreamId();
reporter_data.data = (unsigned char *)&streamId;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
struct memoryInfo memory_info;
uint32_t op_index = fusion_op_info->op_index;
auto iter = op_list_.find(op_index);
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index);
auto op_desc = iter->second;
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc));
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc));
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc));
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc));
memory_info.total_size =
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size;
reporter_data.data = (unsigned char *)&memory_info;
reporter_data.dataLen = sizeof(struct memoryInfo);
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// task info
reporter_data.data = (unsigned char *)&task_count;
// stream id info
uint32_t streamId = profile.fusion_info.stream_id;
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map.equal_range(op_id);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}
}
}

return SUCCESS;
}

@@ -3264,27 +3097,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v

for (auto &input_outside_addrs : new_input_outside_addrs_) {
ZeroCopyOffset &input_outside = input_outside_addrs.second;
bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
if (ret) {
void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
}
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}

for (auto &output_outside_addrs : new_output_outside_addrs_) {
ZeroCopyOffset &output_outside = output_outside_addrs.second;
bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
if (ret) {
void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
}
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}
}
auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
if (it == zero_copy_op_id_batch_label_.end()) {

string batch_label;
if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) {
zero_copy_task.SetBatchLabel(kDefaultBatchLable);
} else {
zero_copy_task.SetBatchLabel(it->second);
zero_copy_task.SetBatchLabel(batch_label);
}

std::lock_guard<std::mutex> lock(outside_addrs_mutex_);
@@ -3294,27 +3120,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
}
}

void DavinciModel::SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr) {
// Establish a mapping between batch label and zero copy address for multi-batch scenes
auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
if (it == zero_copy_op_id_batch_label_.end()) {
return;
}

const string &batch_label = it->second;
auto iter = zero_copy_batch_label_addrs_.find(batch_label);
if (iter != zero_copy_batch_label_addrs_.end()) {
iter->second.insert(addr);
GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(),
op_desc->GetName().c_str());
} else {
set<uintptr_t> addrs = {addr};
zero_copy_batch_label_addrs_.emplace(pair<string, set<uintptr_t>>(batch_label, addrs));
GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.",
batch_label.c_str(), op_desc->GetName().c_str());
}
}

///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@@ -3448,15 +3253,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &
void *addr = data.second.GetDataInfo().at(count).second;
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) +
data.second.GetRelativeOffset().at(count));
GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(),
data.first, addr, size, buffer_addr);
GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s",
input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str());
// For input data, just copy for rts task.
for (ZeroCopyTask &task : zero_copy_tasks_) {
if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) {
continue;
}
uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr);
if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) {
if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) {
return FAILED;
}
}
@@ -3818,9 +3623,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_);
GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
is_dynamic_ = input_data.is_dynamic_batch;
if (!is_dynamic_) {
zero_copy_batch_label_addrs_.clear();
}

GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
Status ret = CopyModelData(input_data, output_data, is_dynamic_);


+ 22
- 52
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -76,6 +76,20 @@ struct timeInfo {
int64_t dumpEndTime;
};

// Memory sizes recorded for one op's profile; every field defaults to 0.
// NOTE: keep the field order and types unchanged - the profiling reporter sends this struct as
// sizeof(memory_info) raw bytes.
struct TaskMemInfo {
  int64_t input_size = 0;
  int64_t output_size = 0;
  int64_t weight_size = 0;
  int64_t workspace_size = 0;
  int64_t total_size = 0;  // weight + input + output + workspace, filled in by the producer
};

// One profiling record: the fusion info of an op, its memory sizes and its task count.
struct ProfileInfo {
  FusionOpInfo fusion_info;
  TaskMemInfo memory_info;
  uint32_t task_count = 0;  // starts at 0
};

enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@@ -226,8 +240,6 @@ class DavinciModel {
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@@ -436,10 +448,6 @@ class DavinciModel {

int64_t GetLoadEndTime() { return load_end_time_; }

Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData();

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@@ -537,15 +545,6 @@ class DavinciModel {
struct timeInfo time_info_;
int32_t dataInputTid;

///
/// @ingroup ge
/// @brief Save Batch label Info.
/// @param [in] const OpDescPtr &op_desc
/// @param [in] uintptr_t addr: address value in args block.
/// @return None.
///
void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);

///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@@ -657,14 +656,6 @@ class DavinciModel {
///
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);

///
/// @ingroup ge
/// @brief input zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputZeroCopyNodes(const NodePtr &node);

///
/// @ingroup ge
/// @brief NetOutput Op Initialize.
@@ -673,30 +664,6 @@ class DavinciModel {
///
Status InitNetOutput(const NodePtr &node);

///
/// @ingroup ge
/// @brief output zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitOutputZeroCopyNodes(const NodePtr &node);

///
/// @ingroup ge
/// @brief input zero copy node Initialize for Case.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputBatchLabel(const NodePtr &node);

///
/// @ingroup ge
/// @brief output zero copy node Initialize for Case.
/// @param [in] NodePtr: netoutput Op.
/// @return Status
///
Status InitOutputBatchLabel(const NodePtr &node);

///
/// @ingroup ge
/// @brief Constant Op Init.
@@ -845,6 +812,11 @@ class DavinciModel {

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

Status InitModelProfile();
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

@@ -922,11 +894,6 @@ class DavinciModel {
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.

// {op_id, batch_label}
std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
// {batch_label, addrs}
std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;

std::vector<TaskInfoPtr> task_list_;
// rt_model_handle
rtModel_t rt_model_handle_;
@@ -1026,6 +993,9 @@ class DavinciModel {
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;

std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 31
- 35
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -89,6 +89,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
@@ -176,7 +177,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@@ -205,7 +206,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -215,8 +216,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
DestroyAicpuSession(session_id);
@@ -225,7 +226,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@@ -238,7 +239,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
}

ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
@@ -250,7 +251,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}

ModelManager::~ModelManager() {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@@ -358,18 +359,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
@@ -384,22 +385,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
}

std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}

std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@@ -902,7 +903,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);

davinci_model->SetModelDescVersion(new_model_desc);
@@ -970,8 +971,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@@ -984,7 +986,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}
@@ -994,9 +997,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

@@ -1011,18 +1013,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAIPPInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAippType(index, type, aipp_index);
}

@@ -1059,8 +1057,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u",
model_id);
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
@@ -1078,8 +1075,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -1091,7 +1088,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@@ -1246,7 +1243,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
@@ -1465,8 +1462,7 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}

std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
*id = ++max_model_id_;
}



+ 1
- 2
ge/graph/load/new_model_manager/model_manager.h View File

@@ -353,8 +353,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
uint32_t max_model_id_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::recursive_mutex map_mutex_;
std::mutex session_id_create_mutex_;
static::std::mutex exeception_infos_mutex_;
uint64_t session_id_bias_;


+ 6
- 8
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
fusion_op_info_.op_name = op_desc_->GetName());

string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ == ccKernelType::TE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
// get bin_file_key
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_ERROR_TO_GE_STATUS(rt_ret););


+ 3
- 7
ge/graph/load/new_model_manager/zero_copy_offset.cc View File

@@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
addr_count_ = out_count;
}

bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
bool set_batch_label_flag = false;
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto &addrs_mapping_list = GetOutsideAddrs();
auto args_addrs = addrs_mapping_list[out_count].find(outside_addr);
if (args_addrs != addrs_mapping_list[out_count].end()) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr);
if (args_addrs != outside_addrs_[out_count].end()) {
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
void *args_val = static_cast<uint8_t *>(args) + offset;
args_addrs->second.push_back(args_val);
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
args, offset);
set_batch_label_flag = true;
}
}
return set_batch_label_flag;
}

} // namespace ge

+ 1
- 1
ge/graph/load/new_model_manager/zero_copy_offset.h View File

@@ -51,7 +51,7 @@ class ZeroCopyOffset {
const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag);
void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
std::vector<void *> &tensor_addrs);
bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);

// basic_addr of l2-fusion
void *GetBasicAddr() const { return basic_addr_; }


+ 2
- 49
ge/graph/load/new_model_manager/zero_copy_task.cc View File

@@ -22,8 +22,6 @@
#include "common/ge_compiler_options.h"

namespace ge {
const char *const kDefaultBatchLable = "Batch_default";

ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size)
: name_(name), args_addr_(args), args_size_(size), is_updated_(false) {}

@@ -66,68 +64,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) {
const uint8_t *data = static_cast<const uint8_t *>(info);
args_info_.assign(data, data + size);

GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
args_addr_, args_size_, size);
}

/**
* @ingroup ge
* @brief Check is dynamic batch node.
* @param [in] addr: virtual address value from Op.
* @param [in] data: data buffer from user.
* @param [in] batch_addrs: dynamic batch addr info.
* @param [in] batch_label: batch label.
* @return: true / false
*/
bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label,
uintptr_t addr) {
// Used for dynamic batch / resolution scene
set<uintptr_t> dynamic_input_addrs;
auto dynamic_input_iter = batch_addrs.find(batch_label);
if (dynamic_input_iter != batch_addrs.end()) {
dynamic_input_addrs = dynamic_input_iter->second;
}

set<uintptr_t> fix_input_addrs;
auto fix_input_iter = batch_addrs.find(kDefaultBatchLable);
if (fix_input_iter != batch_addrs.end()) {
fix_input_addrs = fix_input_iter->second;
}

if (fix_input_addrs.empty()) {
if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) {
return false;
}
} else {
if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() &&
fix_input_addrs.find(addr) == fix_input_addrs.end()) {
return false;
}
}

return true;
}

/**
* @ingroup ge
* @brief Set user data addr to Task param.
* @param [in] addr: virtual address value from Op.
* @param [in] buffer_addr: real_data_buffer_addr from user.
* @param [in] batch_addrs: dynamic batch addr info.
* @param [in] batch_label: batch label.
* @return: void
*/
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
const string &batch_label) {
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) {
auto iter = task_addr_offset_.find(addr);
if (iter != task_addr_offset_.end()) {
auto &cur_pair = *iter;
uint8_t *args_info = args_info_.data();
for (auto offset : cur_pair.second) {
if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) {
continue;
}

auto dst_addr = static_cast<uint8_t *>(buffer_addr);
GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p",
name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr);


+ 1
- 7
ge/graph/load/new_model_manager/zero_copy_task.h View File

@@ -67,12 +67,9 @@ class ZeroCopyTask {
* @brief Set user data addr to Task param.
* @param [in] addr: virtual address value from Op.
* @param [in] buffer_addr: data buffer_addr from user.
* @param [in] batch_addrs: dynamic batch addr info.
* @param [in] batch_label: batch label.
* @return: 0 SUCCESS / others FAILED
*/
ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
const string &batch_label);
ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr);

/**
* @ingroup ge
@@ -91,9 +88,6 @@ class ZeroCopyTask {
return batch_label_;
}

protected:
bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr);

private:
const string name_;



+ 13
- 1
ge/graph/manager/graph_manager.cc View File

@@ -56,6 +56,7 @@
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/mark_graph_unknown_status_pass.h"
#include "graph/passes/dynamic_single_op_reset_shape_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/merge_input_memcpy_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
@@ -631,11 +632,22 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_

Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) {
GE_CHECK_NOTNULL(compute_graph);
PassManager pass_for_dynamic_shape_reset_optimize;
GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass(
"SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass))
GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize);
Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph);
GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph");
if (ret != SUCCESS && ret != NOT_CHANGED) {
GELOGE(ret, "Run passes when optimize subgraph failed");
return ret;
}

auto sub_graph_map = partitioner.GetSubGraphMap();
GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.",
options_.build_mode.c_str(),
options_.build_step.c_str());
Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Multiply optimize subgraph failed");
return ret;


+ 1
- 1
ge/graph/manager/memory_api.cc View File

@@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t
});

auto hcom_remote_mem_register =
(HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register");
(HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem");
if (hcom_remote_mem_register == nullptr) {
GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function.");
return FAILED;


+ 159
- 0
ge/graph/passes/dynamic_single_op_reset_shape_pass.cc View File

@@ -0,0 +1,159 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/dynamic_single_op_reset_shape_pass.h"
#include "common/ge_inner_error_codes.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {
namespace {
// Single dimension value placed into the shape that ResetOpShape writes into tensor descs.
// NOTE(review): -2 presumably denotes an unknown-rank shape in GE - confirm against the GeShape conventions.
constexpr int64_t kDynamicShapeDim = -2;
// Engine names that CheckAllAicpuNodes accepts as AICPU engines.
constexpr const char *kEngineNameAiCpu = "DNN_VM_AICPU_ASCEND";
constexpr const char *kEngineNameAiCpuTf = "DNN_VM_AICPU";
}  // namespace
// Resets the tensor shapes of dynamic-shape single-op AICPU nodes of the graph.
//
// The pass does nothing (and returns SUCCESS) when CheckAllAicpuNodes reports that the graph contains
// a node whose engine is not one of the AICPU engines. Otherwise every direct node that is not
// DATA / CONSTANT / CONSTANTOP / NETOUTPUT and whose op desc carries ATTR_DYNAMIC_SHAPE_SINGLE_AICPU
// with value true is handed to ResetOpShape.
//
// @param [in] graph: graph whose direct nodes are inspected.
// @return SUCCESS on success (including the "nothing to do" case);
//         GE_CLI_GE_NOT_INITIALIZED when GELib is missing or not initialized, when the engine check
//         fails, or when resetting a node fails. A null graph / op desc is rejected by GE_CHECK_NOTNULL
//         (presumably with an error status - confirm against the macro definition).
Status DynamicSingleOpResetShapePass::Run(ComputeGraphPtr graph) {
  GE_CHECK_NOTNULL(graph);

  std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  if (instance == nullptr || !instance->InitFlag()) {
    GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Run DynamicSingleOpResetShapePass failed, GELib is not initialized.");
    return ge::GE_CLI_GE_NOT_INITIALIZED;
  }

  // Skip the whole graph as soon as one node runs on a non-AICPU engine.
  bool is_not_aicpu = false;
  if (CheckAllAicpuNodes(graph, is_not_aicpu) != SUCCESS) {
    GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Check if graph has not aicpu node failed.");
    return ge::GE_CLI_GE_NOT_INITIALIZED;
  }
  if (is_not_aicpu) {
    GELOGI("The graph [%s] has not aicpu node, whose aicpu nodes would not be reset dynamic shape",
           graph->GetName().c_str());
    return SUCCESS;
  }

  for (const auto &node : graph->GetDirectNode()) {
    // Fetch the op desc once; ResetOpShape needs a non-const lvalue.
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // Input, constant and output nodes are never reset.
    const auto node_type = node->GetType();
    if (node_type == DATA || node_type == CONSTANT || node_type == CONSTANTOP || node_type == NETOUTPUT) {
      continue;
    }

    // Only nodes explicitly flagged with ATTR_DYNAMIC_SHAPE_SINGLE_AICPU == true are reset.
    bool single_aicpu_unknown = false;
    if (!AttrUtils::GetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, single_aicpu_unknown) ||
        !single_aicpu_unknown) {
      continue;
    }

    // Reset the aicpu node shapes to the unknown shape.
    if (ResetOpShape(op_desc) != SUCCESS) {
      GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Reset node[%s] dynamic shape failed.", node->GetName().c_str());
      return ge::GE_CLI_GE_NOT_INITIALIZED;
    }
    GELOGD("Reset dynamic aicpu node [%s] shape success!", node->GetName().c_str());
  }

  GELOGD("Reset dynamic aicpu nodes shape of graph [%s] success!", graph->GetName().c_str());
  return SUCCESS;
}

// Determines whether the graph contains a node that is not assigned to an AICPU engine.
//
// DATA / CONSTANT / CONSTANTOP / NETOUTPUT nodes are ignored. The scan stops at the first node whose
// engine name differs from both kEngineNameAiCpu and kEngineNameAiCpuTf.
//
// @param [in] graph: graph whose direct nodes are scanned (dereferenced without a null check here).
// @param [out] is_not_aicpu: true when a non-AICPU node was found, false otherwise.
// @return SUCCESS normally; GRAPH_FAILED when a scanned node has an empty engine name.
Status DynamicSingleOpResetShapePass::CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu) {
  is_not_aicpu = false;
  for (const auto &node : graph->GetDirectNode()) {
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // Input, constant and output nodes do not take part in the engine check.
    const auto node_type = node->GetType();
    const bool is_io_or_const =
        (node_type == DATA) || (node_type == CONSTANT) || (node_type == CONSTANTOP) || (node_type == NETOUTPUT);
    if (is_io_or_const) {
      continue;
    }

    // A node without an engine name cannot be classified.
    const string engine = op_desc->GetOpEngineName();
    if (engine.empty()) {
      GELOGE(GRAPH_FAILED, "Get engine failed of node[%s].", node->GetName().c_str());
      return GRAPH_FAILED;
    }

    // The first node placed on any other engine settles the answer.
    const bool runs_on_aicpu = (engine == kEngineNameAiCpu) || (engine == kEngineNameAiCpuTf);
    if (!runs_on_aicpu) {
      is_not_aicpu = true;
      return SUCCESS;
    }
  }
  return SUCCESS;
}

// Reports whether the tensor desc is marked as a const input.
//
// Reads the boolean attr CONST_ATTR_NAME_INPUT from the desc. The outcome of the lookup itself is
// ignored, so the result stays false unless the lookup stores true into the flag.
//
// @param [in] input_tensor_desc: tensor desc to query.
// @return value of the flag after the attr lookup.
bool DynamicSingleOpResetShapePass::CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc) {
  bool marked_const{false};
  static_cast<void>(AttrUtils::GetBool(input_tensor_desc, CONST_ATTR_NAME_INPUT, marked_const));
  return marked_const;
}

// Replaces the shapes of an op's tensors with the single-dimension shape {kDynamicShapeDim}.
//
// Inputs are processed first; outputs are only touched when the input step succeeded and changed at
// least one input desc. Failures of either step are intentionally not propagated: once op_desc has
// passed the null check the function returns SUCCESS.
//
// @param [in,out] op_desc: op desc whose input / output tensor descs are updated.
// @return SUCCESS (a null op_desc is rejected by GE_CHECK_NOTNULL).
Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
  GE_CHECK_NOTNULL(op_desc);

  std::vector<int64_t> unknown_dims{kDynamicShapeDim};
  GeShape unknown_shape(unknown_dims);

  bool inputs_changed = false;
  const bool inputs_ok = (ResetInputTensorShape(op_desc, unknown_shape, inputs_changed) == SUCCESS);
  if (!inputs_ok || !inputs_changed) {
    // Nothing was reset on the input side, so the outputs keep their shapes.
    return SUCCESS;
  }

  static_cast<void>(ResetOutputTensorShape(op_desc, unknown_shape));
  return SUCCESS;
}

// Writes dynamic_shape into every input tensor desc of the op that is neither scalar nor const.
//
// An input is skipped when its current shape has no dims, or when CheckIfConstInput reports it as a
// const input.
//
// @param [in,out] op_desc: op desc whose input descs are updated.
// @param [in] dynamic_shape: shape assigned to the selected input descs.
// @param [out] reset_shape_flag: true when at least one input desc was changed; it is cleared before
//                                op_desc is null-checked.
// @return SUCCESS when the loop completes; a null op_desc or input desc is rejected by GE_CHECK_NOTNULL.
Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape,
                                                            bool &reset_shape_flag) {
  reset_shape_flag = false;
  GE_CHECK_NOTNULL(op_desc);

  for (size_t idx = 0; idx < op_desc->GetAllInputsDesc().size(); ++idx) {
    auto tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(idx));
    GE_CHECK_NOTNULL(tensor_desc);

    // Scalars (no dims) and const inputs keep their original shape.
    const bool is_scalar = tensor_desc->GetShape().GetDims().empty();
    if (is_scalar || CheckIfConstInput(tensor_desc)) {
      continue;
    }

    reset_shape_flag = true;
    tensor_desc->SetShape(dynamic_shape);
  }
  return SUCCESS;
}

// Writes dynamic_shape into every non-scalar output tensor desc of the op.
//
// @param [in,out] op_desc: op desc whose output descs are updated.
// @param [in] dynamic_shape: shape assigned to each output desc whose current shape has dims.
// @return SUCCESS when the loop completes; a null op_desc or output desc is rejected by GE_CHECK_NOTNULL.
Status DynamicSingleOpResetShapePass::ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape) {
  GE_CHECK_NOTNULL(op_desc);

  for (size_t idx = 0; idx < op_desc->GetAllOutputsDesc().size(); ++idx) {
    auto tensor_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(idx));
    GE_CHECK_NOTNULL(tensor_desc);

    // Scalar outputs (no dims) keep their original shape.
    const bool has_dims = !tensor_desc->GetShape().GetDims().empty();
    if (has_dims) {
      tensor_desc->SetShape(dynamic_shape);
    }
  }
  return SUCCESS;
}
} // namespace ge

+ 36
- 0
ge/graph/passes/dynamic_single_op_reset_shape_pass.h View File

@@ -0,0 +1,36 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_
#define GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_
#include "graph/graph.h"
#include "inc/graph_pass.h"
#include "init/gelib.h"

namespace ge {
// Graph pass that resets the tensor shapes of nodes flagged as dynamic-shape single AICPU ops.
class DynamicSingleOpResetShapePass : public GraphPass {
 public:
  // Runs the pass on the direct nodes of the graph.
  Status Run(ComputeGraphPtr graph) override;

 private:
  // --- queries ---
  // Sets is_not_aicpu to true when the graph holds a node assigned to a non-AICPU engine.
  Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu);
  // Returns the value of the const-input attr of the tensor desc (false when it is absent).
  bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc);

  // --- shape rewriting ---
  // Resets the input shapes of the op and, when any input changed, the output shapes as well.
  Status ResetOpShape(OpDescPtr &op_desc);
  // Assigns dynamic_shape to the non-scalar, non-const inputs; reset_shape_flag reports any change.
  Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag);
  // Assigns dynamic_shape to the non-scalar outputs.
  Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_

+ 7
- 5
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -1407,11 +1407,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() {
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE");
if (multi_batch_with_case != nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
if (GetLocalOmgContext().dynamic_node_type.empty()) {
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
if (multi_batch_with_switchn == nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}
}
if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");


+ 1
- 0
ge/host_cpu_engine/CMakeLists.txt View File

@@ -193,6 +193,7 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE

target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
)

target_include_directories(host_cpu_opskernel_builder_static PRIVATE


+ 25
- 25
ge/hybrid/node_executor/hccl/hccl_node_executor.cc View File

@@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
GELOGE(FAILED, "hccl handle is nullptr! ");
return FAILED;
}
auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function<void(HcclResult status)>))dlsym(
context.handle_, "EnqueueHcomOpertion");
if (EnqueueHcomOpertion == nullptr) {
GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function.");
auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function<void(HcclResult status)>))dlsym(
context.handle_, "HcomExecEnqueueOperation");
if (HcomExecEnqueueOperation == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function.");
if (dlclose(context.handle_) != 0) {
GELOGW("Failed to close handle %s", dlerror());
}
@@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
const OpDescPtr op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);

HcomOpertion op_info;
HcomOperation op_info;
op_info.hcclType = op_desc->GetType();
op_info.inputPtr = inputs.empty() ? nullptr : inputs[0];
op_info.outputPtr = outputs.empty() ? nullptr : outputs[0];
@@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
op_info.root = root_id;
auto callback = [this, op_desc](HcclResult status) {
if (status != HCCL_SUCCESS) {
GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status);
GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status);
}
std::lock_guard<std::mutex> lock(this->hccl_mutex_);
this->cond_.notify_all();
@@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root);
op_info.count = count;

HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback);
HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback);
if (hccl_ret != HCCL_SUCCESS) {
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
return HCCL_E_INTERNAL;
}

@@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccess

Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName());
auto EnqueueRemoteAccess =
auto HcomExecEnqueueRemoteAccess =
(HcclResult(*)(const string &, const vector<HcomRemoteAccessAddrInfo> &,
std::function<void(HcclResult status)>))dlsym(context.handle_, "EnqueueRemoteAccess");
if (EnqueueRemoteAccess == nullptr) {
GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function.");
std::function<void(HcclResult status)>))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess");
if (HcomExecEnqueueRemoteAccess == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function.");
if (dlclose(context.handle_) != 0) {
GELOGW("Failed to close handle %s", dlerror());
}
@@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do

auto callback = [this](HcclResult status) {
if (status != HCCL_SUCCESS) {
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status);
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status);
}
std::lock_guard<std::mutex> lock(this->hccl_mutex_);
this->cond_.notify_all();
GELOGI("rdma callback success.");
};
HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback);
HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback);
if (hccl_ret != HCCL_SUCCESS) {
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
return HCCL_E_INTERNAL;
}

@@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() {
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror());
return FAILED;
}
auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize");
if (HcomExcutorInitialize == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function.");
auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize");
if (HcomExecInitialize == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function.");
return FAILED;
}
HcclResult hccl_ret = HcomExcutorInitialize();
HcclResult hccl_ret = HcomExecInitialize();
if (hccl_ret == HCCL_E_PTR) {
GELOGI("Hccl comm is null, hcom executor initialize is not required.");
} else if (hccl_ret == HCCL_SUCCESS) {
GELOGI("Hcom executor initialize success.");
} else {
GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
return FAILED;
}
return SUCCESS;
}

Status HcclNodeExecutor::Finalize() {
auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize");
if (HcomExcutorFinalize == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function.");
auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize");
if (HcomExecFinalize == nullptr) {
GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function.");
return FAILED;
}
HcclResult hccl_ret = HcomExcutorFinalize();
HcclResult hccl_ret = HcomExecFinalize();
if (hccl_ret != HCCL_SUCCESS) {
GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret);
GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret);
return FAILED;
}
// dlclose file handle


+ 3
- 0
ge/offline/CMakeLists.txt View File

@@ -27,6 +27,7 @@ target_compile_definitions(atc PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
)

target_include_directories(atc PRIVATE
@@ -87,6 +88,7 @@ target_compile_definitions(atc_atc.bin PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
)

target_include_directories(atc_atc.bin PRIVATE
@@ -152,6 +154,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
)

target_include_directories(fwk_atc.bin PRIVATE


+ 2
- 0
ge/proto/op_mapping_info.proto View File

@@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 3
- 13
inc/external/ge/ge_api_types.h View File

@@ -395,19 +395,9 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
OP_BANK_UPDATE};

// for interface: aclgrphParse
const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT,
INPUT_SHAPE,
OP_NAME_MAP,
IS_DYNAMIC_INPUT,
INPUT_FP16_NODES,
IS_INPUT_ADJUST_HW_LAYOUT,
IS_OUTPUT_ADJUST_HW_LAYOUT,
OUTPUT,
OUTPUT_TYPE,
OUT_NODES,
COMPRESS_WEIGHT_CONF,
ENABLE_SCOPE_FUSION_PASSES,
LOG_LEVEL};
const std::set<std::string> ir_parser_suppported_options = {
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,
OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES};

// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,


+ 3
- 0
inc/framework/common/ge_types.h View File

@@ -37,6 +37,9 @@ enum FrameworkType {
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
#ifndef ONLY_COMPILE_OPEN_SRC
ONNX,
#endif
FRAMEWORK_RESERVED,
};



+ 3
- 2
inc/framework/common/profiling/ge_profiling.h View File

@@ -20,7 +20,8 @@
#include "ge/ge_api_error_codes.h"
#include "toolchain/prof_callback.h"

#define MAX_DEV_NUM (64)
const int MAX_DEV_NUM = 64;

enum ProfCommandHandleType {
kProfCommandhandleInit = 0,
kProfCommandhandleStart,
@@ -32,7 +33,7 @@ enum ProfCommandHandleType {

struct ProfCommandHandleData {
uint64_t profSwitch;
uint32_t devNums; // length of device id list
uint32_t devNums; // length of device id list
uint32_t devIdList[MAX_DEV_NUM];
uint32_t modelId;
};


+ 0
- 13
inc/framework/executor/ge_executor.h View File

@@ -30,8 +30,6 @@
#include "runtime/base.h"

namespace ge {
class ModelListenerAdapter;

class SingleOp;
class DynamicSingleOp;

@@ -55,14 +53,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
ge::Status Initialize();
ge::Status Finalize();

// Load model
ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority,
std::shared_ptr<ge::ModelListener> listener);

ge::Status UnloadModel(uint32_t modelId);

ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data);

// Get input and output descriptor
ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
@@ -168,9 +160,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc);

ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener);

ge::Status CommandHandle(const ge::Command &command);

ge::Status SetDump(const DumpConfig &dump_config);
@@ -297,8 +286,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
private:
static bool isInit_;
};

ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info);
} // namespace ge

#endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_

+ 1
- 1
inc/framework/omg/parser/parser_inner_ctx.h View File

@@ -59,7 +59,7 @@ struct ParserContext {
bool train_flag = false;
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND;
domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
RunMode run_mode = ONLY_PRE_CHECK;
RunMode run_mode = GEN_OM_MODEL;
// save caffe custom proto path, used by caffe parse
std::string custom_proto_path;
// save caffe proto path, used by caffe parse


+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -182,6 +182,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/mark_same_addr_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/mark_graph_unknown_status_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/mark_agnostic_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/dimension_compute_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc"


+ 0
- 25
third_party/fwkacllib/inc/toolchain/slog.h View File

@@ -18,9 +18,7 @@
#define D_SYSLOG_H_

#ifdef __cplusplus
#ifndef LOG_CPP
extern "C" {
#endif
#endif // __cplusplus

#ifndef LINUX
@@ -107,7 +105,6 @@ extern "C" {
#define SECURITY_LOG_MASK (0x00100000)
#define RUN_LOG_MASK (0x01000000)
#define OPERATION_LOG_MASK (0x10000000)
#define RESERVERD_LENGTH 52

typedef struct tagDCODE {
const char *cName;
@@ -119,18 +116,6 @@ typedef struct tagKV {
char *value;
} KeyValue;

typedef enum {
APPLICATION = 0,
SYSTEM
} ProcessType;

typedef struct {
ProcessType type;
unsigned int pid;
unsigned int deviceId;
char reserved[RESERVERD_LENGTH];
} LogAttr;

/**
* @ingroup slog
*
@@ -243,14 +228,6 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
*/
DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);

/**
* @ingroup slog
* @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
* @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetAttr(LogAttr logAttr);

/**
* @ingroup slog
* @brief dlog_error: print error log
@@ -390,8 +367,6 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

#ifdef __cplusplus
#ifndef LOG_CPP
}
#endif // LOG_CPP
#endif // __cplusplus
#endif // D_SYSLOG_H_

Loading…
Cancel
Save