| @@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| # fi | |||
| # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| echo "Generating coverage statistics, please wait..." | |||
| cd ${BASEPATH} | |||
| rm -rf ${BASEPATH}/cov | |||
| mkdir ${BASEPATH}/cov | |||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
| lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
| cd ${BASEPATH}/cov | |||
| genhtml coverage.info | |||
| echo "Generating coverage statistics, please wait..." | |||
| cd ${BASEPATH} | |||
| rm -rf ${BASEPATH}/cov | |||
| mkdir ${BASEPATH}/cov | |||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
| cd ${BASEPATH}/cov | |||
| genhtml coverage.info | |||
| fi | |||
| # generate output package in tar form, including ut/st libraries/executables | |||
| @@ -129,38 +129,38 @@ set(TRAIN_SRC_LIST | |||
| "graph/label/partitioned_call_label_maker.cc" | |||
| "graph/label/while_label_maker.cc" | |||
| "graph/load/graph_loader.cc" | |||
| "graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/new_model_manager/data_dumper.cc" | |||
| "graph/load/new_model_manager/data_inputer.cc" | |||
| "graph/load/new_model_manager/davinci_model.cc" | |||
| "graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "graph/load/new_model_manager/model_manager.cc" | |||
| "graph/load/new_model_manager/model_utils.cc" | |||
| "graph/load/new_model_manager/aipp_utils.cc" | |||
| "graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/hccl_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/new_model_manager/task_info/task_info.cc" | |||
| "graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "graph/load/new_model_manager/zero_copy_task.cc" | |||
| "graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/model_manager/data_dumper.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| "graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/model_manager/task_info/hccl_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/model_manager/task_info/task_info.cc" | |||
| "graph/load/model_manager/tbe_handle_store.cc" | |||
| "graph/load/model_manager/zero_copy_task.cc" | |||
| "graph/load/model_manager/zero_copy_offset.cc" | |||
| "graph/manager/graph_context.cc" | |||
| "graph/manager/graph_manager.cc" | |||
| "graph/manager/graph_manager_utils.cc" | |||
| @@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST | |||
| "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||
| "hybrid/node_executor/controlop/control_op_executor.cc" | |||
| "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
| "hybrid/node_executor/hccl/hccl_node_executor.cc" | |||
| @@ -605,37 +606,37 @@ set(INFER_SRC_LIST | |||
| "graph/manager/util/rt_context_util.cc" | |||
| "graph/manager/util/variable_accelerate_ctrl.cc" | |||
| "graph/manager/util/debug.cc" | |||
| "graph/load/new_model_manager/model_manager.cc" | |||
| "graph/load/new_model_manager/data_inputer.cc" | |||
| "graph/load/new_model_manager/davinci_model.cc" | |||
| "graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "graph/load/new_model_manager/model_utils.cc" | |||
| "graph/load/new_model_manager/aipp_utils.cc" | |||
| "graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/new_model_manager/zero_copy_task.cc" | |||
| "graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "graph/load/new_model_manager/data_dumper.cc" | |||
| "graph/load/new_model_manager/task_info/task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| "graph/load/model_manager/tbe_handle_store.cc" | |||
| "graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/model_manager/zero_copy_task.cc" | |||
| "graph/load/model_manager/zero_copy_offset.cc" | |||
| "graph/load/model_manager/data_dumper.cc" | |||
| "graph/load/model_manager/task_info/task_info.cc" | |||
| "graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "single_op/task/op_task.cc" | |||
| "single_op/task/build_task_utils.cc" | |||
| "single_op/task/tbe_task_builder.cc" | |||
| @@ -706,7 +707,7 @@ target_compile_options(ge_runner PRIVATE | |||
| -O2 | |||
| -fno-common | |||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable> | |||
| "$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable;-Werror=format>" | |||
| ) | |||
| target_include_directories(ge_runner SYSTEM PRIVATE | |||
| @@ -775,7 +776,7 @@ target_compile_options(ge_compiler PRIVATE | |||
| -O2 | |||
| -fno-common | |||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable> | |||
| "$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable;-Werror=format>" | |||
| ) | |||
| target_include_directories(ge_compiler SYSTEM PRIVATE | |||
| @@ -57,6 +57,7 @@ message TaskDef { | |||
| LabelSetDef label_set = 37; | |||
| LabelGotoExDef label_goto_ex = 38; | |||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||
| KernelDefWithHandle kernel_with_handle = 40; | |||
| } | |||
| message KernelDef { | |||
| @@ -74,6 +75,19 @@ message KernelDef { | |||
| uint32 kernel_ext_info_size = 19; | |||
| } | |||
| message KernelDefWithHandle { | |||
| KernelContext context = 1; | |||
| uint64 handle = 10; | |||
| string dev_func = 11; | |||
| uint32 block_dim = 12; | |||
| uint32 args_size = 13; | |||
| bytes args = 14; | |||
| bytes sm_desc = 15; | |||
| string original_kernel_key = 16; | |||
| string node_info = 17; | |||
| } | |||
| message KernelContext { | |||
| uint32 kernel_type = 1; | |||
| uint32 op_id = 2; // OP type in CCE | |||
| @@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { | |||
| while (size > size_1g) { | |||
| write_count = mmWrite(fd, reinterpret_cast<void *>(seek), size_1g); | |||
| if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { | |||
| GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); | |||
| GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno)); | |||
| return FAILED; | |||
| } | |||
| size -= size_1g; | |||
| @@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { | |||
| // -1: Failed to write to file; - 2: Illegal parameter | |||
| if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { | |||
| GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); | |||
| GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno)); | |||
| return FAILED; | |||
| } | |||
| @@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi | |||
| WriteData(static_cast<const void *>(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); | |||
| // Write partition data | |||
| for (const auto &partitionData : partition_datas) { | |||
| GELOGI("GC:size[%zu]", partitionData.size); | |||
| GELOGI("GC:size[%u]", partitionData.size); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| WriteData(static_cast<const void *>(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; | |||
| break); | |||
| @@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi | |||
| // Write partition data | |||
| auto &cur_partition_datas = all_partition_datas[index]; | |||
| for (const auto &partition_data : cur_partition_datas) { | |||
| GELOGI("GC:size[%zu]", partition_data.size); | |||
| GELOGI("GC:size[%u]", partition_data.size); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| WriteData(static_cast<const void *>(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; | |||
| break); | |||
| @@ -99,8 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||
| } | |||
| int64_t output_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size failed"); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Get output size in lanch dump op is %ld", output_size); | |||
| output.set_size(output_size); | |||
| @@ -126,8 +126,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||
| } | |||
| int64_t input_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get input size failed"); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Get input size in lanch dump op is %ld", input_size); | |||
| input.set_size(input_size); | |||
| @@ -151,31 +151,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
| size_t proto_size = op_mapping_info.ByteSizeLong(); | |||
| bool ret = op_mapping_info.SerializeToString(&proto_msg); | |||
| if (!ret || proto_size == 0) { | |||
| GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size); | |||
| return FAILED; | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| constexpr int32_t io_addr_num = 2; | |||
| @@ -193,8 +193,8 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
| nullptr, // no need smDesc | |||
| stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||
| return rt_ret; | |||
| GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGI("Kernel launch dump op success"); | |||
| return SUCCESS; | |||
| @@ -204,9 +204,15 @@ Status DumpOp::LaunchDumpOp() { | |||
| GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | |||
| int32_t device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||
| if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||
| GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
| return RT_FAILED; | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| if (device_id < 0) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, | |||
| "Check device_id failed, device_id = %d, which should be not less than 0.", | |||
| device_id); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||
| auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; | |||
| @@ -232,29 +238,31 @@ Status DumpOp::LaunchDumpOp() { | |||
| task.mutable_op()->set_op_name(op_desc_->GetName()); | |||
| task.mutable_op()->set_op_type(op_desc_->GetType()); | |||
| if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||
| if (DumpOutput(task) != SUCCESS) { | |||
| GELOGE(FAILED, "Dump output failed"); | |||
| return FAILED; | |||
| auto ret = DumpOutput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Dump output failed"); | |||
| return ret; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| if (dump_properties_.GetDumpMode() == kDumpInput) { | |||
| if (DumpInput(task) != SUCCESS) { | |||
| GELOGE(FAILED, "Dump input failed"); | |||
| return FAILED; | |||
| auto ret = DumpInput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Dump input failed"); | |||
| return ret; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
| auto ret = DumpOutput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "Dump output failed when in dumping all"); | |||
| return FAILED; | |||
| GELOGE(ret, "Dump output failed when in dumping all"); | |||
| return ret; | |||
| } | |||
| ret = DumpInput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "Dump input failed when in dumping all"); | |||
| return FAILED; | |||
| GELOGE(ret, "Dump input failed when in dumping all"); | |||
| return ret; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| @@ -162,7 +162,7 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu | |||
| Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN) | |||
| @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| Status TransShapeDhwckToFz3D(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto d = src_shape.at(kDhwcnD); | |||
| auto h = src_shape.at(kDhwcnH); | |||
| @@ -163,14 +163,14 @@ Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::v | |||
| DataType data_type, Format dst_format, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) { | |||
| return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D) | |||
| @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| Status TransShapeDhwncToFz3DTranspose(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kDhwncDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto d = src_shape.at(kDhwncD); | |||
| auto h = src_shape.at(kDhwncH); | |||
| @@ -164,14 +164,14 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, con | |||
| DataType data_type, Format dst_format, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) { | |||
| return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE) | |||
| @@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(DIM_DEFAULT_VALUE); | |||
| hw_shape.push_back(src_shape[kNdDimIndexN]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| default: | |||
| @@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -299,11 +299,19 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) { | |||
| GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||
| if (!IsDataTypeSupport(data_type)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(src_format, src_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| ShapeVector hw_shape; | |||
| return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape); | |||
| @@ -334,7 +342,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ) | |||
| @@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||
| Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -54,15 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ | |||
| dst_shape.push_back(kNiSize); | |||
| dst_shape.push_back(c0); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNchwN); | |||
| @@ -74,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||
| Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto h = src_shape.at(kHwcnH); | |||
| @@ -87,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||
| Status TransShapeNhwcToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNhwcN); | |||
| @@ -369,7 +370,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r | |||
| Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) { | |||
| @@ -382,7 +383,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<i | |||
| return TransShapeNchwToFz(src_shape, data_type, dst_shape); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalZ, FORMAT_NCHW, FORMAT_FRACTAL_Z) | |||
| @@ -86,8 +86,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(DIM_DEFAULT_VALUE); | |||
| hw_shape.push_back(src_shape[kNdDimIndexN]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| default: | |||
| @@ -105,8 +106,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -311,11 +313,19 @@ Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) { | |||
| GELOGE(PARAM_INVALID, "Not support trans format from %s to %s, src shape %s, data type %s", | |||
| if (!IsDataTypeSupport(data_type)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(src_format, src_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| ShapeVector hw_shape; | |||
| return TransShapeToFracZz(src_shape, data_type, dst_shape, hw_shape); | |||
| @@ -346,7 +356,7 @@ Status FormatTransferFractalZzND::TransShape(Format src_format, const ShapeVecto | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalZz, FORMAT_ND, FORMAT_FRACTAL_ZZ) | |||
| @@ -161,7 +161,7 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFracZHwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN) | |||
| @@ -160,7 +160,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW) | |||
| @@ -43,8 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector<in | |||
| dst_shape.push_back(cube_size); | |||
| dst_shape.push_back(cube_size); | |||
| if (!CheckShapeValid(dst_shape, kC1hwncoc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -197,12 +198,15 @@ Status FormatTransferHwcnC1hwncoc0::TransShape(Format src_format, const std::vec | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) { | |||
| if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape); | |||
| } else if (src_format != FORMAT_HWCN) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } else { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| } | |||
| @@ -157,7 +157,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult | |||
| Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC) | |||
| @@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||
| Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| auto chw = c * h * w; | |||
| @@ -59,8 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||
| dst_shape.push_back(c0); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -68,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||
| Status TransShapeNchwToFzC04(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNchwN); | |||
| @@ -293,13 +294,13 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult | |||
| Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) { | |||
| return TransShapeNchwToFzC04(src_shape, data_type, dst_shape); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04) | |||
| @@ -32,12 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| std::vector<int64_t> &dst_shape) { | |||
| int64_t c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| dst_shape.clear(); | |||
| dst_shape.push_back(src_shape.at(kNchwN)); | |||
| @@ -46,8 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| dst_shape.push_back(src_shape.at(kNchwW)); | |||
| dst_shape.push_back(c0); | |||
| if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -193,7 +195,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto | |||
| if (src_format == FORMAT_NCHW) { | |||
| return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape); | |||
| } else { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| } | |||
| @@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| std::vector<int64_t> &dst_shape) { | |||
| int64_t c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| dst_shape.clear(); | |||
| dst_shape.push_back(src_shape.at(kNhwcN)); | |||
| @@ -44,8 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| dst_shape.push_back(src_shape.at(kNhwcW)); | |||
| dst_shape.push_back(c0); | |||
| if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -189,12 +190,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) { | |||
| if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape); | |||
| } else if (src_format != FORMAT_NHWC) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } else { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| } | |||
| } | |||
| @@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vector<int64_t | |||
| std::string error = "Failed to trans shape, do not support transpose from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| auto iter = dst_iter->second.find(dst_format); | |||
| if (iter == dst_iter->second.end()) { | |||
| std::string error = "Failed to trans shape, do not support transpose from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| perm = iter->second; | |||
| return SUCCESS; | |||
| @@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector< | |||
| std::vector<int64_t> perm_arg; | |||
| GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg)); | |||
| if (!IsShapeArgValid(src_shape, perm_arg)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| } | |||
| dst_shape = TransShapeByPerm(src_shape, perm_arg); | |||
| return SUCCESS; | |||
| @@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form | |||
| std::string error = "Failed to trans data from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| } | |||
| return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape); | |||
| @@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) { | |||
| if (size <= 0) { | |||
| std::string error = "Failed to get cube size, the data type " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid"; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return -1; | |||
| } else if (size == 1) { | |||
| return kCubeSize * 2; // 32 bytes cube size | |||
| @@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector<int64_t> &shape, const int64_t expect_dim | |||
| if (expect_dims <= 0 || shape.size() != static_cast<size_t>(expect_dims)) { | |||
| std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) + | |||
| ", expect " + FmtToStr(expect_dims); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return false; | |||
| } | |||
| return IsShapeValid(shape); | |||
| @@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector<int64_t> &shape) { | |||
| for (auto dim : shape) { | |||
| if (dim < 0) { | |||
| std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return false; | |||
| } | |||
| if (dim != 0 && kShapeItemNumMAX / dim < num) { | |||
| std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return false; | |||
| } | |||
| num *= dim; | |||
| @@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector<int64_t> &expect_ | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", invalid relationship between src shape " + | |||
| FmtToStr(ShapeToString(args.src_shape)) + " and dst " + | |||
| FmtToStr(ShapeToString(args.dst_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector<int64_t> &expect_ | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" + | |||
| FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" + | |||
| FmtToStr(ShapeToString(expect_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
| std::vector<std::string> path_vec; | |||
| SplitPath(path, path_vec); | |||
| for (const auto &single_path : path_vec) { | |||
| GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID, | |||
| GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, | |||
| "The shared library file path is too long!"); | |||
| continue); | |||
| // load break when number of loaded so reach maximum | |||
| @@ -125,7 +125,8 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | |||
| {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); | |||
| GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", | |||
| GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, | |||
| "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", | |||
| file_path_dlopen.c_str(), error); | |||
| continue; | |||
| } | |||
| @@ -138,8 +139,8 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | |||
| {"mmDlsym", FmtToStr(func_name) + " is skipped since function" + | |||
| FmtToStr(func_name) + " is not existed!"}); | |||
| GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | |||
| func_name.c_str()); | |||
| GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", | |||
| func_name.c_str(), func_name.c_str()); | |||
| is_valid = false; | |||
| break; | |||
| } | |||
| @@ -28,7 +28,7 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/detail/attributes_holder.h" | |||
| #include "graph/detail/model_serialize_imp.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| @@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { | |||
| auto offset = (tensor_addr_mgr.offset); | |||
| // Check logic address and offset | |||
| if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) { | |||
| GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address, | |||
| offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase()); | |||
| GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon", | |||
| logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase()); | |||
| return PARAM_INVALID; | |||
| } | |||
| // Offset is needed by SaveVarVddr instead of logic address | |||
| @@ -23,7 +23,7 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/omg/version.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| @@ -479,8 +479,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c | |||
| Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Parse model content failed!"); | |||
| return status; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | |||
| @@ -517,8 +517,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||
| } | |||
| if (is_assign_model_) { | |||
| GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||
| return GE_EXEC_LOAD_MODEL_REPEATED; | |||
| GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||
| return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED; | |||
| } | |||
| if (ReleaseLocalModelData() != SUCCESS) { | |||
| @@ -528,8 +528,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||
| Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Parse model content failed!"); | |||
| return status; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | |||
| @@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||
| //model verison 1.0 file header does not have model_num member | |||
| is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION && | |||
| file_header_->model_num > kStatiOmFileModelNum; | |||
| GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version); | |||
| GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version); | |||
| OmFileLoadHelper om_load_helper; | |||
| if (is_unknown_shape_model_) { | |||
| @@ -609,7 +609,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||
| GeModelPtr cur_model = ge::MakeShared<ge::GeModel>(); | |||
| Status ret = LoadModelData(om_load_helper, cur_model, mode_index); | |||
| if (ret != SUCCESS) { | |||
| return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||
| return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||
| } | |||
| if (is_first_model) { | |||
| @@ -622,22 +622,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||
| ret = LoadWeights(om_load_helper, cur_model, mode_index); | |||
| if (ret != SUCCESS) { | |||
| return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||
| return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||
| } | |||
| ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); | |||
| if (ret != SUCCESS) { | |||
| return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||
| return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||
| } | |||
| ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); | |||
| if (ret != SUCCESS) { | |||
| return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||
| return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||
| } | |||
| ret = LoadTask(om_load_helper, cur_model, mode_index); | |||
| if (ret != SUCCESS) { | |||
| return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||
| return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||
| } | |||
| root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); | |||
| } | |||
| @@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om | |||
| GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); | |||
| GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num()); | |||
| } | |||
| cur_model->SetModelTaskDef(task); | |||
| return SUCCESS; | |||
| @@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||
| auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data + cur_offset); | |||
| size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | |||
| cur_offset += partition_table_size; | |||
| GELOGD("Cur model index %zu: ModelPartitionTable num :%u, " | |||
| GELOGD("Cur model index %u: ModelPartitionTable num :%u, " | |||
| "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
| index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | |||
| if (model_data_size <= cur_offset) { | |||
| @@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||
| partition.type = partition_table->partition[i].type; | |||
| if (index >= model_contexts_.size()) { | |||
| if (index != model_contexts_.size()) { | |||
| GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index); | |||
| GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index); | |||
| return FAILED; | |||
| } | |||
| @@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||
| } | |||
| if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) { | |||
| GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", | |||
| GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.", | |||
| partition.size + cur_offset, model_data_size); | |||
| return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
| } | |||
| cur_offset += partition.size; | |||
| GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index); | |||
| GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast<int>(partition.type), partition.size, index); | |||
| } | |||
| } | |||
| if (cur_offset != model_data_size) { | |||
| GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size); | |||
| GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| @@ -51,7 +51,7 @@ bool KernelStore::Build() { | |||
| kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length()); | |||
| kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize()); | |||
| GELOGD("get kernel bin name %s, addr %p, size %u", | |||
| GELOGD("get kernel bin name %s, addr %p, size %zu", | |||
| kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); | |||
| mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); | |||
| GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); | |||
| @@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { | |||
| return INTERNAL_ERROR; \ | |||
| } | |||
| #define FMK_INT64_UINT32_MULCHECK(a, b) \ | |||
| if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ | |||
| GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast<uint32_t>(a), \ | |||
| static_cast<uint32_t>(b)); \ | |||
| return INTERNAL_ERROR; \ | |||
| #define FMK_INT64_UINT32_MULCHECK(a, b) \ | |||
| if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ | |||
| GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast<int64_t>(a), \ | |||
| static_cast<uint32_t>(b)); \ | |||
| return INTERNAL_ERROR; \ | |||
| } | |||
| #define FMK_FP16_ZEROCHECK(a) \ | |||
| @@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||
| ge::ModelData &model_data) { | |||
| std::string real_path = RealPath(model_path); | |||
| if (real_path.empty()) { | |||
| GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||
| } | |||
| @@ -181,7 +181,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||
| if (type != kProfCommandhandleFinalize) { | |||
| command.module_index = prof_config_param->profSwitch; | |||
| } | |||
| GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), | |||
| GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), | |||
| command.module_index); | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | |||
| @@ -192,7 +192,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||
| return ge::FAILED; | |||
| } | |||
| GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index); | |||
| GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); | |||
| return ge::SUCCESS; | |||
| } | |||
| @@ -21,7 +21,7 @@ | |||
| #include "framework/common/string_util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "runtime/base.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const char *const kTrainingTrace = "training_trace"; | |||
| @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
| uint32_t stream_id = task.stream_id; | |||
| std::string shape_type = task.shape_type; | |||
| int64_t cur_iter_num = task.cur_iter_num; | |||
| uint32_t task_type = task.task_type; | |||
| data = model_name.append(" ") | |||
| .append(op_name).append(" ") | |||
| .append(std::to_string(block_dim)).append(" ") | |||
| @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
| .append(std::to_string(stream_id)).append(" ") | |||
| .append(std::to_string(model_id)).append(" ") | |||
| .append(shape_type).append(" ") | |||
| .append(std::to_string(cur_iter_num)).append("\n"); | |||
| .append(std::to_string(cur_iter_num)).append(" ") | |||
| .append(std::to_string(task_type)).append("\n"); | |||
| ReporterData reporter_data{}; | |||
| reporter_data.deviceId = device_id; | |||
| @@ -538,7 +540,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi | |||
| for (auto device_id_module : device_id_module_map_) { | |||
| if (device_id_module.second != 0) { | |||
| uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | |||
| GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); | |||
| GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second); | |||
| rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(FAILED, "Runtime profiler stop failed."); | |||
| @@ -627,7 +629,7 @@ Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string> | |||
| } | |||
| if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { | |||
| GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); | |||
| GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size()); | |||
| return FAILED; | |||
| } | |||
| #endif | |||
| @@ -657,7 +659,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
| for (int32_t i = 0; i < device_num; i++) { | |||
| device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | |||
| } | |||
| GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||
| GELOGI("Runtime config param: 0x%lx, device num: %d.", module, device_num); | |||
| rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| @@ -699,7 +701,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
| for (int32_t i = 0; i < device_num; i++) { | |||
| device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | |||
| } | |||
| GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||
| GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num); | |||
| rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | |||
| @@ -57,6 +57,7 @@ message TaskDef { | |||
| LabelSetDef label_set = 37; | |||
| LabelGotoExDef label_goto_ex = 38; | |||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||
| KernelDefWithHandle kernel_with_handle = 40; | |||
| } | |||
| message KernelDef { | |||
| @@ -74,6 +75,19 @@ message KernelDef { | |||
| uint32 kernel_ext_info_size = 19; | |||
| } | |||
| message KernelDefWithHandle { | |||
| KernelContext context = 1; | |||
| uint64 handle = 10; | |||
| string dev_func = 11; | |||
| uint32 block_dim = 12; | |||
| uint32 args_size = 13; | |||
| bytes args = 14; | |||
| bytes sm_desc = 15; | |||
| string original_kernel_key = 16; | |||
| string node_info = 17; | |||
| } | |||
| message KernelContext { | |||
| uint32 kernel_type = 1; | |||
| uint32 op_id = 2; // OP type in CCE | |||
| @@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); | |||
| REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); | |||
| REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | |||
| @@ -32,37 +32,37 @@ set(SRC_LIST | |||
| "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||
| "../model/ge_model.cc" | |||
| "../model/ge_root_model.cc" | |||
| "../graph/load/new_model_manager/davinci_model.cc" | |||
| "../graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "../graph/load/new_model_manager/model_manager.cc" | |||
| "../graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "../graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "../graph/load/new_model_manager/model_utils.cc" | |||
| "../graph/load/new_model_manager/aipp_utils.cc" | |||
| "../graph/load/new_model_manager/data_inputer.cc" | |||
| "../graph/load/new_model_manager/data_dumper.cc" | |||
| "../graph/load/new_model_manager/zero_copy_task.cc" | |||
| "../graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "../graph/load/new_model_manager/task_info/task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "../graph/load/model_manager/davinci_model.cc" | |||
| "../graph/load/model_manager/davinci_model_parser.cc" | |||
| "../graph/load/model_manager/model_manager.cc" | |||
| "../graph/load/model_manager/tbe_handle_store.cc" | |||
| "../graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "../graph/load/model_manager/model_utils.cc" | |||
| "../graph/load/model_manager/aipp_utils.cc" | |||
| "../graph/load/model_manager/data_inputer.cc" | |||
| "../graph/load/model_manager/data_dumper.cc" | |||
| "../graph/load/model_manager/zero_copy_task.cc" | |||
| "../graph/load/model_manager/zero_copy_offset.cc" | |||
| "../graph/load/model_manager/task_info/task_info.cc" | |||
| "../graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "../graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "../graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "../graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "../graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "../graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "../graph/common/local_context.cc" | |||
| "../opskernel_manager/ops_kernel_builder_manager.cc" | |||
| "../single_op/single_op_manager.cc" | |||
| @@ -104,6 +104,7 @@ set(SRC_LIST | |||
| "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||
| "../hybrid/node_executor/controlop/control_op_executor.cc" | |||
| "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
| "../hybrid/node_executor/rts/rts_node_executor.cc" | |||
| @@ -29,15 +29,15 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/execute/graph_execute.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| using std::string; | |||
| @@ -226,7 +226,7 @@ Status GeExecutor::Initialize() { | |||
| } | |||
| GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), | |||
| "Failed to initialize OpsKernelBuilders"); | |||
| "Failed to initialize OpsKernelBuilders."); | |||
| // Start profiling | |||
| Options profiling_options; | |||
| @@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| if (all_data_dims[i] < 0) { | |||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | |||
| } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d", | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", | |||
| i, dynamic_dims[i], all_data_dims[i]); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ | |||
| ../graph/manager/util/debug.cc \ | |||
| ../model/ge_model.cc \ | |||
| ../model/ge_root_model.cc \ | |||
| ../graph/load/new_model_manager/davinci_model.cc \ | |||
| ../graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| ../graph/load/new_model_manager/model_manager.cc \ | |||
| ../graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| ../graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| ../graph/load/new_model_manager/model_utils.cc \ | |||
| ../graph/load/new_model_manager/aipp_utils.cc \ | |||
| ../graph/load/new_model_manager/data_inputer.cc \ | |||
| ../graph/load/new_model_manager/data_dumper.cc \ | |||
| ../graph/load/new_model_manager/zero_copy_task.cc \ | |||
| ../graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| ../graph/load/new_model_manager/task_info/task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| ../graph/load/model_manager/davinci_model.cc \ | |||
| ../graph/load/model_manager/davinci_model_parser.cc \ | |||
| ../graph/load/model_manager/model_manager.cc \ | |||
| ../graph/load/model_manager/tbe_handle_store.cc \ | |||
| ../graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| ../graph/load/model_manager/model_utils.cc \ | |||
| ../graph/load/model_manager/aipp_utils.cc \ | |||
| ../graph/load/model_manager/data_inputer.cc \ | |||
| ../graph/load/model_manager/data_dumper.cc \ | |||
| ../graph/load/model_manager/zero_copy_task.cc \ | |||
| ../graph/load/model_manager/zero_copy_offset.cc \ | |||
| ../graph/load/model_manager/task_info/task_info.cc \ | |||
| ../graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| ../opskernel_manager/ops_kernel_builder_manager.cc \ | |||
| ../single_op/single_op_manager.cc \ | |||
| ../single_op/single_op_model.cc \ | |||
| @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ | |||
| ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ | |||
| ../hybrid/node_executor/controlop/control_op_executor.cc \ | |||
| ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
| ../hybrid/node_executor/rts/rts_node_executor.cc \ | |||
| @@ -57,6 +57,7 @@ message TaskDef { | |||
| LabelSetDef label_set = 37; | |||
| LabelGotoExDef label_goto_ex = 38; | |||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||
| KernelDefWithHandle kernel_with_handle = 40; | |||
| } | |||
| message KernelDef { | |||
| @@ -74,6 +75,19 @@ message KernelDef { | |||
| uint32 kernel_ext_info_size = 19; | |||
| } | |||
| message KernelDefWithHandle { | |||
| KernelContext context = 1; | |||
| uint64 handle = 10; | |||
| string dev_func = 11; | |||
| uint32 block_dim = 12; | |||
| uint32 args_size = 13; | |||
| bytes args = 14; | |||
| bytes sm_desc = 15; | |||
| string original_kernel_key = 16; | |||
| string node_info = 17; | |||
| } | |||
| message KernelContext { | |||
| uint32 kernel_type = 1; | |||
| uint32 op_id = 2; // OP type in CCE | |||
| @@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \ | |||
| graph/manager/util/rt_context_util.cc \ | |||
| graph/manager/util/variable_accelerate_ctrl.cc \ | |||
| graph/manager/util/debug.cc \ | |||
| graph/load/new_model_manager/model_manager.cc \ | |||
| graph/load/new_model_manager/data_inputer.cc \ | |||
| graph/load/new_model_manager/davinci_model.cc \ | |||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| graph/load/new_model_manager/model_utils.cc \ | |||
| graph/load/new_model_manager/aipp_utils.cc \ | |||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||
| graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| graph/load/new_model_manager/data_dumper.cc \ | |||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/model_manager/model_manager.cc \ | |||
| graph/load/model_manager/data_inputer.cc \ | |||
| graph/load/model_manager/davinci_model.cc \ | |||
| graph/load/model_manager/davinci_model_parser.cc \ | |||
| graph/load/model_manager/model_utils.cc \ | |||
| graph/load/model_manager/aipp_utils.cc \ | |||
| graph/load/model_manager/tbe_handle_store.cc \ | |||
| graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/model_manager/zero_copy_task.cc \ | |||
| graph/load/model_manager/zero_copy_offset.cc \ | |||
| graph/load/model_manager/data_dumper.cc \ | |||
| graph/load/model_manager/task_info/task_info.cc \ | |||
| graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| single_op/task/op_task.cc \ | |||
| single_op/task/build_task_utils.cc \ | |||
| single_op/task/tbe_task_builder.cc \ | |||
| @@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \ | |||
| single_op/single_op_manager.cc \ | |||
| hybrid/hybrid_davinci_model_stub.cc \ | |||
| hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||
| # graph/load/new_model_manager/task_info/hccl_task_info.cc | |||
| # graph/load/model_manager/task_info/hccl_task_info.cc | |||
| OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) | |||
| @@ -33,7 +33,7 @@ namespace { | |||
| uint64_t size = data_num * sizeof(TYPE); \ | |||
| ge_tensor = MakeShared<GeTensor>(out_desc, size); \ | |||
| GE_CHECK_NOTNULL(ge_tensor); \ | |||
| GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \ | |||
| GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \ | |||
| ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ | |||
| ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ | |||
| } else { \ | |||
| @@ -72,7 +72,7 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { | |||
| num_size = max_range_size; | |||
| } | |||
| if (num_size < 0) { | |||
| GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); | |||
| GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| data_num = static_cast<uint64_t>(num_size); | |||
| @@ -57,6 +57,7 @@ message TaskDef { | |||
| LabelSetDef label_set = 37; | |||
| LabelGotoExDef label_goto_ex = 38; | |||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||
| KernelDefWithHandle kernel_with_handle = 40; | |||
| } | |||
| message KernelDef { | |||
| @@ -74,6 +75,19 @@ message KernelDef { | |||
| uint32 kernel_ext_info_size = 19; | |||
| } | |||
| message KernelDefWithHandle { | |||
| KernelContext context = 1; | |||
| uint64 handle = 10; | |||
| string dev_func = 11; | |||
| uint32 block_dim = 12; | |||
| uint32 args_size = 13; | |||
| bytes args = 14; | |||
| bytes sm_desc = 15; | |||
| string original_kernel_key = 16; | |||
| string node_info = 17; | |||
| } | |||
| message KernelContext { | |||
| uint32 kernel_type = 1; | |||
| uint32 op_id = 2; // OP type in CCE | |||
| @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/label/partitioned_call_label_maker.cc \ | |||
| graph/label/while_label_maker.cc \ | |||
| graph/load/graph_loader.cc \ | |||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/new_model_manager/data_dumper.cc \ | |||
| graph/load/new_model_manager/data_inputer.cc \ | |||
| graph/load/new_model_manager/davinci_model.cc \ | |||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| graph/load/new_model_manager/model_manager.cc \ | |||
| graph/load/new_model_manager/model_utils.cc \ | |||
| graph/load/new_model_manager/aipp_utils.cc \ | |||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||
| graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/model_manager/data_dumper.cc \ | |||
| graph/load/model_manager/data_inputer.cc \ | |||
| graph/load/model_manager/davinci_model.cc \ | |||
| graph/load/model_manager/davinci_model_parser.cc \ | |||
| graph/load/model_manager/model_manager.cc \ | |||
| graph/load/model_manager/model_utils.cc \ | |||
| graph/load/model_manager/aipp_utils.cc \ | |||
| graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/model_manager/task_info/hccl_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/model_manager/task_info/task_info.cc \ | |||
| graph/load/model_manager/tbe_handle_store.cc \ | |||
| graph/load/model_manager/zero_copy_task.cc \ | |||
| graph/load/model_manager/zero_copy_offset.cc \ | |||
| graph/manager/graph_context.cc \ | |||
| graph/manager/graph_manager.cc \ | |||
| graph/manager/graph_manager_utils.cc \ | |||
| @@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ | |||
| hybrid/node_executor/controlop/control_op_executor.cc \ | |||
| hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
| hybrid/node_executor/hccl/hccl_node_executor.cc \ | |||
| @@ -670,7 +670,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline) { | |||
| if (!is_offline) { | |||
| (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); | |||
| (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); | |||
| } | |||
| if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { | |||
| @@ -37,6 +37,8 @@ using domi::BuildMode; | |||
| namespace { | |||
| const int32_t kInvalidPerfLevel = -1; | |||
| const int64_t kProfilingArStep = 2; | |||
| const int64_t kProfilingArStartLogid = 3; | |||
| enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; | |||
| } // namespace | |||
| namespace ge { | |||
| @@ -187,8 +189,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||
| Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||
| if (comp_graph == nullptr) { | |||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | |||
| return GE_GRAPH_PARAM_NULLPTR; | |||
| @@ -203,18 +204,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||
| (void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||
| if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) { | |||
| GE_CHK_STATUS_RET( | |||
| BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), | |||
| BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), | |||
| "Build for dynamic shape graph failed."); | |||
| return SUCCESS; | |||
| } | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), | |||
| "Build for known shape graph failed."); | |||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||
| if (ge::GetContext().GetHostExecFlag()) { | |||
| GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | |||
| @@ -222,7 +223,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||
| } | |||
| GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | |||
| Status ret = SecondPartition(comp_graph, subgraph_list); | |||
| Status ret = SecondPartition(comp_graph); | |||
| GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | |||
| auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | |||
| @@ -458,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||
| if (all_reduce_node_index[i] == node_index) { | |||
| GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||
| (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| return FAILED); | |||
| int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; | |||
| (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||
| continue; | |||
| } | |||
| } | |||
| @@ -470,7 +476,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||
| } | |||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id) { | |||
| GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | |||
| @@ -517,7 +522,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| } | |||
| } | |||
| // known shape build flow | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||
| "Build for known shape graph failed."); | |||
| } | |||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | |||
| @@ -719,7 +724,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | |||
| Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { | |||
| GE_TIMESTAMP_START(GraphPartition2); | |||
| auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | |||
| if (ret != SUCCESS) { | |||
| @@ -727,10 +732,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge: | |||
| return ret; | |||
| } | |||
| GE_CHK_STATUS_RET(ret, "Graph partition Failed."); | |||
| auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||
| if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) { | |||
| subgraph_ptr_list = graph_2_subgraphlist[comp_graph]; | |||
| } else { | |||
| const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||
| if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) { | |||
| GELOGE(FAILED, "Find subgraph failed."); | |||
| return FAILED; | |||
| } | |||
| @@ -745,7 +748,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { | |||
| if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { | |||
| return SUCCESS; | |||
| } | |||
| GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type); | |||
| GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type); | |||
| for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
| @@ -755,7 +758,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { | |||
| while (true) { | |||
| const auto &src_desc = src_node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(src_desc == nullptr, continue); | |||
| GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), | |||
| GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), | |||
| mem_type); | |||
| if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, | |||
| mem_type)) { | |||
| @@ -47,8 +47,7 @@ class GraphBuilder { | |||
| GraphBuilder(const GraphBuilder &in) = delete; | |||
| GraphBuilder &operator=(const GraphBuilder &in) = delete; | |||
| virtual ~GraphBuilder() = default; | |||
| Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| void SetOptions(const GraphManagerOptions &options); | |||
| private: | |||
| @@ -59,12 +58,12 @@ class GraphBuilder { | |||
| Status UpdateDataInputSize(const ge::NodePtr &node_ptr); | |||
| Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | |||
| Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | |||
| Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); | |||
| Status SecondPartition(ge::ComputeGraphPtr &comp_graph); | |||
| Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); | |||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| @@ -24,6 +24,7 @@ | |||
| #include "graph/buffer.h" | |||
| #include "graph/ge_attr_value.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/types.h" | |||
| #include "graph/node.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| @@ -542,11 +543,31 @@ void GetMaxBatchAllMemorySize(std::map<std::string, vector<int64_t>> &batch_all_ | |||
| } | |||
| } | |||
| void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { | |||
| auto node_op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); | |||
| // if input size just one and from variable, no need to reassign continuous memory | |||
| bool is_input_continuous = false; | |||
| (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||
| if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { | |||
| auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); | |||
| auto in_node = peer_out_anchor->GetOwnerNode(); | |||
| GE_IF_BOOL_EXEC(in_node == nullptr, return); | |||
| if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { | |||
| GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); | |||
| (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | |||
| } | |||
| } | |||
| } | |||
| void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||
| vector<int64_t> temp; | |||
| std::map<std::string, vector<int64_t>> batch_all_memory_size; | |||
| std::map<std::string, int64_t> batch_total_size; | |||
| for (const NodePtr &n : compute_graph_->GetAllNodes()) { | |||
| MarkContinuousAllocedForOneInputFromVariable(n); | |||
| auto node_op_desc = n->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | |||
| @@ -1131,18 +1152,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
| return block; | |||
| } | |||
| MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||
| const bool is_op_reuse_mem) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||
| bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) { | |||
| auto output_tensor = op_desc->GetOutputDescPtr(index); | |||
| bool dst_reuse_input = false; | |||
| (void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input); | |||
| if (dst_reuse_input) { | |||
| return true; | |||
| } | |||
| bool is_ref = false; | |||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); | |||
| if (is_ref) { | |||
| string output_name = op_desc->GetOutputNameByIndex(index); | |||
| for (const auto &input_name : op_desc->GetAllInputNames()) { | |||
| if (output_name == input_name) { | |||
| return true;; | |||
| } | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, | |||
| const NodePtr &n) { | |||
| const auto node_op_desc = n->GetOpDesc(); | |||
| for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||
| if (!IsOutputIndexRef(node_op_desc, index)) { | |||
| isAllOutputRef = false; | |||
| break; | |||
| } else { | |||
| zero_memory_list_.emplace_back(n, kOutput, index); | |||
| isOutputHasRef = true; | |||
| } | |||
| } | |||
| } | |||
| Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||
| const bool is_op_reuse_mem) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||
| auto node_op_desc = n->GetOpDesc(); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||
| // continuous output support ref only when all output ref input | |||
| bool isAllOutputRef = true; | |||
| bool isOutputHasRef = false; | |||
| ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); | |||
| if (isAllOutputRef) { | |||
| GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| if (!isAllOutputRef && isOutputHasRef) { | |||
| GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||
| n->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| MemoryBlock *block = nullptr; | |||
| int64_t total_size = 0; | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||
| if (output_op_desc == nullptr) { | |||
| return nullptr; | |||
| GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| if (CheckIsZeroMemNodeType(n->GetType())) { | |||
| @@ -1152,8 +1228,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||
| int64_t size = 0; | |||
| if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | |||
| GELOGI("Get size failed"); | |||
| return nullptr; | |||
| GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| size_t align_size = static_cast<size_t>(size); | |||
| AlignMemOffset(align_size); | |||
| @@ -1176,7 +1252,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||
| } | |||
| if (total_size == 0) { | |||
| return nullptr; | |||
| return SUCCESS; | |||
| } | |||
| auto block_size = GetBlockSize(total_size, ranges); | |||
| @@ -1190,8 +1266,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||
| // hccl task need align header and tail | |||
| block->first_continuous_block_ = true; | |||
| block->last_continuous_block_ = true; | |||
| } else { | |||
| GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| return block; | |||
| return SUCCESS; | |||
| } | |||
| MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | |||
| @@ -1203,9 +1282,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
| NodeIndexIO node_index_io(n, index, kOut); | |||
| int64_t size = 0; | |||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||
| if (output_op_desc != nullptr) { | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| } | |||
| GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| size_t no_align_size = 0; | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | |||
| return nullptr, "Get no align size failed"); | |||
| @@ -1231,6 +1309,13 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
| AlignMemOffset(align_size); | |||
| theory_memory_size_ += align_size; | |||
| } else { | |||
| // if ref input is variable, can not find symbol, must judge alone | |||
| if (IsOutputIndexRef(node_op_desc, index)) { | |||
| zero_memory_list_.emplace_back(n, kOutput, index, false); | |||
| GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); | |||
| return nullptr; | |||
| } | |||
| int64_t max_size = size; | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); | |||
| @@ -1477,8 +1562,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | |||
| ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | |||
| if (IsContinuousOutput(node)) { | |||
| (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||
| return SUCCESS; | |||
| return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||
| } | |||
| for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||
| int64_t size = 0; | |||
| @@ -1486,6 +1570,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| if (output_op_desc != nullptr) { | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| } | |||
| // fusion: other type's size not means malloc HBM memory | |||
| bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | |||
| if (l1_flag) { | |||
| @@ -1493,6 +1578,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | |||
| size = 0; | |||
| } | |||
| int32_t calc_type = 0; | |||
| bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||
| GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); | |||
| std::string peer_name; | |||
| uint32_t peer_input_index = 0; | |||
| bool out_node_set_continuous_input = false; | |||
| @@ -1973,9 +2063,8 @@ Status BlockMemAssigner::Assign() { | |||
| bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | |||
| return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | |||
| (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || | |||
| (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||
| (node_type == HVDCALLBACKBROADCAST); | |||
| (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || | |||
| (node_type == ASSIGN) || (node_type == HVDWAIT); | |||
| } | |||
| bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | |||
| @@ -448,7 +448,11 @@ class BlockMemAssigner : public MemAssigner { | |||
| bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | |||
| MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||
| void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); | |||
| Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||
| void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | |||
| std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | |||
| @@ -88,6 +88,14 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { | |||
| return ge::SUCCESS; | |||
| } | |||
| Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||
| Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_); | |||
| if (result != ge::SUCCESS) { | |||
| return result; | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| Status GraphMemoryAssigner::AssignMemory() { | |||
| ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); | |||
| if (mem_assigner->Assign() != ge::SUCCESS) { | |||
| @@ -135,6 +143,19 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||
| auto variable_assigner = | |||
| std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | |||
| if (variable_assigner == nullptr) { | |||
| GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||
| return ge::FAILED; | |||
| } | |||
| if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { | |||
| return ge::FAILED; | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | |||
| int64_t dim_index, int64_t &output_mem_size, | |||
| int64_t &batch_dim_num, int64_t &out_size) { | |||
| @@ -371,10 +392,10 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| // Assign continuous input memory | |||
| bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); | |||
| if (continuous_input) { | |||
| int64_t mem_clean_start = 0; | |||
| int64_t mem_clean_size = 0; | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); | |||
| ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "Assign continuous input memory failed!"); | |||
| @@ -412,6 +433,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| // Assign continuous output memory | |||
| bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); | |||
| if (continuous_output) { | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); | |||
| ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "Assign continuous output memory failed!"); | |||
| @@ -640,9 +662,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
| } | |||
| int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; | |||
| GE_CHECK_NOTNULL(mem_assigner_); | |||
| GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); | |||
| if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { | |||
| if (atomic_mem_size != 0) { | |||
| GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), | |||
| "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||
| } | |||
| @@ -1233,8 +1253,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
| GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | |||
| } | |||
| GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||
| has_mem_type_attr == true ? "Fusion" : "", | |||
| GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]", | |||
| has_mem_type_attr ? "Fusion" : "", | |||
| tmp_op_desc->GetName().c_str(), | |||
| valid_input_index, | |||
| peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), | |||
| @@ -63,6 +63,8 @@ class VariableMemoryAssigner { | |||
| /// | |||
| ge::Status AssignVarAttr2Nodes(); | |||
| ge::Status AssignMemory2HasRefAttrNode(); | |||
| private: | |||
| ge::ComputeGraphPtr compute_graph_; | |||
| }; | |||
| @@ -99,6 +101,8 @@ class GraphMemoryAssigner { | |||
| /// | |||
| ge::Status AssignVarAttr2Nodes(); | |||
| ge::Status AssignMemory2HasRefAttrNode(); | |||
| ge::Status ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset); | |||
| ge::Status AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size); | |||
| @@ -40,6 +40,11 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<int64_t, size_t> &me | |||
| return ge::FAILED; | |||
| } | |||
| if (graph_mem_assigner.AssignMemory2HasRefAttrNode() != ge::SUCCESS) { | |||
| GELOGE(ge::FAILED, "Assign memory to node which has ref attr failed!"); | |||
| return ge::FAILED; | |||
| } | |||
| // Assign memory for reference | |||
| if (graph_mem_assigner.AssignReferenceMemory() != ge::SUCCESS) { | |||
| GELOGE(ge::FAILED, "Assign reference memory failed!"); | |||
| @@ -33,10 +33,7 @@ using std::vector; | |||
| namespace ge { | |||
| Status VarMemAssignUtil::AssignVarMemory(ge::ComputeGraphPtr &compute_graph) { | |||
| GE_CHK_STATUS_RET(AssignMemory2VariableNode(compute_graph)); | |||
| GE_CHK_STATUS_RET(AssignMemory2HasRefAttrNode(compute_graph)); | |||
| return SUCCESS; | |||
| return AssignMemory2VariableNode(compute_graph); | |||
| } | |||
| Status VarMemAssignUtil::AssignConstantOpMemory(ge::ComputeGraphPtr &compute_graph) { | |||
| @@ -60,9 +57,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||
| return FAILED); | |||
| ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); | |||
| GE_CHECK_NOTNULL(tensor_desc); | |||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||
| uint32_t mem_type = 0; | |||
| if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { | |||
| memory_type = RT_MEMORY_RDMA_HBM; | |||
| } | |||
| if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { | |||
| GE_CHK_STATUS_RET( | |||
| VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); | |||
| VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); | |||
| GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, | |||
| GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); | |||
| GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | |||
| @@ -70,7 +72,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||
| } | |||
| uint8_t *dev_ptr = nullptr; | |||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||
| GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | |||
| ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); | |||
| vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset(); | |||
| @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { | |||
| return false; | |||
| } | |||
| // Iteraotor loop : | |||
| // StreamSwitch -> StreamActive | |||
| // FpBp loop: | |||
| // StreamSwitch -> AssignAdd -> StreamActive | |||
| NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { | |||
| for (auto pre_node : active_node->GetInControlNodes()) { | |||
| if (pre_node->GetType() == STREAMSWITCH) { | |||
| return pre_node; | |||
| } | |||
| for (auto pre_pre_node : pre_node->GetInControlNodes()) { | |||
| if (pre_pre_node->GetType() == STREAMSWITCH) { | |||
| return pre_pre_node; | |||
| } | |||
| } | |||
| } | |||
| return nullptr; | |||
| } | |||
| Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| vector<uint32_t> loop_active_streams; | |||
| for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | |||
| @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| bool is_loop_active = false; | |||
| if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | |||
| vector<string> activated_label_list; | |||
| NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); | |||
| if (pre_switch_node == nullptr) { | |||
| GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | |||
| activated_label_list.empty()) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), | |||
| @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| // it may cause some stream actived by iterator next step when this stream still alive. | |||
| // If above situation happen, active message will lose, cause process block in next iteration. | |||
| // In order to avoid this abnormal happen, | |||
| // add event between each last node and iterator active node in target active stream | |||
| // add event between each last node and iterator switch node | |||
| GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | |||
| for (auto iter : stream_id_to_last_node) { | |||
| if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | |||
| @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| continue; | |||
| } | |||
| AddSendEventId(iter.second, event_num_); | |||
| AddRecvEventId(node, event_num_); | |||
| AddRecvEventId(pre_switch_node, event_num_); | |||
| event_num_++; | |||
| } | |||
| @@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||
| return SUCCESS; | |||
| } | |||
| bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const { | |||
| auto parent_graph_ptr = graph->GetParentGraph(); | |||
| if (parent_graph_ptr == nullptr) { | |||
| return false; | |||
| } | |||
| auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr); | |||
| if (root_graph_ptr == nullptr) { | |||
| return false; | |||
| } | |||
| return root_graph_ptr->GetGraphUnknownFlag(); | |||
| } | |||
| Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | |||
| vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | |||
| GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); | |||
| @@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
| }; | |||
| GE_MAKE_GUARD(release, callback); | |||
| uint64_t all_reduce_node_idx = 0; | |||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| @@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
| // Part2: Call | |||
| auto fusion_task_info = | |||
| FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | |||
| ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; | |||
| ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; | |||
| GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | |||
| "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | |||
| // continue directly | |||
| @@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
| type.c_str()); | |||
| // Profiling task | |||
| size_t task_list_size_before = task_def_list.size(); | |||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||
| node_index, task_def_list, all_reduce_node_idx)); | |||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||
| int64_t op_id = op_desc->GetId(); | |||
| // Compatible with dynamic shape scenes, the default is 0 | |||
| int64_t stream_id = 0; | |||
| @@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
| return ret; | |||
| } | |||
| // Profiling task | |||
| GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||
| node_index, task_def_list, all_reduce_node_idx)); | |||
| GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||
| size_t task_list_size_after = task_def_list.size(); | |||
| // If tasks is reduced | |||
| if (task_list_size_after < task_list_size_before) { | |||
| @@ -382,7 +392,6 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||
| auto &op_name_map = fusion_task_info.op_name_map; | |||
| auto &profiling_point = fusion_task_info.profiling_point; | |||
| auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; | |||
| auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; | |||
| // If op_desc have this attr, call nodes with same group key in a stream together | |||
| if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && | |||
| (fusion_nodes_seen.count(node.get()) == 0)) { | |||
| @@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // profiling task | |||
| (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||
| node_index, task_def_list, all_reduce_idx); | |||
| (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||
| run_context.stream = run_context.graphStreamList[stream_id]; | |||
| GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", | |||
| op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); | |||
| @@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||
| return ret; | |||
| } | |||
| // profiling task | |||
| (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||
| node_index, task_def_list, all_reduce_idx); | |||
| (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||
| size_t task_list_size_after = task_def_list.size(); | |||
| // if tasks is reduced | |||
| if (task_list_size_after < task_list_size_before) { | |||
| @@ -466,11 +473,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||
| task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | |||
| } | |||
| GELOGI( | |||
| "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" | |||
| " task finished, generate %u task(s).", | |||
| op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, | |||
| task_list_size_after - task_list_size_before); | |||
| GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" | |||
| " task finished, generate %zu task(s).", | |||
| op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, | |||
| task_list_size_after - task_list_size_before); | |||
| // record nodes which have call generate task successfully | |||
| fusion_nodes_seen.insert(fusion_node.get()); | |||
| @@ -681,7 +687,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
| } | |||
| } | |||
| if (graph->GetNeedIteration()) { | |||
| if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||
| if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { | |||
| profiling_point.end_index.insert(current_idx); | |||
| GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | |||
| op_desc->GetName().c_str(), current_idx); | |||
| @@ -850,6 +856,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
| GELOGD("Profiling is not open."); | |||
| return SUCCESS; | |||
| } | |||
| // subgraph of dynamic graph no need to find index, has been found in parent graph | |||
| if (IsSubGraphOfDynamicGraph(graph)) { | |||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| GELOGI("Start get FP/BP index."); | |||
| std::string fp_point_str; | |||
| std::string bp_point_str; | |||
| @@ -887,9 +900,47 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
| return SUCCESS; | |||
| } | |||
| Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||
| uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||
| bool is_insert_bp_profiling_task) { | |||
| bool is_insert_all_reduce_task = false; | |||
| int64_t ar_log_id = 0xFFFF; | |||
| if (is_insert_bp_profiling_task) { | |||
| (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); | |||
| is_insert_all_reduce_task = true; | |||
| } | |||
| if (!is_insert_all_reduce_task) { | |||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||
| if (all_reduce_nodes[i] == node_index) { | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| return FAILED); | |||
| ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; | |||
| is_insert_all_reduce_task = true; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| if (is_insert_all_reduce_task) { | |||
| GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); | |||
| TaskDef ar_task_def; | |||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||
| if (ar_log_def != nullptr) { | |||
| ar_log_def->set_logid(ar_log_id); | |||
| ar_log_def->set_notify(false); | |||
| } | |||
| task_def_list.push_back(ar_task_def); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | |||
| vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
| vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) { | |||
| vector<domi::TaskDef> &task_def_list) { | |||
| const char *profiling_mode = std::getenv(kProfilingMode); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
| @@ -932,19 +983,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||
| } | |||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||
| uint64_t all_reduce_task_idx = 0; | |||
| if (is_all_reduce) { | |||
| (void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index, | |||
| task_def_list, is_insert_bp_profiling_task); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||
| uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||
| bool is_insert_bp_profiling_task) { | |||
| bool is_insert_all_reduce_task = false; | |||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||
| all_reduce_task_idx = all_reduce_node_idx; | |||
| int64_t ar_log_id = 0xFFFF; | |||
| if (is_insert_bp_profiling_task) { | |||
| (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); | |||
| ar_log_id += 1; | |||
| is_insert_all_reduce_task = true; | |||
| } | |||
| if (is_all_reduce) { | |||
| all_reduce_node_idx++; | |||
| } | |||
| if (!is_insert_all_reduce_task) { | |||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||
| if (all_reduce_nodes[i] == node_index) { | |||
| all_reduce_task_idx = i; | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| return FAILED); | |||
| ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; | |||
| is_insert_all_reduce_task = true; | |||
| break; | |||
| } | |||
| @@ -952,28 +1015,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||
| } | |||
| if (is_insert_all_reduce_task) { | |||
| GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||
| GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); | |||
| TaskDef ar_task_def; | |||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||
| if (ar_log_def != nullptr) { | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| return FAILED); | |||
| auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; | |||
| ar_log_def->set_logid(log_id); | |||
| ar_log_def->set_logid(ar_log_id); | |||
| ar_log_def->set_notify(false); | |||
| (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||
| } | |||
| task_def_list.push_back(ar_task_def); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | |||
| vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
| vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) { | |||
| vector<domi::TaskDef> &task_def_list) { | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const char *profiling_mode = std::getenv(kProfilingMode); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
| @@ -1018,36 +1077,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||
| task_def_list.emplace_back(end_task_def); | |||
| } | |||
| uint32_t all_reduce_task_idx = 0; | |||
| bool is_insert_all_reduce_task = false; | |||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||
| all_reduce_task_idx = all_reduce_node_idx; | |||
| is_insert_all_reduce_task = true; | |||
| } | |||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||
| if (all_reduce_nodes[i] == node_index) { | |||
| all_reduce_task_idx = i; | |||
| is_insert_all_reduce_task = true; | |||
| break; | |||
| } | |||
| if (is_all_reduce) { | |||
| (void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index, | |||
| task_def_list, is_insert_bp_profiling_task); | |||
| } | |||
| if (is_insert_all_reduce_task) { | |||
| GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||
| TaskDef ar_task_def; | |||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||
| GE_CHECK_NOTNULL(ar_log_def); | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| return FAILED); | |||
| auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; | |||
| ar_log_def->set_logid(log_id); | |||
| ar_log_def->set_notify(false); | |||
| task_def_list.emplace_back(ar_task_def); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -129,10 +129,16 @@ class TaskGenerator { | |||
| std::vector<uint32_t> &all_reduce_nodes) const; | |||
| Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | |||
| std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
| std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx); | |||
| std::vector<domi::TaskDef> &task_def_list); | |||
| Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||
| uint32_t node_index, std::vector<domi::TaskDef> &task_def_listy, | |||
| bool is_insert_bp_profiling_task); | |||
| Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | |||
| std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
| std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx); | |||
| std::vector<domi::TaskDef> &task_def_list); | |||
| Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||
| uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||
| bool is_insert_bp_profiling_task); | |||
| static bool IsProfPoint(const OpDescPtr &op, const std::string &name); | |||
| @@ -155,6 +161,8 @@ class TaskGenerator { | |||
| Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); | |||
| bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const; | |||
| uint8_t *var_mem_base_ = nullptr; | |||
| uint64_t var_mem_size_ = 0; | |||
| }; | |||
| @@ -21,7 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| #include "runtime/mem.h" | |||
| @@ -22,8 +22,8 @@ | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||
| #include "graph/load/model_manager/aipp_utils.h" | |||
| #include <string> | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/model_manager/cpu_queue_schedule.h" | |||
| #include "common/debug/ge_log.h" | |||
| #include "common/debug/log.h" | |||
| @@ -20,8 +20,8 @@ | |||
| #include <vector> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/new_model_manager/zero_copy_offset.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/zero_copy_offset.h" | |||
| #include "runtime/kernel.h" | |||
| namespace ge { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/data_dumper.h" | |||
| #include "graph/load/model_manager/data_dumper.h" | |||
| #include <cstdlib> | |||
| #include <ctime> | |||
| @@ -29,7 +29,7 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/anchor.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/manager/util/debug.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| @@ -820,6 +820,7 @@ Status DataDumper::UnloadDumpInfo() { | |||
| for (const auto &op_iter : op_list_) { | |||
| aicpu::dump::Task task; | |||
| task.set_task_id(op_iter.task_id); | |||
| task.set_stream_id(op_iter.stream_id); | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | |||
| @@ -834,7 +835,6 @@ void DataDumper::DumpShrink() { | |||
| compute_graph_.reset(); | |||
| input_map_.clear(); | |||
| ref_info_.clear(); | |||
| op_list_.clear(); | |||
| } | |||
| void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/data_inputer.h" | |||
| #include "graph/load/model_manager/data_inputer.h" | |||
| #include <securec.h> | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include <graph/utils/node_utils.h> | |||
| #include <algorithm> | |||
| @@ -36,9 +36,9 @@ | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/graph.h" | |||
| #include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/new_model_manager/tbe_handle_store.h" | |||
| #include "graph/load/model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/manager/trans_var_data_utils.h" | |||
| @@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() { | |||
| runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | |||
| } | |||
| void DavinciModel::CheckHasHcomOp() { | |||
| Graph graph = ge_model_->GetGraph(); | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| if (compute_graph == nullptr) { | |||
| return; | |||
| } | |||
| void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { | |||
| const set<string> hcom_opp_types({ | |||
| HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, | |||
| HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE | |||
| }); | |||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); | |||
| GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || | |||
| (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || | |||
| (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || | |||
| (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || | |||
| (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || | |||
| (op_desc->GetType() == HCOMREDUCE)), | |||
| uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||
| (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); | |||
| if (hcom_opp_types.count(op_desc->GetType()) > 0) { | |||
| uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||
| hcom_streams_.emplace(stream_id); | |||
| GELOGD("hcom stream: %u.", stream_id); | |||
| } | |||
| } | |||
| } | |||
| @@ -624,6 +621,7 @@ void DavinciModel::OpDebugUnRegister() { | |||
| // initialize op sequence and call initialization function of each op respectively | |||
| Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
| // validating params | |||
| GELOGI("Priority is %d", priority_); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, | |||
| "Priority must between 0-7, now is %d", priority_); | |||
| GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); | |||
| @@ -641,7 +639,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| name_ = ge_model_->GetName(); | |||
| (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); | |||
| GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); | |||
| CheckHasHcomOp(); | |||
| CheckHasHcomOp(compute_graph); | |||
| vector<int64_t> huge_stream_list; | |||
| (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); | |||
| @@ -722,7 +720,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| /// the aicpu opertor needs to destroy history record, and update operator memory address. | |||
| /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). | |||
| need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); | |||
| (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | |||
| string fp_ceiling_mode; | |||
| if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | |||
| @@ -1028,7 +1025,7 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||
| const vector<OpDescPtr> &output_op_list) { | |||
| GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | |||
| for (auto &item : data_by_index) { | |||
| auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||
| const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||
| GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | |||
| input_addrs_list_.emplace_back(output_addrs); | |||
| @@ -1036,14 +1033,18 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||
| GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | |||
| GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | |||
| GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | |||
| GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); | |||
| if (item.second->GetType() == AIPP_DATA_TYPE) { | |||
| GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | |||
| is_dynamic_aipp_ = true; | |||
| } | |||
| } | |||
| vector<string> out_node_name; | |||
| (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); | |||
| GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); | |||
| for (const auto &op_desc : output_op_list) { | |||
| auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||
| const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||
| GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | |||
| output_addrs_list_.emplace_back(input_addrs); | |||
| @@ -1061,10 +1062,11 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||
| if (InitOutputTensorInfo(op_desc) != SUCCESS) { | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); | |||
| } | |||
| GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); | |||
| return InitOutputDescInfo(output_op_list); | |||
| return SUCCESS; | |||
| } | |||
| bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||
| @@ -1815,7 +1817,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | |||
| domi::AippOpParams aipp_params; | |||
| GeAttrValue::NAMED_ATTRS aipp_attr; | |||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||
| "Data node do not contain param aipp!"); | |||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | |||
| GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||
| @@ -1875,7 +1877,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons | |||
| (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | |||
| for (const auto item : data_list) { | |||
| if (item.second->GetName() == releated_name) { | |||
| GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); | |||
| GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index); | |||
| aipp_index = item.first; | |||
| } | |||
| } | |||
| @@ -1980,27 +1982,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||
| } | |||
| } | |||
| Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) { | |||
| for (const auto &item : data_by_index) { | |||
| const auto op_desc = item.second; | |||
| GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||
| Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { | |||
| GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||
| InputOutputDescInfo input; | |||
| ShapeDescription dims_info; | |||
| Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||
| CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||
| InputOutputDescInfo input; | |||
| ShapeDescription dims_info; | |||
| Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||
| CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||
| input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||
| input.name = op_desc->GetName(); | |||
| int64_t input_size = 0; | |||
| GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||
| input.size = input_size; | |||
| input_formats_.push_back(format); | |||
| input_descs_.push_back(input); | |||
| input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||
| input.name = op_desc->GetName(); | |||
| int64_t input_size = 0; | |||
| GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||
| input.size = input_size; | |||
| input_formats_.push_back(format); | |||
| input_descs_.push_back(input); | |||
| input.shape_info = dims_info; | |||
| input_descs_dims_.push_back(input); | |||
| } | |||
| input.shape_info = dims_info; | |||
| input_descs_dims_.push_back(input); | |||
| return SUCCESS; | |||
| } | |||
| @@ -2066,35 +2065,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||
| output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | |||
| } | |||
| Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) { | |||
| GELOGD("Output node size: %zu", output_op_list.size()); | |||
| for (const auto &op_desc : output_op_list) { | |||
| uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
| for (uint32_t index = 0; index < out_size; index++) { | |||
| string output_name; | |||
| InputOutputDescInfo output; | |||
| uint32_t format_result; | |||
| CreateOutput(index, op_desc, output, format_result); | |||
| std::vector<std::string> src_name = op_desc->GetSrcName(); | |||
| std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||
| GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, | |||
| "construct output_name failed."); | |||
| // forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||
| if (out_size == out_node_name_.size()) { | |||
| // neweast plan, the index will add to name during generate model. | |||
| bool contains_colon = out_node_name_[index].find(":") != std::string::npos; | |||
| output_name = | |||
| contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); | |||
| } else { | |||
| output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | |||
| std::to_string(src_index[index]); | |||
| } | |||
| output.name = output_name; | |||
| output_descs_.push_back(output); | |||
| output_formats_.push_back(format_result); | |||
| Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name) { | |||
| uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
| for (uint32_t i = 0; i < out_size; ++i) { | |||
| string output_name; | |||
| InputOutputDescInfo output; | |||
| uint32_t format_result; | |||
| CreateOutput(i, op_desc, output, format_result); | |||
| std::vector<std::string> src_name = op_desc->GetSrcName(); | |||
| std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||
| GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, | |||
| "construct output_name failed."); | |||
| // forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||
| if (out_size == out_node_name.size()) { | |||
| // neweast plan, the index will add to name during generate model. | |||
| bool contains_colon = out_node_name[i].find(":") != std::string::npos; | |||
| output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); | |||
| } else { | |||
| output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); | |||
| } | |||
| output.name = output_name; | |||
| output_descs_.push_back(output); | |||
| output_formats_.push_back(format_result); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -2147,11 +2142,6 @@ Status DavinciModel::SyncVarData() { | |||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||
| } | |||
| for (const auto &item : broadcast_variable_) { | |||
| ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||
| item.first.c_str()); | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -2635,12 +2625,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||
| /// | |||
| Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | |||
| GELOGI("ReturnNoOutput model id:%u", model_id_); | |||
| for (const auto item : broadcast_variable_) { | |||
| Status ret = VarManager::Instance(session_id_) | |||
| ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||
| item.first.c_str()); | |||
| } | |||
| GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | |||
| std::vector<ge::OutputTensorInfo> outputs; | |||
| @@ -3064,6 +3048,64 @@ Status DavinciModel::MallocKnownArgs() { | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||
| const domi::TaskDef &task_def, size_t task_index) { | |||
| bool flag = GetL1FusionEnableOption(); | |||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
| if (env_flag != 0) { | |||
| flag = true; | |||
| } | |||
| TaskDescInfo task_desc_info; | |||
| if (!om_name_.empty()) { | |||
| task_desc_info.model_name = om_name_; | |||
| } else { | |||
| task_desc_info.model_name = name_; | |||
| } | |||
| task_desc_info.op_name = op->GetName(); | |||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
| task_desc_info.task_id = task->GetTaskID(); | |||
| task_desc_info.stream_id = task->GetStreamId(); | |||
| task_desc_info.shape_type = "static"; | |||
| task_desc_info.cur_iter_num = 0; | |||
| // task type | |||
| task_desc_info.task_type = kTaskTypeInvalid; | |||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const auto &context = kernel_def.context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| task_desc_info.task_type = kTaskTypeAicore; | |||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| task_desc_info.task_type = kTaskTypeAicpu; | |||
| } else { | |||
| GELOGD("Other kernel type: %u", context.kernel_type()); | |||
| } | |||
| } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
| task_desc_info.task_type = kTaskTypeAicpu; | |||
| } else { | |||
| GELOGD("Skip task type: %d", static_cast<int>(model_task_type)); | |||
| } | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| if (flag) { | |||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||
| TaskDescInfo task_desc_info; | |||
| string op_name = "super_kernel_" + to_string(task_index); | |||
| task_desc_info.op_name = op_name; | |||
| task_desc_info.task_id = task->GetSktTaskID(); | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| Status DavinciModel::DistributeTask() { | |||
| GELOGI("do Distribute."); | |||
| for (auto &task : cpu_task_list_) { | |||
| @@ -3075,18 +3117,11 @@ Status DavinciModel::DistributeTask() { | |||
| } | |||
| task_desc_info_.clear(); | |||
| bool flag = GetL1FusionEnableOption(); | |||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
| if (env_flag != 0) { | |||
| flag = true; | |||
| } | |||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | |||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||
| auto &task_def = model_task_def->task(task_index); | |||
| auto &task = task_list_.at(task_index); | |||
| GE_CHECK_NOTNULL(task); | |||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | |||
| // for data dump | |||
| auto op_index = std::max(task_def.kernel().context().op_index(), | |||
| @@ -3106,33 +3141,9 @@ Status DavinciModel::DistributeTask() { | |||
| GE_IF_BOOL_EXEC(no_need_profiling, continue); | |||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | |||
| // Load task info for profiling | |||
| TaskDescInfo task_desc_info; | |||
| if (!om_name_.empty()) { | |||
| task_desc_info.model_name = om_name_; | |||
| } else { | |||
| task_desc_info.model_name = name_; | |||
| } | |||
| task_desc_info.op_name = op->GetName(); | |||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
| task_desc_info.task_id = task->GetTaskID(); | |||
| task_desc_info.stream_id = task->GetStreamId(); | |||
| task_desc_info.shape_type = "static"; | |||
| task_desc_info.cur_iter_num = 0; | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| if (flag) { | |||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||
| TaskDescInfo task_desc_info; | |||
| string op_name = "super_kernel_" + to_string(task_index); | |||
| task_desc_info.op_name = op_name; | |||
| task_desc_info.task_id = task->GetSktTaskID(); | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| } | |||
| } | |||
| // save task info for profiling | |||
| SaveProfilingTaskDescInfo(op, task, task_def, task_index); | |||
| } | |||
| // launch dump kernel to aicpu | |||
| GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | |||
| @@ -3949,8 +3960,11 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||
| } | |||
| data_dumper_.SetDeviceId(device_id); | |||
| // set loop count addr | |||
| auto get_var_addr = [&](const string &name) -> void *{ | |||
| if (known_node_) { | |||
| data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); | |||
| } else { | |||
| // set loop count addr | |||
| auto get_var_addr = [&](const string &name) -> void *{ | |||
| const auto it = variable_by_name.find(name); | |||
| if (it != variable_by_name.end()) { | |||
| const auto output_sizes = ModelUtils::GetOutputSize(it->second); | |||
| @@ -3963,10 +3977,10 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||
| GELOGD("op: %s is null.", name.c_str()); | |||
| return nullptr; | |||
| }; | |||
| data_dumper_.SetLoopAddr(get_var_addr(NODE_NAME_GLOBAL_STEP), | |||
| get_var_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), | |||
| get_var_addr(NODE_NAME_FLOWCTRL_LOOP_COND)); | |||
| } | |||
| } | |||
| uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | |||
| @@ -3993,14 +4007,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||
| } else { | |||
| compute_graph_info.model_name = name_; | |||
| } | |||
| std::vector<Format> format = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
| compute_graph_info.op_name = op_desc.op_name; | |||
| compute_graph_info.op_type = op_desc.op_type; | |||
| compute_graph_info.input_format = op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type; | |||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
| @@ -32,12 +32,12 @@ | |||
| #include "common/types.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||
| #include "graph/load/new_model_manager/data_dumper.h" | |||
| #include "graph/load/new_model_manager/data_inputer.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/new_model_manager/zero_copy_offset.h" | |||
| #include "graph/load/new_model_manager/zero_copy_task.h" | |||
| #include "graph/load/model_manager/aipp_utils.h" | |||
| #include "graph/load/model_manager/data_dumper.h" | |||
| #include "graph/load/model_manager/data_inputer.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/zero_copy_offset.h" | |||
| #include "graph/load/model_manager/zero_copy_task.h" | |||
| #include "graph/model.h" | |||
| #include "graph/node.h" | |||
| #include "graph/op_desc.h" | |||
| @@ -470,6 +470,10 @@ class DavinciModel { | |||
| data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | |||
| } | |||
| void SetKnownShapeGlobalStep(void *global_step) { | |||
| known_shape_global_step_ = global_step; | |||
| } | |||
| void DumperShrink() { | |||
| data_dumper_.DumpShrink(); | |||
| } | |||
| @@ -623,6 +627,9 @@ class DavinciModel { | |||
| Status DistributeTask(); | |||
| void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||
| const domi::TaskDef &task_def, size_t task_index); | |||
| uint8_t *MallocFeatureMapMem(size_t data_size); | |||
| uint8_t *MallocWeightsMem(size_t weights_size); | |||
| @@ -824,7 +831,7 @@ class DavinciModel { | |||
| void OpDebugUnRegister(); | |||
| void CheckHasHcomOp(); | |||
| void CheckHasHcomOp(const ComputeGraphPtr &graph); | |||
| Status DoTaskSink(); | |||
| @@ -847,8 +854,8 @@ class DavinciModel { | |||
| Status InitOutputTensorInfo(const OpDescPtr &op_desc); | |||
| Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | |||
| Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index); | |||
| Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list); | |||
| Status InitInputDescInfo(const OpDescPtr &op_desc); | |||
| Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name); | |||
| Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | |||
| Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | |||
| @@ -883,7 +890,6 @@ class DavinciModel { | |||
| GeModelPtr ge_model_; // release after DavinciModel::Init | |||
| bool need_destroy_aicpu_kernel_{false}; | |||
| vector<string> out_node_name_; | |||
| map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | |||
| @@ -1055,6 +1061,9 @@ class DavinciModel { | |||
| vector<uint32_t> input_formats_; | |||
| vector<InputOutputDescInfo> output_descs_; | |||
| vector<uint32_t> output_formats_; | |||
| // known shape node for dump | |||
| void *known_shape_global_step_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| namespace ge { | |||
| DavinciModelParser::DavinciModelParser() {} | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include <string> | |||
| @@ -28,8 +28,8 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "model/ge_root_model.h" | |||
| #include "graph/common/local_context.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| @@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||
| DataBuffer data; | |||
| data.data = inputs[i].data; | |||
| data.length = inputs[i].length; | |||
| input_data.shapes.emplace_back(inputs[i].dims); | |||
| input_data.blobs.push_back(data); | |||
| } | |||
| if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { | |||
| @@ -1427,7 +1428,7 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me | |||
| uint8_t *model_data = nullptr; | |||
| uint32_t model_len = 0; | |||
| Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "parse model content failed!"); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); | |||
| OmFileLoadHelper om_file_helper; | |||
| ret = om_file_helper.Init(model_data, model_len); | |||
| @@ -1703,7 +1704,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
| for (uint32_t i = 0; i < res_op_nums; i++) { | |||
| ReturnCode ret_code = res_ret_code_list.at(i); | |||
| SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); | |||
| GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, | |||
| GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType, | |||
| aicpu_info.kernelsType, aicpu_info.opLen, ret_code); | |||
| std::vector<char> op_name; | |||
| op_name.clear(); | |||
| @@ -14,20 +14,13 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include <string> | |||
| #include "common/debug/log.h" | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "runtime/base.h" | |||
| #include "runtime/kernel.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/types.h" | |||
| #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | |||
| do { \ | |||
| @@ -342,13 +335,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
| int64_t input_offset = v_input_offset[non_const_index]; | |||
| non_const_index++; | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); | |||
| uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | |||
| uint8_t *variable_addr = nullptr; | |||
| GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); | |||
| v_input_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| int64_t mem_type; | |||
| bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | |||
| // feature maps | |||
| @@ -380,6 +373,34 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
| return v_input_data_addr; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get variable address. | |||
| /// @return Status | |||
| /// | |||
| Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, | |||
| uint8_t *&var_addr) { | |||
| rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); | |||
| switch (mem_type) { | |||
| case RT_MEMORY_RDMA_HBM: | |||
| if (offset < 0) { | |||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset)); | |||
| return PARAM_INVALID; | |||
| } | |||
| var_addr = reinterpret_cast<uint8_t *>(offset); | |||
| break; | |||
| case RT_MEMORY_HBM: | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); | |||
| var_addr = model_param.var_base + offset - model_param.logic_var_base; | |||
| break; | |||
| default: | |||
| GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); | |||
| return PARAM_INVALID; | |||
| } | |||
| GE_CHECK_NOTNULL(var_addr); | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get output data address. | |||
| @@ -404,19 +425,26 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
| return v_output_data_addr; | |||
| } | |||
| for (size_t i = 0; i < outputs_size; ++i) { | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); | |||
| uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | |||
| v_output_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||
| if (tensor_desc == nullptr) { | |||
| GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
| continue; | |||
| } | |||
| int32_t calc_type = 0; | |||
| bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||
| if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||
| GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); | |||
| continue; | |||
| } | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
| uint8_t *variable_addr = nullptr; | |||
| GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); | |||
| v_output_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| int64_t mem_type; | |||
| bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | |||
| // feature maps | |||
| @@ -21,7 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/types.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| #include "graph/utils/tensor_adapter.h" | |||
| @@ -107,6 +107,15 @@ class ModelUtils { | |||
| /// @return Status | |||
| /// | |||
| static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); | |||
| private: | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get variable address. | |||
| /// @return Status | |||
| /// | |||
| static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, | |||
| uint8_t *&var_addr); | |||
| }; | |||
| } // namespace ge | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/end_graph_task_info.h" | |||
| #include "graph/load/model_manager/task_info/end_graph_task_info.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const uint32_t kDumpFlag = 2; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EndGraphTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/event_record_task_info.h" | |||
| #include "graph/load/model_manager/task_info/event_record_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EventRecordTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/event_wait_task_info.h" | |||
| #include "graph/load/model_manager/task_info/event_wait_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EventWaitTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" | |||
| #include "graph/load/model_manager/task_info/fusion_start_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class FusionStartTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" | |||
| #include "graph/load/model_manager/task_info/fusion_stop_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class FusionStopTaskInfo : public TaskInfo { | |||
| @@ -14,14 +14,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/hccl_task_info.h" | |||
| #include "graph/load/model_manager/task_info/hccl_task_info.h" | |||
| #include <utility> | |||
| #include "common/opskernel/ops_kernel_info_store.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| namespace ge { | |||
| std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; | |||
| @@ -23,7 +23,7 @@ | |||
| #include <vector> | |||
| #include "common/opskernel/ge_task_info.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/manager/util/hcom_util.h" | |||
| namespace ge { | |||
| class HcclTaskInfo : public TaskInfo { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" | |||
| #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | |||
| #include <vector> | |||
| @@ -24,8 +24,8 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/fmk_error_codes.h" | |||
| #include "graph/attr_value.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| namespace ge { | |||
| Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -192,7 +192,7 @@ void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { | |||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
| op_desc->GetName())) { | |||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
| dump_args_ = input_output_addr_; | |||
| dump_args_ = addr; | |||
| } | |||
| } | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/kernel_task_info.h" | |||
| #include "graph/load/model_manager/task_info/kernel_task_info.h" | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| @@ -25,9 +25,9 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/l2_cache_optimize.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "runtime/kernel.h" | |||
| #include "super_kernel/super_kernel.h" | |||
| #include "super_kernel/super_kernel_factory.h" | |||
| @@ -22,7 +22,7 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| class KernelTaskInfo : public TaskInfo { | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelGotoExTaskInfo : public TaskInfo { | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_set_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_set_task_info.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelSetTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| constexpr uint8_t kLabelSwitchIndexNum = 1; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" | |||
| #include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const uint32_t kAlignBytes = 64; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" | |||
| #include "graph/load/model_manager/task_info/memcpy_async_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/model_exit_task_info.h" | |||
| #include "graph/load/model_manager/task_info/model_exit_task_info.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class ModelExitTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" | |||
| #include "graph/load/model_manager/task_info/profiler_trace_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class ProfilerTraceTaskInfo : public TaskInfo { | |||