diff --git a/build.sh b/build.sh
index 5222ab5c..f2fafd48 100644
--- a/build.sh
+++ b/build.sh
@@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 #   fi
 #
   if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
-    echo "Generating coverage statistics, please wait..."
-    cd ${BASEPATH}
-    rm -rf ${BASEPATH}/cov
-    mkdir ${BASEPATH}/cov
-    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-    lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
-    cd ${BASEPATH}/cov
-    genhtml coverage.info
+    echo "Generating coverage statistics, please wait..."
+    cd ${BASEPATH}
+    rm -rf ${BASEPATH}/cov
+    mkdir ${BASEPATH}/cov
+    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
+    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+    cd ${BASEPATH}/cov
+    genhtml coverage.info
  fi

  # generate output package in tar form, including ut/st libraries/executables
"graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/hccl_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" "graph/manager/graph_context.cc" "graph/manager/graph_manager.cc" "graph/manager/graph_manager_utils.cc" @@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" @@ -605,37 +606,37 @@ set(INFER_SRC_LIST "graph/manager/util/rt_context_util.cc" "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/util/debug.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - 
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "single_op/task/op_task.cc" "single_op/task/build_task_utils.cc" "single_op/task/tbe_task_builder.cc" @@ -706,7 +707,7 @@ target_compile_options(ge_runner PRIVATE -O2 -fno-common $<$:-Werror=unused-variable> - $<$:-Werror=unused-const-variable> + $<$:-Werror=unused-const-variable -Werror=format> ) target_include_directories(ge_runner SYSTEM PRIVATE @@ -775,7 +776,7 @@ target_compile_options(ge_compiler PRIVATE -O2 -fno-common $<$:-Werror=unused-variable> - $<$:-Werror=unused-const-variable> + $<$:-Werror=unused-const-variable -Werror=format> ) target_include_directories(ge_compiler SYSTEM PRIVATE diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/client/proto/task.proto +++ b/ge/client/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; 
   LabelSwitchByIndexDef label_switch_by_index = 39;
+  KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
   uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+  KernelContext context = 1;
+
+  uint64 handle = 10;
+  string dev_func = 11;
+  uint32 block_dim = 12;
+  uint32 args_size = 13;
+  bytes args = 14;
+  bytes sm_desc = 15;
+  string original_kernel_key = 16;
+  string node_info = 17;
+}
+
 message KernelContext {
   uint32 kernel_type = 1;
   uint32 op_id = 2;  // OP type in CCE
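Note: the same KernelDefWithHandle message and TaskDef field 40 are added to both copies of task.proto (client here, common later in the patch). A hypothetical sketch of how the generated C++ API for such a message is typically driven; the accessor names follow standard protobuf codegen, and the domi package/namespace is an assumption not shown in the hunk above:

```cpp
// Assumes task.pb.h generated from the task.proto shown above; "domi" is an
// assumed package name for illustration only.
#include "task.pb.h"

domi::TaskDef MakeHandleTask(uint64_t handle, const std::string &dev_func) {
  domi::TaskDef task_def;
  // Standard protobuf codegen: mutable_<field>() for message-typed fields,
  // set_<field>() for scalars and strings.
  domi::KernelDefWithHandle *kernel = task_def.mutable_kernel_with_handle();
  kernel->set_handle(handle);      // uint64 handle = 10;
  kernel->set_dev_func(dev_func);  // string dev_func = 11;
  kernel->set_block_dim(1);        // uint32 block_dim = 12;
  kernel->mutable_context()->set_kernel_type(0);  // KernelContext context = 1;
  return task_def;
}
```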
diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc
index e708653a..12999e54 100755
--- a/ge/common/auth/file_saver.cc
+++ b/ge/common/auth/file_saver.cc
@@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
   while (size > size_1g) {
     write_count = mmWrite(fd, reinterpret_cast(seek), size_1g);
     if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-      GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+      GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
       return FAILED;
     }
     size -= size_1g;
@@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {

   // -1: Failed to write to file; - 2: Illegal parameter
   if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-    GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+    GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
     return FAILED;
   }

@@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
       WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break);
   // Write partition data
   for (const auto &partitionData : partition_datas) {
-    GELOGI("GC:size[%zu]", partitionData.size);
+    GELOGI("GC:size[%u]", partitionData.size);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
         WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; break);
@@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
     // Write partition data
     auto &cur_partition_datas = all_partition_datas[index];
     for (const auto &partition_data : cur_partition_datas) {
-      GELOGI("GC:size[%zu]", partition_data.size);
+      GELOGI("GC:size[%u]", partition_data.size);
       GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
           WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; break);
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 0b9e9dcc..5c768e22 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -99,8 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
     }
     int64_t output_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Get output size filed");
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size failed");
+      return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get output size in lanch dump op is %ld", output_size);
     output.set_size(output_size);
@@ -126,8 +126,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
     }
     int64_t input_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Get output size filed");
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get input size failed");
+      return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get input size in lanch dump op is %ld", input_size);
     input.set_size(input_size);
@@ -151,31 +151,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   size_t proto_size = op_mapping_info.ByteSizeLong();
   bool ret = op_mapping_info.SerializeToString(&proto_msg);
   if (!ret || proto_size == 0) {
-    GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size);
-    return FAILED;
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size);
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }

   rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

   rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

   constexpr int32_t io_addr_num = 2;
@@ -193,8 +193,8 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
                            nullptr,  // no need smDesc
                            stream_);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
-    return rt_ret;
+    GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   GELOGI("Kernel launch dump op success");
   return SUCCESS;
@@ -204,9 +204,15 @@ Status DumpOp::LaunchDumpOp() {
   GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
   int32_t device_id = 0;
   rtError_t rt_ret = rtGetDevice(&device_id);
-  if (rt_ret != RT_ERROR_NONE || device_id < 0) {
-    GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
-    return RT_FAILED;
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (device_id < 0) {
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
+           "Check device_id failed, device_id = %d, which should be not less than 0.",
+           device_id);
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   aicpu::dump::OpMappingInfo op_mapping_info;
   auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/";
@@ -232,29 +238,31 @@ Status DumpOp::LaunchDumpOp() {
   task.mutable_op()->set_op_name(op_desc_->GetName());
   task.mutable_op()->set_op_type(op_desc_->GetType());
   if (dump_properties_.GetDumpMode() == kDumpOutput) {
-    if (DumpOutput(task) != SUCCESS) {
-      GELOGE(FAILED, "Dump output failed");
-      return FAILED;
+    auto ret = DumpOutput(task);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "Dump output failed");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
   if (dump_properties_.GetDumpMode() == kDumpInput) {
-    if (DumpInput(task) != SUCCESS) {
-      GELOGE(FAILED, "Dump input failed");
-      return FAILED;
+    auto ret = DumpInput(task);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "Dump input failed");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
   if (dump_properties_.GetDumpMode() == kDumpAll) {
     auto ret = DumpOutput(task);
     if (ret != SUCCESS) {
-      GELOGE(FAILED, "Dump output failed when in dumping all");
-      return FAILED;
+      GELOGE(ret, "Dump output failed when in dumping all");
+      return ret;
     }
     ret = DumpInput(task);
     if (ret != SUCCESS) {
-      GELOGE(FAILED, "Dump input failed when in dumping all");
-      return FAILED;
+      GELOGE(ret, "Dump input failed when in dumping all");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
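Note: the dump_op.cc changes above stop collapsing every runtime failure into a flat RT_FAILED: the raw rtError_t is logged and then mapped into the GE status space with RT_ERROR_TO_GE_STATUS, and the real status from DumpOutput/DumpInput is propagated instead of FAILED. A condensed sketch of the pattern, assuming only names that appear in the hunks above (rtMalloc and rtError_t are Ascend runtime API; GELOGE and RT_ERROR_TO_GE_STATUS are the repo's own macros):

```cpp
// Sketch of the error-propagation pattern introduced above; compiles only
// against the repo's runtime and logging headers.
Status AllocDeviceBuffer(void **dev_mem, size_t size) {
  rtError_t rt_ret = rtMalloc(dev_mem, size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    // Log the precise runtime code rather than a generic RT_FAILED ...
    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    // ... and return it mapped into the GE status space, so callers can
    // tell runtime failures apart instead of seeing a flat FAILED.
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
```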
diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
index 85f4038e..0cb581d7 100644
--- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
+++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
@@ -162,7 +162,7 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu
 Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                                Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN)
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
index 79af84f7..eaa19d7d 100644
--- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
+++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
                       std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
 Status TransShapeDhwckToFz3D(const std::vector &src_shape, DataType data_type,
                              std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   auto d = src_shape.at(kDhwcnD);
   auto h = src_shape.at(kDhwcnH);
@@ -163,14 +163,14 @@ Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::v
                                                  DataType data_type, Format dst_format,
                                                  std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) {
     return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D)
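Note: this and the following format-transfer diffs all apply one scheme: the old UNSUPPORTED/PARAM_INVALID returns from TransShape are split into ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID (element type not supported), ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID (wrong rank or dimension values), and ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID (no transfer registered for the src/dst pair). A condensed sketch of the resulting validation order, using only names from the hunks above:

```cpp
// Condensed from the pattern above; Status, the check helpers, and the
// ACL_ERROR_GE_TRANSSHAPE_* codes come from the repo's headers.
Status TransShapeSketch(Format src, Format dst, DataType dt,
                        const std::vector<int64_t> &src_shape,
                        std::vector<int64_t> &dst_shape) {
  if (CheckDataTypeSupport(dt) != SUCCESS) {
    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;  // wrong element type
  }
  if (src == FORMAT_DHWCN && dst == FORMAT_FRACTAL_Z_3D) {
    if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) {
      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;   // wrong rank/dims
    }
    return TransShapeDhwckToFz3D(src_shape, dt, dst_shape);
  }
  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;      // pair not registered
}
```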
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
index cd1e0607..3a18312a 100644
--- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
+++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
                       std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
 Status TransShapeDhwncToFz3DTranspose(const std::vector &src_shape, DataType data_type,
                                       std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kDhwncDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   auto d = src_shape.at(kDhwncD);
   auto h = src_shape.at(kDhwncH);
@@ -164,14 +164,14 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, con
                                                           DataType data_type, Format dst_format,
                                                           std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) {
     return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE)
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
index cb528453..c3b288c1 100755
--- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
@@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
       hw_shape.push_back(DIM_DEFAULT_VALUE);
       hw_shape.push_back(src_shape[kNdDimIndexN]);
       if (!IsShapeValid(dst_shape)) {
-        GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-        return PARAM_INVALID;
+        GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
+        return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
       }
       return SUCCESS;
     default:
@@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
       hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]);
       hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]);
       if (!IsShapeValid(dst_shape)) {
-        GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-        return PARAM_INVALID;
+        GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
+        return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
       }
       return SUCCESS;
   }
@@ -299,11 +299,19 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &
 Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type,
                                            Format dst_format, ShapeVector &dst_shape) {
-  if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) {
-    GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, data type %s is not supported",
+  if (!IsDataTypeSupport(data_type)) {
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(),
           ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str());
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
+  }
+  if (!CheckShape(src_format, src_shape)) {
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, data type %s is not supported",
+           TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(),
+           ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   ShapeVector hw_shape;
   return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape);
@@ -334,7 +342,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto
                                              Format dst_format, ShapeVector &dst_shape) {
   GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported",
          TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str());
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ)
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
index dbceb911..45c6d157 100644
--- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
@@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_
 Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -54,15 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_
   dst_shape.push_back(kNiSize);
   dst_shape.push_back(c0);
   if (!IsShapeValid(dst_shape)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }

 Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNchwN);
@@ -74,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_t
 Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kHwcnDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto h = src_shape.at(kHwcnH);
@@ -87,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_t
 Status TransShapeNhwcToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNhwcDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNhwcN);
@@ -369,7 +370,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r
 Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector &src_shape,
                                           DataType data_type, Format dst_format, std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) {
@@ -382,7 +383,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector
                                           &src_shape, DataType data_type, Format dst_format,
                                           std::vector &dst_shape) {
   GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN)
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
index 915d0d76..90bf8fcb 100755
--- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
@@ -160,7 +160,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult &
 Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                            Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW)
diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
index 7840b556..1e29baf2 100755
--- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
@@ -43,8 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector
                                  std::vector &dst_shape) {
   if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) {
     if (!CheckShapeValid(src_shape, kHwcnDimsNum)) {
-      GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+             ShapeToString(src_shape).c_str());
+      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
     }
     return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape);
+  } else if (src_format != FORMAT_HWCN) {
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
 }
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
index a37ba2b5..fd09b34c 100755
--- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
@@ -157,7 +157,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult
 Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                              Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC)
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
index 49b19f46..dd8721c0 100644
--- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
@@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_
 Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto chw = c * h * w;
@@ -59,8 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type
   dst_shape.push_back(c0);

   if (!IsShapeValid(dst_shape)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -68,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type
 Status TransShapeNchwToFzC04(const std::vector &src_shape, DataType data_type,
                              std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNchwN);
@@ -293,13 +294,13 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult
 Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector &src_shape,
                                              DataType data_type, Format dst_format, std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) {
     return TransShapeNchwToFzC04(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04)
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
index 98af1efa..752a4d64 100755
--- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
@@ -32,12 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d
                                std::vector &dst_shape) {
   int64_t c0 = GetCubeSizeByDataType(data_type);
   if (c0 <= 0) {
-    GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid");
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid");
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+           ShapeToString(src_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   dst_shape.clear();
   dst_shape.push_back(src_shape.at(kNchwN));
@@ -46,8 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d
   dst_shape.push_back(src_shape.at(kNchwW));
   dst_shape.push_back(c0);
   if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -193,7 +195,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto
   if (src_format == FORMAT_NCHW) {
     return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape);
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }
 }
diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
index 8faaf4e7..2c6b392d 100755
--- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
@@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d
                                std::vector &dst_shape) {
   int64_t c0 = GetCubeSizeByDataType(data_type);
   if (c0 <= 0) {
-    GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid");
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid");
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
   dst_shape.clear();
   dst_shape.push_back(src_shape.at(kNhwcN));
@@ -44,8 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d
   dst_shape.push_back(src_shape.at(kNhwcW));
   dst_shape.push_back(c0);
   if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -189,12 +190,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto
                                              DataType data_type, Format dst_format,
                                              std::vector &dst_shape) {
   if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) {
     if (!CheckShapeValid(src_shape, kNhwcDimsNum)) {
-      GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+             ShapeToString(src_shape).c_str());
+      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
     }
     return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape);
+  } else if (src_format != FORMAT_NHWC) {
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
 }
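Note: for reference, the NC1HWC0 shape derived above from NCHW/NHWC is (N, ceil(C/C0), H, W, C0), where C0 is the cube size returned by GetCubeSizeByDataType for the element type (16 for fp16). A self-contained arithmetic sketch with hypothetical stand-ins for the repo's helpers:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the repo's Ceil helper.
static int64_t CeilDiv(int64_t a, int64_t b) { return (a + b - 1) / b; }

// NCHW (n, c, h, w) -> NC1HWC0 (n, c1, h, w, c0) with c1 = ceil(c / c0).
std::vector<int64_t> NchwToNc1hwc0(const std::vector<int64_t> &nchw, int64_t c0) {
  return {nchw[0], CeilDiv(nchw[1], c0), nchw[2], nchw[3], c0};
}

// Example, fp16 (c0 = 16): (2, 35, 7, 7) -> (2, 3, 7, 7, 16).
```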
diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc
index 9be74b1f..de0b456c 100755
--- a/ge/common/formats/format_transfers/format_transfer_transpose.cc
+++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc
@@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vector
   auto iter = dst_iter->second.find(dst_format);
   if (iter == dst_iter->second.end()) {
     std::string error = "Failed to trans shape, do not support transpose from format " +
                         FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " +
                         FmtToStr(TypeUtils::FormatToSerialString(dst_format));
-    GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str());
-    return UNSUPPORTED;
+    GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }
   perm = iter->second;
   return SUCCESS;
@@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector<
   std::vector perm_arg;
   GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg));
   if (!IsShapeArgValid(src_shape, perm_arg)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   dst_shape = TransShapeByPerm(src_shape, perm_arg);
   return SUCCESS;
diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc
index 0b21a884..2b979e9a 100755
--- a/ge/common/formats/formats.cc
+++ b/ge/common/formats/formats.cc
@@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form
     std::string error = "Failed to trans data from format " +
                         FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " +
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format));
-    GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str());
-    return UNSUPPORTED;
+    GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }

   return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape);
diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc
index 18f2d70f..052951ce 100755
--- a/ge/common/formats/utils/formats_trans_utils.cc
+++ b/ge/common/formats/utils/formats_trans_utils.cc
@@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) {
   if (size <= 0) {
     std::string error = "Failed to get cube size, the data type " +
                         FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid";
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return -1;
   } else if (size == 1) {
     return kCubeSize * 2;  // 32 bytes cube size
@@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector &shape, const int64_t expect_dim
   if (expect_dims <= 0 || shape.size() != static_cast(expect_dims)) {
     std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) +
                         ", expect " + FmtToStr(expect_dims);
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return IsShapeValid(shape);
@@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector &shape) {
   for (auto dim : shape) {
     if (dim < 0) {
       std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape));
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
       return false;
     }
     if (dim != 0 && kShapeItemNumMAX / dim < num) {
       std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX);
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
       return false;
     }
     num *= dim;
@@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector &expect_
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) +
                         ", invalid relationship between src shape " +
                         FmtToStr(ShapeToString(args.src_shape)) + " and dst " +
                         FmtToStr(ShapeToString(args.dst_shape));
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return true;
@@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector &expect_
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" +
                         FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" +
                         FmtToStr(ShapeToString(expect_shape));
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return true;
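Note: formats_trans_utils.cc above downgrades the logging inside the bool-returning validators from GE_ERRORLOG_AND_ERRORMSG to GE_WARNINGLOG_AND_ERRORMSG. Since the caller now raises the granular TRANSSHAPE status itself, logging an error at both layers would double-report. A schematic of the resulting division of responsibility, using only names from the diffs above (Status and the error codes come from the repo's headers):

```cpp
// Helper: warns and returns false; it no longer owns the error code.
bool CheckShapeValidSketch(const std::vector<int64_t> &shape, int64_t expect_dims) {
  if (expect_dims <= 0 || shape.size() != static_cast<size_t>(expect_dims)) {
    // GE_WARNINGLOG_AND_ERRORMSG(...): warn here, let the caller pick the code.
    return false;
  }
  return true;
}

// Caller: decides the user-visible status exactly once.
Status TransShapeCaller(const std::vector<int64_t> &src_shape) {
  if (!CheckShapeValidSketch(src_shape, 4)) {
    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
  }
  return SUCCESS;
}
```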
diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc
index 75a36d99..38de251e 100644
--- a/ge/common/ge/plugin_manager.cc
+++ b/ge/common/ge/plugin_manager.cc
@@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
   std::vector path_vec;
   SplitPath(path, path_vec);
   for (const auto &single_path : path_vec) {
-    GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID,
+    GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID,
                     "The shared library file path is too long!");
                     continue);
     // load break when number of loaded so reach maximum
@@ -125,7 +125,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
      GE_IF_BOOL_EXEC(error == nullptr, error = "");
      ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
          {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)});
-      GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!",
+      GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID,
+             "Failed to dlopen the shared library path[%s]. Errormessage[%s]!",
             file_path_dlopen.c_str(), error);
       continue;
     }
@@ -138,8 +139,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
         ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
             {"mmDlsym", FmtToStr(func_name) + " is skipped since function" +
             FmtToStr(func_name) + " is not existed!"});
-        GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
-               func_name.c_str());
+        GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!",
+               func_name.c_str(), func_name.c_str());
         is_valid = false;
         break;
       }
diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc
index 0b592e11..bf8c3ce0 100755
--- a/ge/common/helper/model_cache_helper.cc
+++ b/ge/common/helper/model_cache_helper.cc
@@ -28,7 +28,7 @@
 #include "framework/common/util.h"
 #include "graph/detail/attributes_holder.h"
 #include "graph/detail/model_serialize_imp.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
@@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
     auto offset = (tensor_addr_mgr.offset);
     // Check logic address and offset
     if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
-      GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
-             offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
+      GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon",
+             logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
       return PARAM_INVALID;
     }
     // Offset is needed by SaveVarVddr instead of logic address
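Note: plugin_manager.cc above swaps GE_PLGMGR_PATH_INVALID for the ACL-visible ACL_ERROR_GE_PLGMGR_PATH_INVALID when a shared-library path is over-long, fails to open, or lacks a required symbol. The checks follow the usual dynamic-loading pattern; the sketch below uses plain POSIX dlopen/dlsym as stand-ins for the repo's mmDlopen/mmDlsym wrappers:

```cpp
#include <dlfcn.h>
#include <cstdio>

// Returns the handle only if every required symbol resolves, mirroring the
// "is_valid" loop in LoadSo above. POSIX stand-in for mmDlopen/mmDlsym.
void *LoadPluginChecked(const char *path, const char *const *symbols, int n) {
  void *handle = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
  if (handle == nullptr) {
    fprintf(stderr, "dlopen(%s) failed: %s\n", path, dlerror());
    return nullptr;
  }
  for (int i = 0; i < n; ++i) {
    if (dlsym(handle, symbols[i]) == nullptr) {
      fprintf(stderr, "symbol %s missing, skipping %s\n", symbols[i], path);
      dlclose(handle);
      return nullptr;
    }
  }
  return handle;
}
```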
diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc
index 1d5a4a9b..37cb53bc 100644
--- a/ge/common/helper/model_helper.cc
+++ b/ge/common/helper/model_helper.cc
@@ -23,7 +23,7 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/version.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/graph_utils.h"

@@ -479,8 +479,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
   Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
   if (status != SUCCESS) {
-    GELOGE(status, "Parse model content failed!");
-    return status;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   file_header_ = reinterpret_cast(model_data.model_data);
@@ -517,8 +517,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   }

   if (is_assign_model_) {
-    GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
-    return GE_EXEC_LOAD_MODEL_REPEATED;
+    GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
+    return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED;
   }

   if (ReleaseLocalModelData() != SUCCESS) {
@@ -528,8 +528,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
   if (status != SUCCESS) {
-    GELOGE(status, "Parse model content failed!");
-    return status;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   file_header_ = reinterpret_cast(model_data.model_data);
@@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   //model verison 1.0 file header does not have model_num member
   is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION &&
                             file_header_->model_num > kStatiOmFileModelNum;
-  GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version);
+  GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version);

   OmFileLoadHelper om_load_helper;
   if (is_unknown_shape_model_) {
@@ -609,7 +609,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
     GeModelPtr cur_model = ge::MakeShared();
     Status ret = LoadModelData(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
     }

     if (is_first_model) {
@@ -622,22 +622,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {

     ret = LoadWeights(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
     }

     ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }

     ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }

     ret = LoadTask(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED;
     }
     root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model);
   }
@@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
       GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
       return INTERNAL_ERROR;
     }
-    GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
+    GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num());
   }
   cur_model->SetModelTaskDef(task);
   return SUCCESS;
diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc
index d1c52b13..b42aa759 100644
--- a/ge/common/helper/om_file_helper.cc
+++ b/ge/common/helper/om_file_helper.cc
@@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
     auto partition_table = reinterpret_cast(model_data + cur_offset);
     size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
     cur_offset += partition_table_size;
-    GELOGD("Cur model index %zu: ModelPartitionTable num :%u, "
+    GELOGD("Cur model index %u: ModelPartitionTable num :%u, "
            "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
            index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
     if (model_data_size <= cur_offset) {
@@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
       partition.type = partition_table->partition[i].type;
       if (index >= model_contexts_.size()) {
         if (index != model_contexts_.size()) {
-          GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index);
+          GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index);
           return FAILED;
         }

@@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
       }

      if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
-        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
+        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.",
                partition.size + cur_offset, model_data_size);
         return GE_EXEC_MODEL_DATA_SIZE_INVALID;
       }
       cur_offset += partition.size;
-      GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast(partition.type), partition.size, index);
+      GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast(partition.type), partition.size, index);
     }
   }
   if (cur_offset != model_data_size) {
-    GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size);
+    GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size);
     return FAILED;
   }
   return SUCCESS;
diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc
index 0fad096a..d746fd10 100755
--- a/ge/common/kernel_store.cc
+++ b/ge/common/kernel_store.cc
@@ -51,7 +51,7 @@ bool KernelStore::Build() {
     kernel_head.name_len = static_cast(kernel->GetName().length());
     kernel_head.bin_len = static_cast(kernel->GetBinDataSize());

-    GELOGD("get kernel bin name %s, addr %p, size %u",
+    GELOGD("get kernel bin name %s, addr %p, size %zu",
            kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize());
     mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head));
     GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false);
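Note: the om_file_helper.cc hunks above are mostly %zu/%u specifier fixes for uint32_t offsets and sizes, but the bounds check they log for deserves spelling out: `partition.size > model_data_size || cur_offset > model_data_size - partition.size` rejects a partition that would run past the buffer without ever computing the overflow-prone sum `cur_offset + partition.size`. A standalone sketch of the same test:

```cpp
#include <cstdint>

// Overflow-safe "does [cur_offset, cur_offset + part_size) fit in total_size?"
// Written as in LoadModelPartitionTable above: subtract instead of add, so a
// uint32_t sum can never wrap around and defeat the check.
bool PartitionFits(uint32_t cur_offset, uint32_t part_size, uint32_t total_size) {
  if (part_size > total_size) {
    return false;
  }
  return cur_offset <= total_size - part_size;
}
```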
diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h
index 3255e3c1..e077f4b5 100755
--- a/ge/common/math/math_util.h
+++ b/ge/common/math/math_util.h
@@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) {
     return INTERNAL_ERROR;                                                                        \
   }

-#define FMK_INT64_UINT32_MULCHECK(a, b)                                                           \
-  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                     \
-    GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast(a),     \
-           static_cast(b));                                                                      \
-    return INTERNAL_ERROR;                                                                        \
+#define FMK_INT64_UINT32_MULCHECK(a, b)                                                           \
+  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                     \
+    GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast(a),     \
+           static_cast(b));                                                                      \
+    return INTERNAL_ERROR;                                                                       \
   }

 #define FMK_FP16_ZEROCHECK(a)                                                                     \
diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc
index 64277199..22837be6 100644
--- a/ge/common/model_parser/base.cc
+++ b/ge/common/model_parser/base.cc
@@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro
                                                                       ge::ModelData &model_data) {
   std::string real_path = RealPath(model_path);
   if (real_path.empty()) {
-    GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
     return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
   }
diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc
index 43ed6434..9060f82b 100644
--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -181,7 +181,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
   if (type != kProfCommandhandleFinalize) {
     command.module_index = prof_config_param->profSwitch;
   }
-  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
+  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(),
          command.module_index);
   if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
     GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
@@ -192,7 +192,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
     return ge::FAILED;
   }

-  GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index);
+  GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
   return ge::SUCCESS;
 }
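Note: ge_profiling.cc above (and profiling_manager.cc just below) change %llx to %lx for the uint64_t module masks: on LP64 Linux targets uint64_t is `unsigned long`, so %lx is the exactly matching specifier, and with -Werror=format from the CMake hunks a leftover %llx or %d mismatch now fails the build. A compact self-contained reference, assuming an LP64 target:

```cpp
#include <cinttypes>
#include <cstdio>

int main() {
  uint64_t module = 0xdeadbeefULL;
  size_t n = 3;
  // On LP64 (Linux x86_64/aarch64) uint64_t is unsigned long -> %lx.
  printf("module: 0x%lx, num: %zu\n", module, n);
  // The portable alternative works on any ABI:
  printf("module: 0x%" PRIx64 "\n", module);
  return 0;
}
```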
diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 1fc4dba6..86b1b2c5 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -21,7 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"

 namespace {
 const char *const kTrainingTrace = "training_trace";
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
     uint32_t stream_id = task.stream_id;
     std::string shape_type = task.shape_type;
     int64_t cur_iter_num = task.cur_iter_num;
+    uint32_t task_type = task.task_type;
     data = model_name.append(" ")
                      .append(op_name).append(" ")
                      .append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
                      .append(std::to_string(stream_id)).append(" ")
                      .append(std::to_string(model_id)).append(" ")
                      .append(shape_type).append(" ")
-                     .append(std::to_string(cur_iter_num)).append("\n");
+                     .append(std::to_string(cur_iter_num)).append(" ")
+                     .append(std::to_string(task_type)).append("\n");

     ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
@@ -538,7 +540,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi
   for (auto device_id_module : device_id_module_map_) {
     if (device_id_module.second != 0) {
       uint32_t device_id = static_cast(device_id_module.first);
-      GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second);
+      GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second);
       rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id);
       if (rt_ret != RT_ERROR_NONE) {
         GELOGE(FAILED, "Runtime profiler stop failed.");
@@ -627,7 +629,7 @@ Status ProfilingManager::ProfParseParam(const std::map
   }

   if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast(device_list.size())) {
-    GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size());
+    GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size());
     return FAILED;
   }
 #endif
@@ -657,7 +659,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
   for (int32_t i = 0; i < device_num; i++) {
     device_id_ptr[i] = static_cast(device_list[i]);
   }
-  GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
+  GELOGI("Runtime config param: 0x%lx, device num: %d.", module, device_num);

   rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
@@ -699,7 +701,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
   for (int32_t i = 0; i < device_num; i++) {
     device_id_ptr[i] = static_cast(device_list[i]);
   }
-  GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
+  GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num);
   rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto
index d0c09840..0da5631e 100644
--- a/ge/common/proto/task.proto
+++ b/ge/common/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
   LabelSetDef label_set = 37;
   LabelGotoExDef label_goto_ex = 38;
   LabelSwitchByIndexDef label_switch_by_index = 39;
+  KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
   uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+  KernelContext context = 1;
+
+  uint64 handle = 10;
+  string dev_func = 11;
+  uint32 block_dim = 12;
+  uint32 args_size = 13;
+  bytes args = 14;
+  bytes sm_desc = 15;
+  string original_kernel_key = 16;
+  string node_info = 17;
+}
+
 message KernelContext {
   uint32 kernel_type = 1;
   uint32 op_id = 2;  // OP type in CCE
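Note: with the profiling_manager.cc hunks above, each reported task record gains a trailing task_type column; the record stays a space-separated line terminated by "\n" for the profiling reporter. A hypothetical helper (not the repo's API) showing just the appended column:

```cpp
#include <cstdint>
#include <string>

// Illustrative only: the record is a space-separated line; the patch above
// appends task_type as the final column before the newline.
std::string AppendTaskType(std::string record_without_newline, uint32_t task_type) {
  return record_without_newline.append(" ")
                               .append(std::to_string(task_type))
                               .append("\n");
}
```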
+REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 755bdf97..26e53c7b 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -32,37 +32,37 @@ set(SRC_LIST "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - "../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../graph/load/model_manager/davinci_model.cc" + "../graph/load/model_manager/davinci_model_parser.cc" + "../graph/load/model_manager/model_manager.cc" + "../graph/load/model_manager/tbe_handle_store.cc" + "../graph/load/model_manager/cpu_queue_schedule.cc" + "../graph/load/model_manager/model_utils.cc" + "../graph/load/model_manager/aipp_utils.cc" + "../graph/load/model_manager/data_inputer.cc" + "../graph/load/model_manager/data_dumper.cc" + "../graph/load/model_manager/zero_copy_task.cc" + "../graph/load/model_manager/zero_copy_offset.cc" + "../graph/load/model_manager/task_info/task_info.cc" + "../graph/load/model_manager/task_info/event_record_task_info.cc" + "../graph/load/model_manager/task_info/event_wait_task_info.cc" + "../graph/load/model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/model_manager/task_info/kernel_task_info.cc" + 
"../graph/load/model_manager/task_info/label_set_task_info.cc" + "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/model_manager/task_info/stream_active_task_info.cc" + "../graph/load/model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/model_manager/task_info/end_graph_task_info.cc" + "../graph/load/model_manager/task_info/model_exit_task_info.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "../graph/common/local_context.cc" "../opskernel_manager/ops_kernel_builder_manager.cc" "../single_op/single_op_manager.cc" @@ -104,6 +104,7 @@ set(SRC_LIST "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 0ea0e66d..af8237e0 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -29,15 +29,15 @@ #include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; @@ -226,7 +226,7 @@ Status GeExecutor::Initialize() { } GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), - "Failed to initialize OpsKernelBuilders"); + "Failed to initialize OpsKernelBuilders."); // Start profiling Options profiling_options; @@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d", + GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", i, dynamic_dims[i], all_data_dims[i]); return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; } diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 87abdade..4966eeb5 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ ../graph/manager/util/debug.cc \ ../model/ge_model.cc \ ../model/ge_root_model.cc \ - 
../graph/load/new_model_manager/davinci_model.cc \ - ../graph/load/new_model_manager/davinci_model_parser.cc \ - ../graph/load/new_model_manager/model_manager.cc \ - ../graph/load/new_model_manager/tbe_handle_store.cc \ - ../graph/load/new_model_manager/cpu_queue_schedule.cc \ - ../graph/load/new_model_manager/model_utils.cc \ - ../graph/load/new_model_manager/aipp_utils.cc \ - ../graph/load/new_model_manager/data_inputer.cc \ - ../graph/load/new_model_manager/data_dumper.cc \ - ../graph/load/new_model_manager/zero_copy_task.cc \ - ../graph/load/new_model_manager/zero_copy_offset.cc \ - ../graph/load/new_model_manager/task_info/task_info.cc \ - ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ - ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/model_manager/davinci_model.cc \ + ../graph/load/model_manager/davinci_model_parser.cc \ + ../graph/load/model_manager/model_manager.cc \ + ../graph/load/model_manager/tbe_handle_store.cc \ + ../graph/load/model_manager/cpu_queue_schedule.cc \ + ../graph/load/model_manager/model_utils.cc \ + ../graph/load/model_manager/aipp_utils.cc \ + ../graph/load/model_manager/data_inputer.cc \ + ../graph/load/model_manager/data_dumper.cc \ + ../graph/load/model_manager/zero_copy_task.cc \ + ../graph/load/model_manager/zero_copy_offset.cc \ + ../graph/load/model_manager/task_info/task_info.cc \ + ../graph/load/model_manager/task_info/event_record_task_info.cc \ + ../graph/load/model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_task_info.cc \ + ../graph/load/model_manager/task_info/label_set_task_info.cc \ + ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/model_manager/task_info/stream_active_task_info.cc \ + 
../graph/load/model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/model_manager/task_info/model_exit_task_info.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ ../hybrid/node_executor/controlop/control_op_executor.cc \ ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ ../hybrid/node_executor/rts/rts_node_executor.cc \ diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/executor/proto/task.proto +++ b/ge/executor/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 6f9e60db..a20ff437 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \ graph/manager/util/rt_context_util.cc \ graph/manager/util/variable_accelerate_ctrl.cc \ graph/manager/util/debug.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - 
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ single_op/task/build_task_utils.cc \ single_op/task/tbe_task_builder.cc \ @@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ hybrid/node_executor/aicpu/aicpu_ext_info.cc \ - # graph/load/new_model_manager/task_info/hccl_task_info.cc + # graph/load/model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 06dc2b96..4aebffb4 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -33,7 +33,7 @@ namespace { uint64_t size = data_num * sizeof(TYPE); \ ge_tensor = MakeShared<GeTensor>(out_desc, size); \ GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \ + GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \ ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ } else { \ @@ -72,7 +72,7
@@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { num_size = max_range_size; } if (num_size < 0) { - GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); + GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size); return INTERNAL_ERROR; } data_num = static_cast<uint64_t>(num_size); diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/ge_local_engine/proto/task.proto +++ b/ge/ge_local_engine/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 460d5068..4434dc2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ graph/label/partitioned_call_label_maker.cc \ graph/label/while_label_maker.cc \ graph/load/graph_loader.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/hccl_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/data_dumper.cc \
graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/hccl_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ @@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ hybrid/node_executor/controlop/control_op_executor.cc \ hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ hybrid/node_executor/hccl/hccl_node_executor.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index d032965b..fe7ea3bf 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -670,7 +670,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { if (!is_offline) { - (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); + (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); } if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ed77a7f1..2731e076 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -37,6 +37,8 @@ using domi::BuildMode; namespace { const int32_t kInvalidPerfLevel = -1; +const int64_t kProfilingArStep = 2; +const int64_t kProfilingArStartLogid = 3; enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; } // namespace namespace ge { @@ -187,8 +189,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph return SUCCESS; } -Status GraphBuilder::Build(ComputeGraphPtr 
&comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, - GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { +Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); return GE_GRAPH_PARAM_NULLPTR; @@ -203,18 +204,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( - BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), + BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); return SUCCESS; } - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } -Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, +Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); @@ -222,7 +223,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v } GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); - Status ret = SecondPartition(comp_graph, subgraph_list); + Status ret = SecondPartition(comp_graph); GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); @@ -458,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { if (all_reduce_node_index[i] == node_index) { GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); continue; } } @@ -470,7 +476,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { } Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, - std::vector<SubGraphInfoPtr> &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); @@ -517,7 +522,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } // known shape build flow - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); @@ -719,7 +724,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) return SUCCESS; } -Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { +Status
GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { @@ -727,10 +732,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vectorGetName().c_str(), mem_type); + GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type); for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); @@ -755,7 +758,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { while (true) { const auto &src_desc = src_node->GetOpDesc(); GE_IF_BOOL_EXEC(src_desc == nullptr, continue); - GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), + GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), mem_type); if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, mem_type)) { diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 524b60e0..fb9ab6bd 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -47,8 +47,7 @@ class GraphBuilder { GraphBuilder(const GraphBuilder &in) = delete; GraphBuilder &operator=(const GraphBuilder &in) = delete; virtual ~GraphBuilder() = default; - Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, - GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); void SetOptions(const GraphManagerOptions &options); private: @@ -59,12 +58,12 @@ class GraphBuilder { Status UpdateDataInputSize(const ge::NodePtr &node_ptr); Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); - Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); + Status SecondPartition(ge::ComputeGraphPtr &comp_graph); Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); - Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, + Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, + Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 21d6a49e..ebd23948 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -24,6 +24,7 @@ #include "graph/buffer.h" #include "graph/ge_attr_value.h" #include "graph/ge_context.h" +#include "graph/types.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" @@ -542,11 +543,31 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } +void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const
NodePtr &node) { + auto node_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); + // if input size just one and from variable, no need to reassign continuous memory + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { + auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); + auto in_node = peer_out_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(in_node == nullptr, return); + if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { + GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); + (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + } + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { vector<int64_t> temp; std::map<std::string, vector<int64_t>> batch_all_memory_size; std::map<std::string, int64_t> batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { + MarkContinuousAllocedForOneInputFromVariable(n); + auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1131,18 +1152,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, - const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); +bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) { + auto output_tensor = op_desc->GetOutputDescPtr(index); + bool dst_reuse_input = false; + (void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input); + if (dst_reuse_input) { + return true; + } + + bool is_ref = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + if (is_ref) { + string output_name = op_desc->GetOutputNameByIndex(index); + for (const auto &input_name : op_desc->GetAllInputNames()) { + if (output_name == input_name) { + return true; + } + } + } + return false; +} + +void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, + const NodePtr &n) { + const auto node_op_desc = n->GetOpDesc(); + for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { + if (!IsOutputIndexRef(node_op_desc, index)) { + isAllOutputRef = false; + break; + } else { + zero_memory_list_.emplace_back(n, kOutput, index); + isOutputHasRef = true; + } + } +} + + +Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, + const bool is_op_reuse_mem) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); + + // continuous output support ref only when all output ref input + bool isAllOutputRef = true; + bool isOutputHasRef = false; + + ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); + + if (isAllOutputRef) { + GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); + return SUCCESS; + } + + if (!isAllOutputRef && isOutputHasRef) { + GELOGE(INTERNAL_ERROR, "continuous output node refs part of its inputs, which is not supported,
node_name:%s", + n->GetName().c_str()); + return INTERNAL_ERROR; + } + MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - return nullptr; + GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1152,8 +1228,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { - GELOGI("Get size failed"); - return nullptr; + GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1176,7 +1252,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } if (total_size == 0) { - return nullptr; + return SUCCESS; } auto block_size = GetBlockSize(total_size, ranges); @@ -1190,8 +1266,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec // hccl task need align header and tail block->first_continuous_block_ = true; block->last_continuous_block_ = true; + } else { + GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); + return INTERNAL_ERROR; } - return block; + return SUCCESS; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1203,9 +1282,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - if (output_op_desc != nullptr) { - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); - } + GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1231,6 +1309,13 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, AlignMemOffset(align_size); theory_memory_size_ += align_size; } else { + // if ref input is variable, can not find symbol, must judge alone + if (IsOutputIndexRef(node_op_desc, index)) { + zero_memory_list_.emplace_back(n, kOutput, index, false); + GELOGI("ref mode skip out block assign. 
node_name: %s, index:%u", n->GetName().c_str(), index); + return nullptr; + } + int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1477,8 +1562,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); - return SUCCESS; + return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); } for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1486,6 +1570,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } + // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { @@ -1493,6 +1578,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); + std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; @@ -1973,9 +2063,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || - (node_type == HVDCALLBACKBROADCAST); + (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || + (node_type == ASSIGN) || (node_type == HVDWAIT); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 78584078..4401108d 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -448,7 +448,11 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); + void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); + + Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); + + void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); std::unordered_map<int64_t, std::map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f94eb275..8c5d8940 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -88,6 +88,14 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +Status
VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { + Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_); + if (result != ge::SUCCESS) { + return result; + } + return ge::SUCCESS; +} + Status GraphMemoryAssigner::AssignMemory() { ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); if (mem_assigner->Assign() != ge::SUCCESS) { @@ -135,6 +143,19 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { + auto variable_assigner = + std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + if (variable_assigner == nullptr) { + GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); + return ge::FAILED; + } + if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + return ge::FAILED; + } + return ge::SUCCESS; +} + ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { @@ -371,10 +392,10 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Assign continuous input memory bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); if (continuous_input) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); @@ -412,6 +433,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Assign continuous output memory bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); if (continuous_output) { + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous output memory failed!"); @@ -640,9 +662,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { } int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; - GE_CHECK_NOTNULL(mem_assigner_); - GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); - if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { + if (atomic_mem_size != 0) { GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } @@ -1233,8 +1253,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); } - GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", - has_mem_type_attr == true ? "Fusion" : "", + GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]", + has_mem_type_attr ?
"Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index a380e594..be6c47b0 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -63,6 +63,8 @@ class VariableMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); + ge::Status AssignMemory2HasRefAttrNode(); + private: ge::ComputeGraphPtr compute_graph_; }; @@ -99,6 +101,8 @@ class GraphMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); + ge::Status AssignMemory2HasRefAttrNode(); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 055103a9..0f58a040 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -40,6 +40,11 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &me return ge::FAILED; } + if (graph_mem_assigner.AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + GELOGE(ge::FAILED, "Assign memory to node which has ref attr failed!"); + return ge::FAILED; + } + // Assign memory for reference if (graph_mem_assigner.AssignReferenceMemory() != ge::SUCCESS) { GELOGE(ge::FAILED, "Assign reference memory failed!"); diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..f910d2e2 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -33,10 +33,7 @@ using std::vector; namespace ge { Status VarMemAssignUtil::AssignVarMemory(ge::ComputeGraphPtr &compute_graph) { - GE_CHK_STATUS_RET(AssignMemory2VariableNode(compute_graph)); - GE_CHK_STATUS_RET(AssignMemory2HasRefAttrNode(compute_graph)); - - return SUCCESS; + return AssignMemory2VariableNode(compute_graph); } Status VarMemAssignUtil::AssignConstantOpMemory(ge::ComputeGraphPtr &compute_graph) { @@ -60,9 +57,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) @@ -70,7 +72,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr } uint8_t *dev_ptr = nullptr; - rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 63112ea8..88ffda02 
100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { return false; } +// Iterator loop: +// StreamSwitch -> StreamActive +// FpBp loop: +// StreamSwitch -> AssignAdd -> StreamActive +NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { + for (auto pre_node : active_node->GetInControlNodes()) { + if (pre_node->GetType() == STREAMSWITCH) { + return pre_node; + } + for (auto pre_pre_node : pre_node->GetInControlNodes()) { + if (pre_pre_node->GetType() == STREAMSWITCH) { + return pre_pre_node; + } + } + } + return nullptr; +} + Status StreamAllocator::SetActiveStreamsForLoop() { vector<uint32_t> loop_active_streams; for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { bool is_loop_active = false; if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { vector<string> activated_label_list; + + NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); + if (pre_switch_node == nullptr) { + GELOGE(FAILED, "Find switch node before loop active node %s failed.", node->GetName().c_str()); + return FAILED; + } + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || activated_label_list.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { // it may cause some stream actived by iterator next step when this stream still alive. // If above situation happen, active message will lose, cause process block in next iteration.
// In order to avoid this abnormal happen, - // add event between each last node and iterator active node in target active stream + // add event between each last node and iterator switch node GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); for (auto iter : stream_id_to_last_node) { if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { continue; } AddSendEventId(iter.second, event_num_); - AddRecvEventId(node, event_num_); + AddRecvEventId(pre_switch_node, event_num_); event_num_++; } diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index bb72fa8a..8bd7d32e 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map> &fusion return SUCCESS; } +bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const { + auto parent_graph_ptr = graph->GetParentGraph(); + if (parent_graph_ptr == nullptr) { + return false; + } + auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr); + if (root_graph_ptr == nullptr) { + return false; + } + + return root_graph_ptr->GetGraphUnknownFlag(); +} + Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); @@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); - uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -382,7 +392,6 @@ Status
TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; - auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -466,11 +473,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); } - GELOGI( - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" - " task finished, generate %u task(s).", - op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, - task_list_size_after - task_list_size_before); + GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" + " task finished, generate %zu task(s).", + op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, + task_list_size_after - task_list_size_before); // record nodes which have call generate task successfully fusion_nodes_seen.insert(fusion_node.get()); @@ -681,7 +687,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } } if (graph->GetNeedIteration()) { - if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { profiling_point.end_index.insert(current_idx); GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", op_desc->GetName().c_str(), current_idx); @@ -850,6 +856,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } + + // subgraph of dynamic graph no need to find index, has been found in parent graph + if (IsSubGraphOfDynamicGraph(graph)) { + GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); + return SUCCESS; + } + GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -887,9 +900,47 @@ Status
TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } +Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { + bool is_insert_all_reduce_task = false; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + is_insert_all_reduce_task = true; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; + is_insert_all_reduce_task = true; + break; + } + } + } + + if (is_insert_all_reduce_task) { + GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); + TaskDef ar_task_def; + ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); + ar_task_def.set_stream_id(op_desc->GetStreamId()); + LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); + if (ar_log_def != nullptr) { + ar_log_def->set_logid(ar_log_id); + ar_log_def->set_notify(false); + } + task_def_list.push_back(ar_task_def); + } + + return SUCCESS; +} + Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t &all_reduce_node_idx) { + vector &task_def_list) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); @@ -932,19 +983,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); - uint64_t all_reduce_task_idx = 0; + if (is_all_reduce) { + (void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); + } + + return SUCCESS; +} + +Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + ar_log_id += 1; is_insert_all_reduce_task = true; } - if (is_all_reduce) { - all_reduce_node_idx++; - } if (!is_insert_all_reduce_task) { for (size_t i = 0; i < all_reduce_nodes.size(); i++) { if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; is_insert_all_reduce_task = true; break; } @@ -952,28 +1015,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } if (is_insert_all_reduce_task) { - GELOGI("The start allreduce operator is %s, idx %u", 
op_desc->GetName().c_str(), node_index); + GELOGI("The end allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; - ar_log_def->set_logid(log_id); + ar_log_def->set_logid(ar_log_id); ar_log_def->set_notify(false); - (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } + return SUCCESS; } Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t all_reduce_node_idx) { + vector &task_def_list) { GE_CHECK_NOTNULL(op_desc); const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || @@ -1018,36 +1077,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P task_def_list.emplace_back(end_task_def); } - uint32_t all_reduce_task_idx = 0; - bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; - is_insert_all_reduce_task = true; - } - - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; - is_insert_all_reduce_task = true; - break; - } + if (is_all_reduce) { + (void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); } - if (is_insert_all_reduce_task) { - GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); - TaskDef ar_task_def; - ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); - ar_task_def.set_stream_id(op_desc->GetStreamId()); - LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); - GE_CHECK_NOTNULL(ar_log_def); - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; - ar_log_def->set_logid(log_id); - ar_log_def->set_notify(false); - task_def_list.emplace_back(ar_task_def); - } return SUCCESS; } diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index 5970954c..9f12d568 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -129,10 +129,16 @@ class TaskGenerator { std::vector &all_reduce_nodes) const; Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes, uint32_t node_index, - std::vector &task_def_list, uint64_t &all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task); Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes,
uint32_t node_index, - std::vector &task_def_list, uint64_t all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task); static bool IsProfPoint(const OpDescPtr &op, const std::string &name); @@ -155,6 +161,8 @@ class TaskGenerator { Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); + bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const; + uint8_t *var_mem_base_ = nullptr; uint64_t var_mem_size_ = 0; }; diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 3c5618e8..79c22a29 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" #include "runtime/mem.h" diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 6272e581..29afc939 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -22,8 +22,8 @@ #include "common/helper/model_helper.h" #include "common/util.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/model_manager/aipp_utils.cc similarity index 98% rename from ge/graph/load/new_model_manager/aipp_utils.cc rename to ge/graph/load/model_manager/aipp_utils.cc index e0e60d2b..8a18c421 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/model_manager/aipp_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/model_manager/aipp_utils.h" #include diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/model_manager/aipp_utils.h similarity index 100% rename from ge/graph/load/new_model_manager/aipp_utils.h rename to ge/graph/load/model_manager/aipp_utils.h diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc similarity index 99% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.cc rename to ge/graph/load/model_manager/cpu_queue_schedule.cc index 430321bd..d9b716ea 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" #include "common/debug/log.h" diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h similarity index 97% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.h rename to ge/graph/load/model_manager/cpu_queue_schedule.h index 8999e975..de4c5327 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -20,8 +20,8 @@ #include #include "common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc similarity index 99% rename from ge/graph/load/new_model_manager/data_dumper.cc rename to ge/graph/load/model_manager/data_dumper.cc index a12a2b2a..235cffa9 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/load/model_manager/data_dumper.h" #include #include @@ -29,7 +29,7 @@ #include "framework/common/util.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/util/debug.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" @@ -820,6 +820,7 @@ Status DataDumper::UnloadDumpInfo() { for (const auto &op_iter : op_list_) { aicpu::dump::Task task; task.set_task_id(op_iter.task_id); + task.set_stream_id(op_iter.stream_id); op_mapping_info.mutable_task()->Add(std::move(task)); } auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); @@ -834,7 +835,6 @@ void DataDumper::DumpShrink() { compute_graph_.reset(); input_map_.clear(); ref_info_.clear(); - op_list_.clear(); } void DataDumper::PrintCheckLog(string &dump_list_key) { diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h similarity index 100% rename from ge/graph/load/new_model_manager/data_dumper.h rename to ge/graph/load/model_manager/data_dumper.h diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc similarity index 94% rename from ge/graph/load/new_model_manager/data_inputer.cc rename to ge/graph/load/model_manager/data_inputer.cc index 5efc710e..0fe75465 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -14,7 +14,7 @@ * limitations under the License. 
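// CheckHasHcomOp in the davinci_model.cc hunk below collapses eleven chained
// op-type comparisons into a single lookup in a set of op types. The pattern
// in isolation (the type strings are illustrative stand-ins for the
// HCOM*/HVDCALLBACK* constants used by the real code):
#include <set>
#include <string>
bool IsHcomOpType(const std::string &op_type) {
  static const std::set<std::string> kHcomTypes{
      "HcomBroadcast", "HcomAllGather", "HcomAllReduce", "HcomSend",
      "HcomReceive", "HcomReduceScatter", "HcomReduce"};  // shortened list
  return kHcomTypes.count(op_type) > 0;  // one lookup instead of N comparisons
}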
*/ -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/model_manager/data_inputer.h similarity index 100% rename from ge/graph/load/new_model_manager/data_inputer.h rename to ge/graph/load/model_manager/data_inputer.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc similarity index 96% rename from ge/graph/load/new_model_manager/davinci_model.cc rename to ge/graph/load/model_manager/davinci_model.cc index 2afbdf30..95fd8392 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include #include @@ -36,9 +36,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); } -void DavinciModel::CheckHasHcomOp() { - Graph graph = ge_model_->GetGraph(); - auto compute_graph = GraphUtils::GetComputeGraph(graph); - if (compute_graph == nullptr) { - return; - } +void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { + const set hcom_opp_types({ + HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, + HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE + }); + for (const auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); - GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || - (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || - (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || - (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || - (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || - (op_desc->GetType() == HCOMREDUCE)), - uint32_t stream_id = static_cast(op_desc->GetStreamId()); - (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); + if (hcom_opp_types.count(op_desc->GetType()) > 0) { + uint32_t stream_id = static_cast(op_desc->GetStreamId()); + hcom_streams_.emplace(stream_id); + GELOGD("hcom stream: %u.", stream_id); + } } } @@ -624,6 +621,7 @@ void DavinciModel::OpDebugUnRegister() { // initialize op sequence and call initialization function of each op respectively Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { // validating params + GELOGI("Priority is %d", priority_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, "Priority 
must be between 0 and 7, now is %d", priority_); GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); @@ -641,7 +639,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size name_ = ge_model_->GetName(); (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); - CheckHasHcomOp(); + CheckHasHcomOp(compute_graph); vector huge_stream_list; (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); @@ -722,7 +720,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size /// the aicpu operator needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); string fp_ceiling_mode; if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { @@ -1028,7 +1025,7 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ const vector &output_op_list) { GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); for (auto &item : data_by_index) { - auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); input_addrs_list_.emplace_back(output_addrs); @@ -1036,14 +1033,18 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); + GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); if (item.second->GetType() == AIPP_DATA_TYPE) { GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); is_dynamic_aipp_ = true; } } + vector out_node_name; + (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); + GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); for (const auto &op_desc : output_op_list) { - auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); output_addrs_list_.emplace_back(input_addrs); @@ -1061,10 +1062,11 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ if (InitOutputTensorInfo(op_desc) != SUCCESS) { return INTERNAL_ERROR; } + + GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); } - GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); - return InitOutputDescInfo(output_op_list); + return SUCCESS; } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1815,7 +1817,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { domi::AippOpParams aipp_params; GeAttrValue::NAMED_ATTRS aipp_attr; -
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", @@ -1875,7 +1877,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); for (const auto item : data_list) { if (item.second->GetName() == releated_name) { - GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); + GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index); aipp_index = item.first; } } @@ -1980,27 +1982,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::InitInputDescInfo(const map &data_by_index) { - for (const auto &item : data_by_index) { - const auto op_desc = item.second; - GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - InputOutputDescInfo input; - ShapeDescription dims_info; - Format format = op_desc->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); + InputOutputDescInfo input; + ShapeDescription dims_info; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); - input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); - input.name = op_desc->GetName(); - int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); - input.size = input_size; - input_formats_.push_back(format); - input_descs_.push_back(input); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + input.size = input_size; + input_formats_.push_back(format); + input_descs_.push_back(input); - input.shape_info = dims_info; - input_descs_dims_.push_back(input); - } + input.shape_info = dims_info; + input_descs_dims_.push_back(input); return SUCCESS; } @@ -2066,35 +2065,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { - GELOGD("Output node size: %zu", output_op_list.size()); - for (const auto &op_desc : output_op_list) { - uint32_t out_size = static_cast(op_desc->GetInputsSize()); - for (uint32_t index = 0; index < out_size; index++) { - string output_name; - InputOutputDescInfo output; - uint32_t format_result; - CreateOutput(index, op_desc, output, format_result); - - std::vector src_name = op_desc->GetSrcName(); - std::vector src_index = op_desc->GetSrcIndex(); - GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, - "construct output_name failed."); - // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name_.size()) { - // neweast 
plan, the index will add to name during generate model. - bool contains_colon = out_node_name_[index].find(":") != std::string::npos; - output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); - } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + - std::to_string(src_index[index]); - } - output.name = output_name; - output_descs_.push_back(output); - output_formats_.push_back(format_result); +Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name) { + uint32_t out_size = static_cast(op_desc->GetInputsSize()); + for (uint32_t i = 0; i < out_size; ++i) { + string output_name; + InputOutputDescInfo output; + uint32_t format_result; + CreateOutput(i, op_desc, output, format_result); + + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, + "construct output_name failed."); + // forward compatibility: if an old om has no out_node_name, return outputs the original way + if (out_size == out_node_name.size()) { + // newest plan: the index is added to the name during model generation. + bool contains_colon = out_node_name[i].find(":") != std::string::npos; + output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); + } else { + output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); } + output.name = output_name; + output_descs_.push_back(output); + output_formats_.push_back(format_result); } + return SUCCESS; } @@ -2147,11 +2142,6 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } - for (const auto &item : broadcast_variable_) { - ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } return ret; } @@ -2635,12 +2625,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); - for (const auto item : broadcast_variable_) { - Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; @@ -3064,6 +3048,64 @@ Status DavinciModel::MallocKnownArgs() { return SUCCESS; } +void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index) { + bool flag = GetL1FusionEnableOption(); + char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; + INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); + int64_t env_flag = (res == EN_OK) ?
std::strtol(skt_enable_env, nullptr, kDecimal) : 0; + if (env_flag != 0) { + flag = true; + } + + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = task_def.kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info.shape_type = "static"; + task_desc_info.cur_iter_num = 0; + // task type + task_desc_info.task_type = kTaskTypeInvalid; + auto model_task_type = static_cast(task_def.type()); + if (model_task_type == RT_MODEL_TASK_KERNEL) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { + task_desc_info.task_type = kTaskTypeAicore; + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Other kernel type: %u", context.kernel_type()); + } + } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Skip task type: %d", static_cast(model_task_type)); + } + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + } + } + return; +} + Status DavinciModel::DistributeTask() { GELOGI("do Distribute."); for (auto &task : cpu_task_list_) { @@ -3075,18 +3117,11 @@ Status DavinciModel::DistributeTask() { } task_desc_info_.clear(); - bool flag = GetL1FusionEnableOption(); - char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; - INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? 
std::strtol(skt_enable_env, nullptr, kDecimal) : 0; - if (env_flag != 0) { - flag = true; - } - const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); + GE_CHECK_NOTNULL(task); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), @@ -3106,33 +3141,9 @@ Status DavinciModel::DistributeTask() { GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); - // Load task info for profiling - TaskDescInfo task_desc_info; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = task_def.kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - } - } + + // save task info for profiling + SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); @@ -3949,8 +3960,11 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map void *{ + if (known_node_) { + data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); + } else { + // set loop count addr + auto get_var_addr = [&](const string &name) -> void *{ const auto it = variable_by_name.find(name); if (it != variable_by_name.end()) { const auto output_sizes = ModelUtils::GetOutputSize(it->second); @@ -3963,10 +3977,10 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &graph_des } else { compute_graph_info.model_name = name_; } + + std::vector format = { FORMAT_NULL }; + std::vector> shape = { {0} }; + std::vector data_type = { DT_UNDEFINED }; compute_graph_info.op_name = op_desc.op_name; compute_graph_info.op_type = op_desc.op_type; - compute_graph_info.input_format = op_desc.input_format; - compute_graph_info.input_shape = op_desc.input_shape; - compute_graph_info.input_data_type = op_desc.input_data_type; - compute_graph_info.output_format = op_desc.output_format; - compute_graph_info.output_shape = op_desc.output_shape; - compute_graph_info.output_data_type = op_desc.output_data_type; + compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; + compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; + compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; + compute_graph_info.output_format = op_desc.output_format.empty() ? 
format : op_desc.output_format; + compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; + compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; uint32_t task_id = 0; uint32_t stream_id = 0; auto iter = profiler_report_op_info_.find(op_desc.op_name); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h similarity index 97% rename from ge/graph/load/new_model_manager/davinci_model.h rename to ge/graph/load/model_manager/davinci_model.h index 4108f2c7..53e9cd4d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -32,12 +32,12 @@ #include "common/types.h" #include "framework/common/util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/aipp_utils.h" -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/aipp_utils.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" #include "graph/op_desc.h" @@ -470,6 +470,10 @@ class DavinciModel { data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); } + void SetKnownShapeGlobalStep(void *global_step) { + known_shape_global_step_ = global_step; + } + void DumperShrink() { data_dumper_.DumpShrink(); } @@ -623,6 +627,9 @@ class DavinciModel { Status DistributeTask(); + void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index); + uint8_t *MallocFeatureMapMem(size_t data_size); uint8_t *MallocWeightsMem(size_t weights_size); @@ -824,7 +831,7 @@ class DavinciModel { void OpDebugUnRegister(); - void CheckHasHcomOp(); + void CheckHasHcomOp(const ComputeGraphPtr &graph); Status DoTaskSink(); @@ -847,8 +854,8 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitInputDescInfo(const map &data_by_index); - Status InitOutputDescInfo(const vector &output_op_list); + Status InitInputDescInfo(const OpDescPtr &op_desc); + Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name); Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); @@ -883,7 +890,6 @@ class DavinciModel { GeModelPtr ge_model_; // release after DavinciModel::Init bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; map op_list_; // release after DavinciModel::Init @@ -1055,6 +1061,9 @@ class DavinciModel { vector input_formats_; vector output_descs_; vector output_formats_; + + // known shape node for dump + void *known_shape_global_step_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc similarity index 92% rename from ge/graph/load/new_model_manager/davinci_model_parser.cc rename to 
ge/graph/load/model_manager/davinci_model_parser.cc index 76526de2..c6f48b84 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/model_manager/davinci_model_parser.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" namespace ge { DavinciModelParser::DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h similarity index 100% rename from ge/graph/load/new_model_manager/davinci_model_parser.h rename to ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_manager.cc rename to ge/graph/load/model_manager/model_manager.cc index edc60e50..4eb3254b 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include @@ -28,8 +28,8 @@ #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "graph/utils/attr_utils.h" @@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + GELOGI("Unsupported aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h similarity index 100% rename from ge/graph/load/new_model_manager/model_manager.h rename to ge/graph/load/model_manager/model_manager.h diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc similarity index 92% rename from ge/graph/load/new_model_manager/model_utils.cc rename to ge/graph/load/model_manager/model_utils.cc index 22a657ad..410e9364 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -14,20 +14,13 @@ * limitations under the License.
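// SaveProfilingTaskDescInfo, added in the davinci_model.cc hunk above, maps a
// runtime task to a profiler task type: RT_MODEL_TASK_KERNEL resolves to AI
// Core or AI CPU depending on the kernel context type, while
// RT_MODEL_TASK_KERNEL_EX is always AI CPU. The decision tree in isolation
// (the enumerators below are stand-ins for the rtModelTaskType_t /
// ccKernelType / kTaskType* values used by the real code):
enum class ModelTaskType { kKernel, kKernelEx, kOther };
enum class KernelType { kTe, kAiCpu, kCustAiCpu, kOther };
enum class ProfTaskType { kInvalid, kAiCore, kAiCpu };
ProfTaskType ClassifyTaskForProfiler(ModelTaskType model_task, KernelType kernel) {
  if (model_task == ModelTaskType::kKernel) {
    if (kernel == KernelType::kTe) {
      return ProfTaskType::kAiCore;  // TBE kernels run on AI Core
    }
    if (kernel == KernelType::kAiCpu || kernel == KernelType::kCustAiCpu) {
      return ProfTaskType::kAiCpu;
    }
    return ProfTaskType::kInvalid;  // other kernel types are skipped
  }
  if (model_task == ModelTaskType::kKernelEx) {
    return ProfTaskType::kAiCpu;  // kernel-ex tasks always run on AI CPU
  }
  return ProfTaskType::kInvalid;
}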
*/ -#include "graph/load/new_model_manager/model_utils.h" - +#include "graph/load/model_manager/model_utils.h" #include - #include "common/debug/log.h" #include "common/op/ge_op_utils.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "runtime/base.h" -#include "runtime/kernel.h" - -#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" +#include "graph/types.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -342,13 +335,13 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps @@ -380,6 +373,34 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co return v_input_data_addr; } +/// +/// @ingroup ge +/// @brief Get variable address. +/// @return Status +/// +Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr) { + rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); + switch (mem_type) { + case RT_MEMORY_RDMA_HBM: + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + break; + case RT_MEMORY_HBM: + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + break; + default: + GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); + return PARAM_INVALID; + } + GE_CHECK_NOTNULL(var_addr); + return SUCCESS; +} + /// /// @ingroup ge /// @brief Get output data address. 
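// ModelUtils::GetVarAddr above resolves a variable's device address from its
// logic offset in one of two ways: RDMA HBM offsets are already physical
// addresses, while ordinary HBM offsets are rebased against the model's
// variable base. A condensed standalone model of that dispatch (the enum and
// parameters stand in for rtMemType_t and RuntimeParam; the range validation
// done by VALIDATE_MEM_RANGE is elided here):
#include <cstdint>
enum class VarMemType { kRdmaHbm, kHbm };
bool ResolveVarAddr(VarMemType type, int64_t offset, uint8_t *var_base,
                    int64_t logic_var_base, uint8_t *&var_addr) {
  switch (type) {
    case VarMemType::kRdmaHbm:
      if (offset < 0) {
        return false;  // a negative RDMA offset cannot be an address
      }
      // the offset itself is the device address
      var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
      return true;
    case VarMemType::kHbm:
      // rebase the logic offset into the variable memory block
      var_addr = var_base + (offset - logic_var_base);
      return true;
  }
  return false;  // unsupported memory type
}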
@@ -404,19 +425,26 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { - GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; - v_output_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); if (tensor_desc == nullptr) { GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); continue; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + GELOGD("%s is an optional output, the address doesn't need to be saved.", tensor_desc->GetName().c_str()); + continue; + } + GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); + v_output_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h similarity index 90% rename from ge/graph/load/new_model_manager/model_utils.h rename to ge/graph/load/model_manager/model_utils.h index 4b3d7ae7..26f8d700 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" #include "graph/utils/tensor_adapter.h" @@ -107,6 +107,15 @@ class ModelUtils { /// @return Status /// static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); + + private: + /// + /// @ingroup ge + /// @brief Get variable address. + /// @return Status + /// + static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr); }; } // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc rename to ge/graph/load/model_manager/task_info/end_graph_task_info.cc index b8b02f59..c306c650 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License.
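// GetOutputDataAddrs above now skips outputs whose tensor descriptor carries
// ATTR_NAME_MEMORY_SIZE_CALC_TYPE == MemorySizeCalcType::ALWAYS_EMPTY, so no
// address is recorded for optional outputs. The guard in isolation (the
// attribute store is reduced to a map for the sketch, and the enumerator
// values are assumed; the real code reads the attr with ge::AttrUtils::GetInt
// on the GeTensorDesc):
#include <cstdint>
#include <map>
#include <string>
enum class MemorySizeCalcType : int32_t { NORMAL = 0, ALWAYS_EMPTY };  // values assumed
bool IsOptionalEmptyOutput(const std::map<std::string, int32_t> &tensor_attrs,
                           const std::string &attr_name) {
  auto it = tensor_attrs.find(attr_name);
  return it != tensor_attrs.end() &&
         it->second == static_cast<int32_t>(MemorySizeCalcType::ALWAYS_EMPTY);
}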
*/ -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kDumpFlag = 2; diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/model_manager/task_info/end_graph_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.h rename to ge/graph/load/model_manager/task_info/end_graph_task_info.h index 614544f9..efce19b2 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EndGraphTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.cc rename to ge/graph/load/model_manager/task_info/event_record_task_info.cc index 11589258..f736c386 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/model_manager/task_info/event_record_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.h rename to ge/graph/load/model_manager/task_info/event_record_task_info.h index d3f5961e..a79f1d3b 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventRecordTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc rename to ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 5701179b..34058502 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/model_manager/task_info/event_wait_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.h rename to ge/graph/load/model_manager/task_info/event_wait_task_info.h index a92252d7..bd8acab1 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventWaitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index 32c79647..6feea9e4 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.h index b1897533..284a5e0f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStartTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..22d1589c 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.h index 880ca487..994498d5 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStopTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.cc rename to ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7b18a9a3..2d0ad560 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" #include #include "common/opskernel/ops_kernel_info_store.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace ge { std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/model_manager/task_info/hccl_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.h rename to ge/graph/load/model_manager/task_info/hccl_task_info.h index 777f5bbf..3df155ad 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.h @@ -23,7 +23,7 @@ #include #include "common/opskernel/ge_task_info.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/manager/util/hcom_util.h" namespace ge { class HcclTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 98d9cb78..6da1bf63 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include @@ -24,8 +24,8 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "graph/attr_value.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { @@ -192,7 +192,7 @@ void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = input_output_addr_; + dump_args_ = addr; } } diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index f6873c6c..265316ce 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc similarity index 99% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc index 83bf2779..27fe8eb0 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
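// The kernel_ex_task_info.cc hunk above fixes InitDumpTask to record the
// address passed in by the caller (addr) rather than the member
// input_output_addr_, so the dump always targets the buffer resolved for the
// current task. A reduced model of the corrected method (the struct and flag
// value are stand-ins for KernelExTaskInfo and RT_KERNEL_DUMPFLAG):
#include <cstdint>
struct DumpTaskSketch {
  uint32_t dump_flag_ = 0;
  void *dump_args_ = nullptr;
  void InitDumpTask(void *addr, bool layer_need_dump) {
    if (layer_need_dump) {
      dump_flag_ = 0x2;    // stand-in for RT_KERNEL_DUMPFLAG
      dump_args_ = addr;   // the fix: use the per-call address, not a member
    }
  }
};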
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
similarity index 99%
rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc
index 83bf2779..27fe8eb0 100755
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_task_info.h"
 #include
 #include
 #include
@@ -25,9 +25,9 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/l2_cache_optimize.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_manager.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "runtime/kernel.h"
 #include "super_kernel/super_kernel.h"
 #include "super_kernel/super_kernel_factory.h"
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h
similarity index 98%
rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.h
rename to ge/graph/load/model_manager/task_info/kernel_task_info.h
index cea25320..7cabf259 100644
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h
@@ -22,7 +22,7 @@
 #include
 #include
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
 class KernelTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
index 393c0b31..1921c85d 100755
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h"
+#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
index f83cd1d9..25310368 100755
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelGotoExTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_set_task_info.cc
index 5fa96a96..45cb586a 100644
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_set_task_info.h"
+#include "graph/load/model_manager/task_info/label_set_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/model_manager/task_info/label_set_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.h
rename to ge/graph/load/model_manager/task_info/label_set_task_info.h
index bb02ccf0..36e41f1b 100644
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_set_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSetTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
index ae7865a4..c2997678 100644
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"
+#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 constexpr uint8_t kLabelSwitchIndexNum = 1;
diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
index 538b2d68..00ca0844 100644
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSwitchByIndexTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
index b95705f0..a1f58e42 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace {
 const uint32_t kAlignBytes = 64;
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
index c7645b9f..4631c67c 100644
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
+++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class MemcpyAddrAsyncTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
index fa320d81..22f9267d 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
index 43b5ba13..728305ff 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
+++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc
similarity index 93%
rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
rename to ge/graph/load/model_manager/task_info/model_exit_task_info.cc
index ff8057aa..eb200e3f 100644
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/model_exit_task_info.h"
+#include "graph/load/model_manager/task_info/model_exit_task_info.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/graph/load/model_manager/task_info/model_exit_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
rename to ge/graph/load/model_manager/task_info/model_exit_task_info.h
index c219fcc8..1e4a3923 100644
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
+++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ModelExitTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
similarity index 93%
rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
index 533c459a..b8fd1828 100755
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h"
+#include "graph/load/model_manager/task_info/profiler_trace_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
index 8989096d..b57ebfae 100755
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
+++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ProfilerTraceTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_active_task_info.cc
index 33ebea3b..ec807777 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc
@@ -14,12 +14,12 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_active_task_info.h"
+#include "graph/load/model_manager/task_info/stream_active_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/model_manager/task_info/stream_active_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_active_task_info.h
index c6b263b4..dfbf48d1 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamActiveTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
index 616ba85f..f129950a 100644
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
@@ -14,13 +14,13 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switch_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.h
index a72d7de2..0e75e183 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamSwitchTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
index 27adbbe4..35eb23e3 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
@@ -13,12 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switchn_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 namespace {
 const uint8_t kStreamSwitchnInputNum = 1;
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
index 3d65a086..6e6ca190 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/task_info.cc
rename to ge/graph/load/model_manager/task_info/task_info.cc
index 674d477f..e521f95c 100755
--- a/ge/graph/load/new_model_manager/task_info/task_info.cc
+++ b/ge/graph/load/model_manager/task_info/task_info.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/task_info.h
rename to ge/graph/load/model_manager/task_info/task_info.h
index 26f22564..99ec3c4e 100644
--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/model_manager/task_info/task_info.h
@@ -22,8 +22,8 @@
 #include "cce/customize.h"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/ts_mem_mall.h"
-#include "graph/load/new_model_manager/task_info/task_info_factory.h"
+#include "graph/load/model_manager/ts_mem_mall.h"
+#include "graph/load/model_manager/task_info/task_info_factory.h"
 #include "proto/task.pb.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/model_manager/task_info/task_info_factory.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/task_info_factory.h
rename to ge/graph/load/model_manager/task_info/task_info_factory.h
diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/tbe_handle_store.cc
rename to ge/graph/load/model_manager/tbe_handle_store.cc
diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/model_manager/tbe_handle_store.h
similarity index 100%
rename from ge/graph/load/new_model_manager/tbe_handle_store.h
rename to ge/graph/load/model_manager/tbe_handle_store.h
diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h
similarity index 100%
rename from ge/graph/load/new_model_manager/ts_mem_mall.h
rename to ge/graph/load/model_manager/ts_mem_mall.h
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc
similarity index 98%
rename from ge/graph/load/new_model_manager/zero_copy_offset.cc
rename to ge/graph/load/model_manager/zero_copy_offset.cc
index f27d862d..3f8555bb 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/model_manager/zero_copy_offset.cc
@@ -14,12 +14,12 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/zero_copy_offset.h"
+#include "graph/load/model_manager/zero_copy_offset.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 namespace ge {
 namespace {
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h
similarity index 98%
rename from ge/graph/load/new_model_manager/zero_copy_offset.h
rename to ge/graph/load/model_manager/zero_copy_offset.h
index 66fcd887..fc63fced 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.h
+++ b/ge/graph/load/model_manager/zero_copy_offset.h
@@ -25,7 +25,7 @@
 #include "external/ge/ge_api_error_codes.h"
 #include "framework/common/ge_types.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "runtime/mem.h"
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/zero_copy_task.cc
rename to ge/graph/load/model_manager/zero_copy_task.cc
index b938f14b..367de87a 100755
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/model_manager/zero_copy_task.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "common/ge_compiler_options.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/model_manager/zero_copy_task.h
similarity index 100%
rename from ge/graph/load/new_model_manager/zero_copy_task.h
rename to ge/graph/load/model_manager/zero_copy_task.h
diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc
index d6027a08..bfef4001 100644
--- a/ge/graph/manager/graph_caching_allocator.cc
+++ b/ge/graph/manager/graph_caching_allocator.cc
@@ -100,14 +100,14 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
     }
     auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator);
     if (bin_ptr == nullptr) {
-      GELOGE(ge::FAILED, "Alloc BlockBin failed.");
-      return ge::FAILED;
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed.");
+      return ACL_ERROR_GE_MEMORY_ALLOCATION;
     }
     free_block_bins_[i] = bin_ptr;
   }
   memory_allocator_ = MemManager::Instance(memory_type_);
   if (memory_allocator_ == nullptr) {
-    return ge::FAILED;
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   return ge::SUCCESS;
 }
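A theme starting with the caching-allocator hunk above and repeated in the allocator files below: generic ge::FAILED / INTERNAL_ERROR returns are replaced with specific ACL codes such as ACL_ERROR_GE_MEMORY_ALLOCATION, so callers can react to the class of failure rather than a catch-all. A self-contained sketch of the caller-side benefit; the numeric values are placeholders, not the real ACL codes:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the GE/ACL status codes used in the patch.
    using Status = uint32_t;
    constexpr Status ACL_ERROR_GE_MEMORY_ALLOCATION = 245;  // placeholder value
    constexpr Status ACL_ERROR_GE_INTERNAL_ERROR = 246;     // placeholder value

    void HandleInitFailure(Status ret) {
      // With distinct codes a caller can, for example, retry with a smaller
      // pool on allocation failure but abort outright on an internal error.
      if (ret == ACL_ERROR_GE_MEMORY_ALLOCATION) {
        std::printf("allocation failed, consider shrinking the request\n");
      } else if (ret == ACL_ERROR_GE_INTERNAL_ERROR) {
        std::printf("internal error, aborting\n");
      }
    }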
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index b0d412dc..410611b0 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -92,6 +92,7 @@
 #include "graph/passes/unused_args_clean_pass.h"
 #include "graph/passes/global_step_insert_pass.h"
 #include "graph/passes/memcpy_addr_async_pass.h"
+#include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/build/label_allocator.h"
 #include "graph/utils/tensor_adapter.h"
 #include "inc/pass_manager.h"
@@ -2150,6 +2151,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
                                                new (std::nothrow) TransOpWithoutReshapeFusionPass))
   GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
                                                new (std::nothrow) TransOpBreadthFusionPass))
+  GE_CHK_STATUS_RET(
+      after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));
   GE_TIMESTAMP_START(after_merge_passes);
   auto ret = after_merge_passes.Run(compute_graph);
@@ -2776,7 +2779,7 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector
     }
     GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims);
-    GELOGI("Shape dims of %d data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
+    GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
   }
   return SUCCESS;
 }
@@ -3121,9 +3124,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
     graph_name.append(std::to_string(graph_node->GetGraphId()));
     compute_graph->SetName(graph_name);
   }
-  std::vector sub_graph_list;
-  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model,
-                                                                       session_id);
+
+  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "SubGraph build Failed.");
     return ret;
   }
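The OptimizeStage1_1 hunk above wires the new HcclMemcpyPass (its implementation appears at the end of this patch) into the standard registration flow: passes are added to a PassManager under a stage-qualified name and then run over the compute graph in order. Condensed from the hunk, with the error-checking macro expanded into a plain check so the control flow is visible:

    // Condensed from the OptimizeStage1_1 hunk above.
    PassManager after_merge_passes;
    Status status = after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass",
                                               new (std::nothrow) HcclMemcpyPass);
    if (status != SUCCESS) {
      return status;  // this is what GE_CHK_STATUS_RET expands to
    }
    auto ret = after_merge_passes.Run(compute_graph);  // each pass sees the whole graph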
diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc
index f3037299..428b08ae 100755
--- a/ge/graph/manager/graph_mem_allocator.cc
+++ b/ge/graph/manager/graph_mem_allocator.cc
@@ -64,9 +64,10 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, size_t memory_size
 Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) const {
   GELOGI("MemoryAllocator::FreeMemory device_id = %u", device_id);
-  if (rtFree(memory_addr) != RT_ERROR_NONE) {
-    GELOGE(ge::INTERNAL_ERROR, "MemoryAllocator::MallocMemory device_id = %u", device_id);
-    return ge::INTERNAL_ERROR;
+  auto rtRet = rtFree(memory_addr);
+  if (rtRet != RT_ERROR_NONE) {
+    GELOGE(rtRet, "MemoryAllocator::FreeMemory device_id = %u", device_id);
+    return RT_ERROR_TO_GE_STATUS(rtRet);
   }
   memory_addr = nullptr;
   return ge::SUCCESS;
@@ -168,31 +169,36 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type)
       memory_allocator_map_[index] = memory_allocator;
       GELOGI("Create MemoryAllocator memory type[%u] success.", index);
     } else {
-      GELOGE(ge::INTERNAL_ERROR, "Alloc MemoryAllocator failed.");
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed.");
     }
   } else {
     memory_allocator = it->second;
   }
   if (memory_allocator == nullptr) {
-    GELOGE(ge::INTERNAL_ERROR, "Create MemoryAllocator failed.");
-    return ge::INTERNAL_ERROR;
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed.");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
   } else {
     memory_allocator->Initialize(0);
   }
 }
-  if (InitAllocator(memory_type, caching_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create CachingAllocator failed.");
-    return ge::INTERNAL_ERROR;
+  auto ret = InitAllocator(memory_type, caching_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create CachingAllocator failed.");
+    return ret;
   }
-  if (InitAllocator(memory_type, rdma_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed.");
-    return ge::INTERNAL_ERROR;
+
+  ret = InitAllocator(memory_type, rdma_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create RdmaAllocator failed.");
+    return ret;
   }
-  if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed.");
-    return ge::INTERNAL_ERROR;
+
+  ret = InitAllocator(memory_type, host_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create HostMemAllocator failed.");
+    return ret;
   }
   return SUCCESS;
 }
@@ -229,7 +235,7 @@ MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) {
   // Usually impossible
   if (memory_allocator == nullptr) {
-    GELOGE(ge::INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type);
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type);
     static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED);
     return &default_memory_allocator;
   }
diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h
index bd75dbb9..d3468e75 100644
--- a/ge/graph/manager/graph_mem_allocator.h
+++ b/ge/graph/manager/graph_mem_allocator.h
@@ -192,18 +192,18 @@ class MemManager {
       allocate_map[index] = allocator;
       GELOGI("Create Allocator memory type[%u] success.", index);
     } else {
-      GELOGE(INTERNAL_ERROR, "Alloc Allocator failed.");
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed.");
     }
   } else {
     allocator = it->second;
   }
   if (allocator == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Create Allocator failed.");
-    return INTERNAL_ERROR;
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed.");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
   } else {
     if (allocator->Initialize() != SUCCESS) {
-      return INTERNAL_ERROR;
+      return ACL_ERROR_GE_INTERNAL_ERROR;
    }
   }
 }
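A second pattern running through the two allocator files above: instead of collapsing every failure into INTERNAL_ERROR, the callee's concrete status is captured and forwarded, either by converting a runtime code via RT_ERROR_TO_GE_STATUS or by returning the InitAllocator result unchanged. A sketch of the forwarding shape; rtFree, RT_ERROR_NONE and RT_ERROR_TO_GE_STATUS are the names the patch itself uses, while the wrapper function is illustrative only:

    // Illustrative wrapper mirroring the FreeMemory hunk above.
    Status FreeDeviceBuffer(uint8_t *memory_addr) {
      auto rt_ret = rtFree(memory_addr);      // keep the concrete error code
      if (rt_ret != RT_ERROR_NONE) {
        return RT_ERROR_TO_GE_STATUS(rt_ret); // map the runtime code to a GE Status
      }
      return SUCCESS;
    }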
diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc
index 821de257..d0292885 100755
--- a/ge/graph/manager/graph_var_manager.cc
+++ b/ge/graph/manager/graph_var_manager.cc
@@ -16,17 +16,10 @@
 #include "graph/manager/graph_var_manager.h"
-#include
-
-#include "common/l2_cache_optimize.h"
-#include "common/types.h"
-#include "framework/common/debug/ge_log.h"
-#include "framework/common/debug/log.h"
-#include "ge/ge_api_types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/manager/graph_mem_allocator.h"
+#include "graph/manager/rdma_pool_allocator.h"
 #include "graph/manager/trans_var_data_utils.h"
-#include "graph/utils/attr_utils.h"
 #include "graph/utils/type_utils.h"
 using std::map;
@@ -37,7 +30,7 @@ namespace ge {
 VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {}
 VarResource::~VarResource() {
-  var_offset_set_.clear();
+  var_offset_map_.clear();
   var_addr_mgr_map_.clear();
   cur_var_tensor_desc_map_.clear();
   var_broad_cast_info_.clear();
@@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
   std::string var_key = VarKey(var_name, tensor_desc);
   GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
   if (var_addr_mgr_map_.count(var_key) == 0) {
-    uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
-                             static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
+    uint64_t logic_address = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
+    if (memory_type != RT_MEMORY_RDMA_HBM) {
+      logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase();
+    }
     GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
            TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
            TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str());
@@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
   var_addr_mgr.tensor_desc = tensor_desc;
   var_addr_mgr.memory_type = memory_type;
   var_addr_mgr_map_[var_key] = var_addr_mgr;
-  var_offset_set_.insert(logic_address);
+  var_offset_map_[logic_address] = memory_type;
   return SUCCESS;
 }
@@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na
   return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr);
 }
-bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; }
+bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; }
+
+rtMemType_t VarResource::GetVarMemType(const int64_t &offset) {
+  if (var_offset_map_.count(offset) > 0) {
+    return var_offset_map_[offset];
+  }
+  return RT_MEMORY_RESERVED;
+}
 VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) {
   auto iter = var_to_trans_road_.find(var_name);
@@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr
 MemResource::MemResource() : total_size_(0), var_mem_size_(0) {}
-Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) {
+MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) {
+  switch (mem_type) {
+    case RT_MEMORY_HBM:
+      return new (std::nothrow) HbmMemResource();
+    case RT_MEMORY_RDMA_HBM:
+      return new (std::nothrow) RdmaMemResource();
+    default:
+      return nullptr;
+  }
+}
+
+Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id,
+                                    size_t &mem_offset) {
   size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize;
   uint64_t real_size = size;
   total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize();
@@ -282,6 +296,19 @@
   return SUCCESS;
 }
+Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) {
+  uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size);
+  if (buffer == nullptr) {
+    GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %lu", var_name.c_str(), size);
+    return MEMALLOC_FAILED;
+  }
+  address = static_cast<size_t>(reinterpret_cast<uintptr_t>(buffer));
+  var_mem_size_ += size;
+  GELOGI("[IMAS]AssignVarMem Set session_%lu name[%s] output[%d] addr to [%p] size[%lu].",
+         session_id, var_name.c_str(), 0, buffer, size);
+  return SUCCESS;
+}
+
 uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; }
 void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; };
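With MemResource now split, BuildMemResourceFromType is the single place that maps a memory type to a concrete resource: HBM keeps offset-based assignment into the session's variable memory, while RDMA hands out real pool addresses. A usage sketch assembled only from the signatures shown above; the wrapper function itself is illustrative, not part of the patch:

    // Pick a resource by memory type and assign variable memory through the
    // virtual AssignVarMem; mirrors the two VarManager call sites below.
    Status AssignVar(const std::string &var_name, uint64_t size, uint64_t session_id,
                     rtMemType_t mem_type) {
      MemResource *mem_resource = MemResource::BuildMemResourceFromType(mem_type);
      if (mem_resource == nullptr) {
        return ge::INTERNAL_ERROR;  // unknown memory type, as in the callers below
      }
      size_t address = 0;  // HBM: offset into var mem; RDMA: real device address
      return mem_resource->AssignVarMem(var_name, size, session_id, address);
    }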
@@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) {
   MemResource *mem_resource = nullptr;
   auto iter = mem_resource_map_.find(memory_type);
   if (iter == mem_resource_map_.end()) {
-    mem_resource = new (std::nothrow) MemResource();
+    mem_resource = MemResource::BuildMemResourceFromType(memory_type);
     if (mem_resource == nullptr) {
       GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
       return ge::INTERNAL_ERROR;
@@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
   MemResource *mem_resource = nullptr;
   auto it = mem_resource_map_.find(memory_type);
   if (it == mem_resource_map_.end()) {
-    mem_resource = new (std::nothrow) MemResource();
+    mem_resource = MemResource::BuildMemResourceFromType(memory_type);
     if (mem_resource == nullptr) {
       GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
       return ge::INTERNAL_ERROR;
@@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) {
   return var_resource_->IsVarAddr(offset);
 }
+rtMemType_t VarManager::GetVarMemType(const int64_t &offset) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (var_resource_ == nullptr) {
+    GELOGW("VarManager has not been init.");
+    return RT_MEMORY_RESERVED;
+  }
+  return var_resource_->GetVarMemType(offset);
+}
+
 ge::Status VarManager::MallocVarMemory(size_t memory_size) {
   std::lock_guard<std::mutex> lock(mutex_);
   uint8_t *var_mem_base = nullptr;
@@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) {
 uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) {
   std::lock_guard<std::mutex> lock(mutex_);
+  if (memory_type == RT_MEMORY_RDMA_HBM) {
+    return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
+  }
   string memory_key = std::to_string(session_id_);
   return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key);
 }
 uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) {
   std::lock_guard<std::mutex> lock(mutex_);
+  if (memory_type == RT_MEMORY_RDMA_HBM) {
+    return logic_addr;
+  }
   string mem_key = std::to_string(session_id_);
   uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key);
   if (mem_base == nullptr) {
diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h
index 9cf0068c..924ddcb7 100755
--- a/ge/graph/manager/graph_var_manager.h
+++ b/ge/graph/manager/graph_var_manager.h
@@ -158,13 +158,15 @@ class VarResource {
   bool IsVarAddr(const int64_t &offset);
+  rtMemType_t GetVarMemType(const int64_t &offset);
+
   std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; }
 private:
   std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc);
   uint64_t session_id_;
-  std::unordered_set<int64_t> var_offset_set_;
+  std::unordered_map<int64_t, rtMemType_t> var_offset_map_;
   std::unordered_map var_addr_mgr_map_;
   std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
   std::unordered_map var_to_trans_road_;
@@ -176,19 +178,36 @@
 class MemResource {
  public:
   MemResource();
-  ~MemResource() = default;
+  virtual ~MemResource() = default;
+  static MemResource *BuildMemResourceFromType(rtMemType_t mem_type);
-  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset);
+  virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0;
   uint64_t GetVarMemSize() const;
   void UpdateVarMemSize(int64_t mem_size);
- private:
+ protected:
   uint64_t total_size_;
   uint64_t var_mem_size_;
 };
+class HbmMemResource : public MemResource {
+ public:
+  HbmMemResource() = default;
+  ~HbmMemResource() override = default;
+
+  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
+};
+
+class RdmaMemResource : public MemResource {
+ public:
+  RdmaMemResource() = default;
+  ~RdmaMemResource() override = default;
+
+  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
+};
+
 class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager {
  public:
  static VarManager *Instance(uint64_t session_id);
@@ -275,6 +294,8 @@
   bool IsVarAddr(const int64_t &offset);
+  rtMemType_t GetVarMemType(const int64_t &offset);
+
   uint8_t *GetVarMemoryBase(rtMemType_t memory_type);
   uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type);
diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc
index 93d1fd1d..ed243801 100644
--- a/ge/graph/manager/rdma_pool_allocator.cc
+++ b/ge/graph/manager/rdma_pool_allocator.cc
@@ -51,7 +51,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type)
 Status RdmaPoolAllocator::Initialize() {
   memory_allocator_ = MemManager::Instance(memory_type_);
   if (memory_allocator_ == nullptr) {
-    return ge::FAILED;
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   return ge::SUCCESS;
 }
diff --git a/ge/graph/manager/rdma_pool_allocator.h b/ge/graph/manager/rdma_pool_allocator.h
index 4d8cf71e..0a895a11 100644
--- a/ge/graph/manager/rdma_pool_allocator.h
+++ b/ge/graph/manager/rdma_pool_allocator.h
@@ -53,6 +53,10 @@ class RdmaPoolAllocator {
   Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size);
+  uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; }
+
+  size_t GetRdmaMemSize() { return rdma_mem_size_; }
+
  private:
   void MergeBlocks(Block *dst, Block *src);
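The two rdma_pool_allocator getters exist for the VarManager hunks above: under RT_MEMORY_RDMA_HBM a variable's "logic" address is already the real device address handed out by the pool, so no var-mem base arithmetic applies. A condensed view of that special-casing; the wrapper function is illustrative, the calls inside it come straight from the hunks above:

    // RDMA HBM: the base comes from the pool, and a logic address is returned
    // untouched by GetVarMemoryAddr because it is already a device address.
    uint8_t *VarMemoryBase(rtMemType_t memory_type) {
      if (memory_type == RT_MEMORY_RDMA_HBM) {
        return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
      }
      return nullptr;  // non-RDMA path unchanged: session-keyed lookup (omitted here)
    }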
diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc
index 6c81b21f..2a60765f 100755
--- a/ge/graph/partition/dynamic_shape_partition.cc
+++ b/ge/graph/partition/dynamic_shape_partition.cc
@@ -51,6 +51,13 @@ using ClusterPtr = std::shared_ptr<Cluster>;
 static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) {
   for (const auto &node : root_graph->GetAllNodes()) {
     GE_CHECK_NOTNULL(node->GetOpDesc());
+    // do not partition in the single-op scene.
+    bool is_singleop = false;
+    (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_singleop);
+    if (is_singleop) {
+      return false;
+    }
+
     for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) {
       auto type = input_desc.GetDataType();
       if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) {
@@ -213,6 +220,7 @@ std::string DynamicShapePartitioner::DebugString() const {
   size_t data = 0;
   size_t netoutput = 0;
   size_t is_inputnode = 0;
+  size_t stage = 0;
   std::stringstream ss;
   ss << "All unknown shape nodes:" << std::endl;
   for (const auto &node : unknown_shape_nodes_) {
@@ -229,10 +237,13 @@
       netoutput++;
     } else if (cluster->IsInputNode()) {
       is_inputnode++;
+    } else if (cluster->IsIndependent()) {
+      stage++;
     }
   }
   ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known
-     << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl;
+     << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode
+     << ", stage:" << stage << std::endl;
   for (const auto &cluster : unique_clusters_) {
     ss << "  " << cluster->DebugString() << std::endl;
   }
@@ -272,12 +283,15 @@ Status DynamicShapePartitioner::InitClusters() {
   for (const auto &node : graph->GetDirectNode()) {
     Cluster::Type type = Cluster::DATA;
     bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty();
+    REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null");
     if (node->GetType() == DATA) {
       type = Cluster::DATA;
     } else if (is_input) {
       type = Cluster::INPUT_NODE;
     } else if (node->GetType() == NETOUTPUT) {
       type = Cluster::NETOUTPUT;
+    } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) {
+      type = Cluster::STAGE;
     } else if (unknown_shape_nodes_.count(node) > 0) {
       type = Cluster::UNKNOWN_SHAPE;
     } else {
@@ -360,6 +374,9 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) {
 void DynamicShapePartitioner::MergeClustersUnknownShape() {
   // Merge unknown shape clusters
   for (const auto &cluster : ordered_cluster_) {
+    if (cluster->IsIndependent()) {
+      continue;
+    }
     for (const auto &in_cluster : cluster->Inputs()) {
       if (!in_cluster->IsUnknownShape()) {
         continue;
@@ -379,6 +396,9 @@ void DynamicShapePartitioner::MergeClustersKnownShape() {
   // Merge known shape clusters
   for (const auto &cluster : ordered_cluster_) {
+    if (cluster->IsIndependent()) {
+      continue;
+    }
     if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) {
       auto in_cluster = *(cluster->Inputs().begin());
       in_cluster->Merge(cluster);
@@ -606,6 +626,7 @@ void Cluster::UpdateRank(size_t rank) {
 bool Cluster::IsData() const { return type_ == DATA; };
 bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; };
 bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; };
+bool Cluster::IsIndependent() const { return type_ == STAGE; };
 bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; };
 bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; };
 bool Cluster::IsRefVariable() const {
@@ -641,6 +662,9 @@ void Cluster::RemoveOutput(ClusterPtr out) {
                          out->in_clusters_.end());
 };
 void Cluster::Merge(ClusterPtr other) {
+  if (other->IsIndependent()) {
+    return;
+  }
   nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end());
   other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()),
                             other->in_clusters_.end());
@@ -689,7 +713,9 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
   std::unordered_set<ClusterPtr> forward_reached_clusters;
   std::unordered_set<ClusterPtr> backward_reached_clusters;
   std::vector<ClusterPtr> path_clusters;
-
+  if (other->IsIndependent()) {
+    return path_clusters;
+  }
   if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) ==
       other->out_clusters_.end()) {
     return path_clusters;
@@ -772,7 +798,7 @@ Status Cluster::BuildFrame() {
       }
     }
   }
-  if (IsData()) {
+  if (IsData() || IsIndependent()) {
     for (const auto &anchor : node->GetAllOutDataAnchors()) {
       AddFrameOutput(anchor);
     }
@@ -888,7 +914,7 @@ Status Cluster::CombinePartitionFrame() {
 }
 Status Cluster::BuildPartitionSubgraph() {
-  if (IsData() || IsNetOutput()) {
+  if (IsData() || IsNetOutput() || IsIndependent()) {
     return SUCCESS;
   }
   int64_t parent_node_index = 0;
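The recurring IsIndependent() guards above all enforce one rule: a PARTITIONEDCALL node carrying ATTR_STAGE_LEVEL becomes its own STAGE cluster and must never be merged into a neighbouring known- or unknown-shape cluster, otherwise stage boundaries would dissolve during partitioning. The guard shape, extracted from the Merge hunk with comments added:

    // Extracted from Cluster::Merge above: an independent (STAGE) cluster is a
    // hard boundary, so merging is silently skipped rather than treated as error.
    void Cluster::Merge(ClusterPtr other) {
      if (other->IsIndependent()) {
        return;  // stage clusters keep their own frame and subgraph
      }
      // ... normal merge of nodes_ and in/out cluster edges follows ...
    }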
diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h
index 9772615e..e8408ff9 100644
--- a/ge/graph/partition/dynamic_shape_partition.h
+++ b/ge/graph/partition/dynamic_shape_partition.h
@@ -32,7 +32,7 @@ class DynamicShapePartitioner {
 // DATA:DATA, UNKNOWN_SHAPE:unknownshape, KNOWN_SHAPE:knownshape, NETOUTPUT:NETOUTPUT.
 class Cluster : public std::enable_shared_from_this<Cluster> {
  public:
-  enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE };
+  enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE };
   Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner)
       : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) {
     nodes_.push_back(node);
@@ -45,6 +45,7 @@
   bool IsData() const;
   bool IsKnownShape() const;
   bool IsUnknownShape() const;
+  bool IsIndependent() const;
   bool IsNetOutput() const;
   std::vector<std::shared_ptr<Cluster>> Inputs() const;
   std::vector<std::shared_ptr<Cluster>> Outputs() const;
diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc
index 93a06afe..f6e49bbd 100644
--- a/ge/graph/partition/stage_partition.cc
+++ b/ge/graph/partition/stage_partition.cc
@@ -25,6 +25,10 @@
 #include "common/types.h"
 namespace ge {
+namespace {
+const std::set<std::string> kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA };
+}
+
 Status StagePartitioner::Partition() {
   GE_CHECK_NOTNULL(root_graph_);
   if (root_graph_->GetParentGraph() != nullptr) {
@@ -37,6 +41,10 @@
     if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) {
       continue;
     }
+    if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) {
+      continue;
+    }
+    GELOGD("original node %s for stage %u", node->GetName().c_str(), level);
     stage_nodes_[level].insert(node);
   }
   if (stage_nodes_.empty()) {
@@ -54,6 +62,13 @@
     return FAILED;
   }
+  root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
+    uint32_t a_level = UINT32_MAX;
+    (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level);
+    uint32_t b_level = UINT32_MAX;
+    (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level);
+    return a_level < b_level;
+  });
   if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) {
     GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, "
            "maybe stage_level was not set correctly.", root_graph_->GetName().c_str());
@@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() {
     auto node = nodes.top();
     nodes.pop();
     GE_CHECK_NOTNULL(node->GetOpDesc());
-    if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) {
+    uint32_t tmp_level = cur_stage_level;
+    (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level);
+    if (tmp_level != cur_stage_level) {
       continue;
     }
     for (const auto &in_node : node->GetInAllNodes()) {
       if (visited_stage_nodes.count(in_node) != 0) {
         continue;
       }
+      if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
+        GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str());
+        return INTERNAL_ERROR;
+      }
+      GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level);
+      if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) {
+        GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level);
+        continue;
+      }
       nodes.push(in_node);
     }
-    if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
-      GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str());
-      return INTERNAL_ERROR;
-    }
-    GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level);
     visited_stage_nodes.emplace(node);
   }
   for (const auto &node : visited_stage_nodes) {
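The Partition() hunk above first runs a topological sort with a comparator so that, among otherwise schedulable nodes, lower ATTR_STAGE_LEVEL comes first; nodes without the attribute default to UINT32_MAX and therefore sink to the end. A self-contained sketch of just that comparator logic; NodeSketch and its attribute map stand in for ge::NodePtr and AttrUtils::GetInt:

    #include <cstdint>
    #include <map>
    #include <string>

    struct NodeSketch {
      std::map<std::string, uint32_t> int_attrs;  // stand-in for op_desc attrs
    };

    static uint32_t StageLevelOf(const NodeSketch &n) {
      auto it = n.int_attrs.find("_stage_level");
      return it == n.int_attrs.end() ? UINT32_MAX : it->second;  // unstaged sorts last
    }

    static bool StageBefore(const NodeSketch &a, const NodeSketch &b) {
      return StageLevelOf(a) < StageLevelOf(b);
    }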
@@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const
   op_desc->AddSubgraphName("f");
   op_desc->SetSubgraphInstanceName(0, graph_name);
+  if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) {
+    GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str());
+    return nullptr;
+  }
+
   NodePtr subgraph_node = root_graph_->AddNode(op_desc);
   if (subgraph_node == nullptr) {
     GELOGE(FAILED, "Add node %s failed.", graph_name.c_str());
diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc
index 7d9724fc..3587b03e 100644
--- a/ge/graph/passes/common_subexpression_elimination_pass.cc
+++ b/ge/graph/passes/common_subexpression_elimination_pass.cc
@@ -26,9 +26,6 @@
 namespace ge {
 namespace {
-std::set<std::string> un_compute_attrs = {
-  {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES},
-};
 std::string GetCseKey(const NodePtr &node) {
   std::stringstream ss;
@@ -53,7 +50,7 @@
     ss << name << "-";
   }
-  ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs);
+  ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc());
   return ss.str();
 }
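The CSE change above is behavioural, not cosmetic: the key now hashes all attributes (GetAllAttrsStr) instead of a string with the dump-origin attribute stripped, so two nodes that differ only in that attribute are no longer folded into one. An illustration of the consequence; the nodes and attribute values are hypothetical:

    // Two nodes identical except for a dump-related attribute. Before this
    // patch the attribute was excluded from the key, so the keys matched and
    // one node was eliminated; after it, the keys differ and both survive.
    std::string key_a = GetCseKey(node_a);  // attrs include _datadump_origin_op_names = "x"
    std::string key_b = GetCseKey(node_b);  // attrs include _datadump_origin_op_names = "y"
    bool folded = (key_a == key_b);         // false after this change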
diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc
index bf2e1170..9ecc79a6 100644
--- a/ge/graph/passes/cond_remove_pass.cc
+++ b/ge/graph/passes/cond_remove_pass.cc
@@ -203,7 +203,7 @@ bool CondRemovePass::CheckIfCondConstInput(const OutDataAnchorPtr &cond_out_anch
   // Get weights from peer node
   auto weights = OpDescUtils::GetWeights(out_node);
   if (weights.size() <= static_cast<size_t>(cond_out_anchor->GetIdx())) {
-    GELOGI("Get weights of node %s out index %d, weight size %u is not fit for data index %d.",
+    GELOGI("Get weights of node %s out index %d, weight size %zu is not fit for data index %d.",
            out_node->GetName().c_str(), cond_out_anchor->GetIdx(), weights.size(), cond_out_anchor->GetIdx());
     return false;
   }
@@ -234,14 +234,14 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c
   const auto &output_desc_size = node->GetOpDesc()->GetOutputsSize();
   // Create subgraph opdesc & node
   auto partitioncall_opdesc =
-      CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size);
+      CreateSubgraphOpDesc(node, save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size);
   auto partitioncall_node = node->GetOwnerComputeGraph()->AddNode(partitioncall_opdesc);
   // Link node's peerout anchors to new node's inanchors
   for (const auto &input_anchor : node->GetAllInAnchors()) {
     for (const auto &peerout_anchor : input_anchor->GetPeerAnchors()) {
       if (GraphUtils::AddEdge(peerout_anchor, partitioncall_node->GetInAnchor(
           input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               peerout_anchor->GetOwnerNode()->GetName().c_str(), peerout_anchor->GetIdx(),
               partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size,
               output_desc_size);
@@ -254,14 +254,14 @@
   for (const auto &output_anchor : node->GetAllOutAnchors()) {
     for (const auto &peerin_anchor : output_anchor->GetPeerAnchors()) {
       if (GraphUtils::RemoveEdge(node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) != ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               node->GetName().c_str(), output_anchor->GetIdx(), peerin_anchor->GetOwnerNode()->GetName().c_str(),
               peerin_anchor->GetIdx(), input_desc_size, output_desc_size);
         return FAILED;
       }
       if (GraphUtils::AddEdge(partitioncall_node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) !=
           ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               partitioncall_node->GetName().c_str(), output_anchor->GetIdx(),
               peerin_anchor->GetOwnerNode()->GetName().c_str(), peerin_anchor->GetIdx(), input_desc_size,
               output_desc_size);
@@ -289,7 +289,8 @@
 /// @param [in] output_num
 /// @return OpDescPtr
 ///
-OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num) {
+OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num,
+                                               size_t output_num) {
   OpDescBuilder op_desc_builder(name, PARTITIONEDCALL);
   op_desc_builder.AddDynamicInput("args", input_num).AddDynamicOutput("output", output_num);
@@ -299,6 +300,16 @@
   size_t index = op_desc->GetSubgraphInstanceNames().size();
   op_desc->AddSubgraphName("f");
   op_desc->SetSubgraphInstanceName(static_cast<uint32_t>(index), name);
+
+  auto node_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL_EXEC(node_desc, return nullptr);
+  for (size_t i = 0; i < input_num; ++i) {
+    (void)op_desc->UpdateInputDesc(i, node_desc->GetInputDesc(i + 1));
+  }
+  for (size_t i = 0; i < output_num; ++i) {
+    (void)op_desc->UpdateOutputDesc(i, node_desc->GetOutputDesc(i));
+  }
+
   return op_desc;
 }
diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h
index 72ca64b8..e466d684 100644
--- a/ge/graph/passes/cond_remove_pass.h
+++ b/ge/graph/passes/cond_remove_pass.h
@@ -70,7 +70,7 @@ class CondRemovePass : public BaseNodePass {
   ///
   Status ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, const ComputeGraphPtr &save_branch);
-  OpDescPtr CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num);
+  OpDescPtr CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num, size_t output_num);
   int32_t GetCondIndex(const ConstGeTensorPtr &tensor);
 };
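CreateSubgraphOpDesc now receives the original If/Case node so the freshly built PartitionedCall inherits real tensor descriptors instead of empty ones: input i of the new op maps to input i+1 of the original node (index 0 was the condition, which is dropped), and outputs map one-to-one. The index arithmetic from the hunk above, isolated and annotated:

    // i ranges over the new PartitionedCall's inputs/outputs.
    for (size_t i = 0; i < input_num; ++i) {
      // original input 0 is the cond tensor, so shift the source index by one
      (void)op_desc->UpdateInputDesc(i, node_desc->GetInputDesc(i + 1));
    }
    for (size_t i = 0; i < output_num; ++i) {
      (void)op_desc->UpdateOutputDesc(i, node_desc->GetOutputDesc(i));  // 1:1
    }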
31dee390..3b7a0886 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -469,7 +469,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%d->%s:%zu failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(), while_node->GetName().c_str(), i); } @@ -480,7 +480,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { GE_CHECK_NOTNULL(out_data_anchor); for (auto &peer_in_anchor : while_info.data_outputs[i]) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_data_anchor, peer_in_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%zu->%s:%d failed.", while_node->GetName().c_str(), i + kWhileOutputIndex, peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); } diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..3f607f84 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,6 +28,8 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; +const int32_t kAnchorAssignRefIndex = 0; +const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { @@ -35,43 +37,147 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + Status ret = ContinuousInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed ContinuousInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = MutableInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = P2pmemInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + } + return SUCCESS; +}
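[Editor's note: Run() above is now a thin dispatcher over three independent checks (continuous input, mutable input, p2p memory). A minimal sketch of how the pass is driven, kept deliberately non-authoritative: GraphPass::Run(ComputeGraphPtr) and AttrUtils::GetBool appear in this patch, while AttrUtils::SetBool is assumed to mirror GetBool, and the helper name RunHcclMemcpyOn is hypothetical.]

```cpp
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/utils/attr_utils.h"

// Sketch: mark a collective op's inputs as mutable, then run the pass so that
// MutableInputProcess() splices an Identity in front of const/variable inputs.
ge::Status RunHcclMemcpyOn(const ge::ComputeGraphPtr &graph, const ge::NodePtr &hcom_node) {
  // "_input_mutable" is the kInputMutable attr that MutableInputProcess() checks.
  (void)ge::AttrUtils::SetBool(hcom_node->GetOpDesc(), "_input_mutable", true);
  ge::HcclMemcpyPass pass;
  return pass.Run(graph);  // dispatches the three *InputProcess checks per node
}
```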
+ +// If a node has the _input_mutable attr, its input memory may be modified while the op executes. +// To avoid affecting other ops that execute with the same input after the data is modified, +// a memcpy (Identity) node needs to be inserted between them. +// This also applies when the input is a variable or const. +Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + return SUCCESS; + } + + if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); + return FAILED; + } + if (!node_input_mutable) { + return SUCCESS; + } - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); + for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { + if (hccl_in_anchor == nullptr) { continue; } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(src_out_anchor); - GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); - if (!node_input_mutable) { + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } continue; } - GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } + return SUCCESS; +} + +// If a broadcast has more than one input and an input comes from a variable, +// the broadcast inputs must occupy continuous memory, so a separate feature-map buffer is allocated for them. +// In that condition, a move of the data from variable memory to the broadcast input buffer would be executed on every step. +// To keep that move action out of the executed model, a memcpy node is inserted instead of move-action code. +Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + if (is_input_continuous && op_desc->GetInputsSize() > 1) { + GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); + // if input size is bigger than one, insert memcpy between var and data to support continuous memory allocation for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - NodePtr src_node = src_out_anchor->GetOwnerNode(); - std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); - if (check_src_type) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; } - continue; } + } + } + return SUCCESS; +} + +// if the input is a variable and the node input needs p2p memory, a memcpy should be inserted between the two +Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + vector<int64_t> input_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); + + if (input_memory_types.empty()) { + return SUCCESS; + } + + for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { + if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { + continue; + } + + GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); + auto hccl_in_anchor = node->GetInDataAnchor(index); + if (hccl_in_anchor == nullptr) { + continue; + } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { return SUCCESS; } +bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { + return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); +} + /// -/// @brief Add MemcpyAsync Node +/// @brief Add Identity Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); return nullptr; } // because of historical reasons, this pass can not run after constant folding, so mark it @@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; }
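[Editor's note: the net effect of ModifyEdgeConnection in the hunk that follows is a plain edge splice around the Identity node created above. A minimal sketch under that assumption, using only GraphUtils calls and macros that appear elsewhere in this diff; whether InsertIdentityBeforeHccl is implemented with exactly these calls is not visible in these hunks, and the helper name SpliceIdentity is hypothetical.]

```cpp
#include "graph/utils/graph_utils.h"

// Sketch: splice `identity` into the data edge var -> hccl.
// Before: var:out -> hccl:in    After: var:out -> identity:0 -> hccl:in
ge::Status SpliceIdentity(const ge::NodePtr &identity,
                          const ge::OutDataAnchorPtr &src_out_anchor,
                          const ge::InDataAnchorPtr &hccl_in_anchor) {
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::RemoveEdge(src_out_anchor, hccl_in_anchor),
                          "Remove var->hccl edge failed.");
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::AddEdge(src_out_anchor, identity->GetInDataAnchor(0)),
                          "Add var->identity edge failed.");
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::AddEdge(identity->GetOutDataAnchor(0), hccl_in_anchor),
                          "Add identity->hccl edge failed.");
  return ge::SUCCESS;
}
```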
"Create Identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; } @@ -155,7 +265,38 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); + GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); + GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); + + Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + + ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + return SUCCESS; +} + +/// +/// @brief Insert Identity node Between Hccl node and variable +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr src_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -182,6 +323,141 @@ Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const } return SUCCESS; } + +/// +/// @brief Insert assign node after broadcast node and variable to refresh variable data +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr var_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &var_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { + GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { + GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + + for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { + if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { + GELOGD("variable %s out assign node is exist.", 
var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + } + + NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); + GE_CHECK_NOTNULL(assign_node); + + OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); + GE_CHECK_NOTNULL(hccl_out_anchor); + + Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to anchor of %s.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to anchor of %s.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + // add control edge between assign node and node after broadcast node + OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(assign_out_control_anchor); + + for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { + if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to control anchor of %s.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + + for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { + if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_control_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to control anchor of %s.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_control_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + return SUCCESS; +} + +/// +/// @brief Create Assign node and add it to the graph +/// @param [in] ge::ComputeGraphPtr graph +/// @param [in] ge::OutDataAnchorPtr variable node out anchor +/// @return ge::NodePtr +/// +NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { + GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); + NodePtr pre_node = out_data_anchor->GetOwnerNode(); + OpDescPtr pre_op_desc = pre_node->GetOpDesc(); + if (pre_op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid."); + return nullptr; + } + + std::string node_name = pre_node->GetName() + "_" + ASSIGN; + node_name = CheckDuplicateName(node_name); + OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), ASSIGN); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail."); + return nullptr; + } + GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); + + graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); + return nullptr; + } + + ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail."); + return nullptr; + } + + ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); + return nullptr; + } + + NodePtr assign_node = graph->AddNode(op_desc); + if (assign_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Insert Assign node fail."); + return nullptr; + } + + return assign_node; +} + +
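[Editor's note: the anchor wiring in InsertAssignAfterBroadcastIfNeed above is easier to audit as a picture. The comments below are a sketch of the intended result, not patch content; they follow directly from the LinkTo calls shown in the hunk.]

```cpp
// Topology after the pass, for a broadcast whose input is a variable:
//
//   Var --data--> Identity --data--> HcomBroadcast --data--> downstream op
//    |                                      |                     ^
//    | (ref, input 0)      (value, input 1) |                     | control
//    +-------------> Assign <---------------+                     |
//                       |                                         |
//                       +----------------- control ---------------+
//
// Assign writes the broadcast output back into the variable; its out-control
// anchor is linked to every other consumer of the broadcast output (and to
// the broadcast's existing control successors) so the refresh runs first.
```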
op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); + return nullptr; + } + + NodePtr assign_node = graph->AddNode(op_desc); + if (assign_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + return nullptr; + } + + return assign_node; +} + + /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..98e05964 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -32,11 +32,28 @@ class HcclMemcpyPass : public GraphPass { private: NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); + Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + bool IsDataNode(const std::string& node_type); + std::unordered_map node_num_map_; }; } // namespace ge diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index b7efa070..17a1e3bb 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -928,7 +928,7 @@ Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) { auto out_data_anchor = node->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index); - GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %zu data node failed.", + GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %d data node failed.", out_data_anchor->GetIdx()); return INTERNAL_ERROR); for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); diff --git a/ge/graph/passes/remove_same_const_pass.cc b/ge/graph/passes/remove_same_const_pass.cc index e75a4553..3d18a92d 100644 --- a/ge/graph/passes/remove_same_const_pass.cc +++ b/ge/graph/passes/remove_same_const_pass.cc @@ -85,7 +85,7 @@ Status RemoveSameConstPass::Run(ComputeGraphPtr graph) { ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s", node->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s, ret=%u", node->GetName().c_str(), iter->second->GetName().c_str(), ret); return INTERNAL_ERROR; } diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index d1111d52..3d83c301 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -142,17 +142,18 @@ Status 
SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node GE_CHECK_NOTNULL(in_node); // Need insert memcpy - // 1. Const->NetOutput in subgraph + // 1. Const->NetOutput in subgraph & parent graph is known // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while // 5. While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || + bool insert_flag = + (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; @@ -310,7 +311,7 @@ Status SubgraphPass::InsertInputMemcpy(const ComputeGraphPtr &graph, const std:: Status SubgraphPass::InsertOutputMemcpy(const ComputeGraphPtr &graph, const NodePtr &output_node, const std::set &bypass_index) { if (output_node->GetAllInDataAnchorsSize() == bypass_index.size()) { - GELOGD("No need to insert output memcpy node in while_body %s, output_size=%zu, bypass_num=%zu.", + GELOGD("No need to insert output memcpy node in while_body %s, output_size=%u, bypass_num=%zu.", graph->GetName().c_str(), output_node->GetAllInDataAnchorsSize(), bypass_index.size()); return SUCCESS; } diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc deleted file mode 100644 index c9218296..00000000 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ /dev/null @@ -1,811 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/variable_op_pass.h" -#include -#include - -#include "common/formats/formats.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/ge_context.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" - -namespace ge { -namespace { -const int kTransOpOutIndex = 0; - -Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) { - GE_CHECK_NOTNULL(front_node); - GE_CHECK_NOTNULL(back_node); - GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str()); - auto ret = GraphUtils::CopyInCtrlEdges(front_node, back_node); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to move control edges from trans " - "node %s to var-ref %s", - front_node->GetName().c_str(), back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - auto back_node_in_anchor = back_node->GetInDataAnchor(0); - if (back_node_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "The back node %s does not have an " - "input anchor", - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - back_node_in_anchor->UnlinkAll(); - auto trans_in_anchor = front_node->GetInDataAnchor(0); - if (trans_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get the in data anchor from trans" - " node %s type %s", - front_node->GetName().c_str(), front_node->GetType().c_str()); - return INTERNAL_ERROR; - } - auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); - if (prev_trans_node_out_anchor == nullptr) { - GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - front_node->GetName().c_str(), front_node->GetName().c_str()); - } else { - ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, back_node_in_anchor); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to add edge between ref node %s " - "and the prev node of trans node %s", - back_node->GetName().c_str(), front_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } - return SUCCESS; -} - -bool IsTransSupport(const TransNodeInfo &trans_info) { - if (trans_info.output.GetShape().IsUnknownShape()) { - return false; - } - if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { - return true; - } else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { - formats::TransArgs args{nullptr, - trans_info.input.GetFormat(), - trans_info.output.GetFormat(), - trans_info.input.GetShape().GetDims(), - trans_info.output.GetShape().GetDims(), - trans_info.input.GetDataType()}; - return formats::IsTransFormatSupport(args); - } else if (trans_info.node_type == CAST) { - formats::CastArgs datatype_args{nullptr, static_cast(trans_info.input.GetShape().GetShapeSize()), - trans_info.input.GetDataType(), trans_info.output.GetDataType()}; - return formats::IsTransDataTypeSupport(datatype_args); - } else { - return false; - } -} - -std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto op_desc = trans_node->GetOpDesc(); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - if (reverse) { - GeTensorDesc tmp_desc = input_desc; - input_desc = output_desc; - output_desc = tmp_desc; - } - auto input_format = input_desc.GetFormat(); - auto input_type = 
input_desc.GetDataType(); - auto input_shape = input_desc.GetShape(); - auto output_format = output_desc.GetFormat(); - auto output_type = output_desc.GetDataType(); - auto output_shape = output_desc.GetShape(); - std::stringstream diff_key; - diff_key.str(""); - if (input_format != output_format) { - diff_key << static_cast(input_format) << '-' << static_cast(output_format) << '-'; - } else { - diff_key << "*-"; - } - if (input_type != output_type) { - diff_key << static_cast(input_type) << '-' << static_cast(output_type) << '-'; - } else { - diff_key << "*-"; - } - if (!ge::formats::IsShapeEqual(input_shape, output_shape)) { - for (auto dim : input_shape.GetDims()) { - diff_key << dim << '-'; - } - for (auto dim : output_shape.GetDims()) { - diff_key << dim << '-'; - } - } else { - diff_key << "*"; - } - return diff_key.str(); -} -} // namespace - -Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), - GetContext().SessionId(), graph->GetGraphID()); - - if (var_accelerate_ctrl_ == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str()); - if (RenewVarDesc(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph"); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - GELOGD("Begin to fusion variables and trans nodes"); - for (auto &var_to_refs : var_and_var_ref_map_) { - auto &node = var_to_refs.first; - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(var_accelerate_ctrl_); - if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) { - GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str()); - continue; - } - - VarTransRoad fusion_road; - auto ret = FusionIfNeed(node, fusion_road); - if (ret != SUCCESS) { - return ret; - } - - if (fusion_road.empty()) { - GELOGD("No need to fusion variable %s because it's fusion road is empty", node->GetName().c_str()); - continue; - } - - ret = RenewTransRoadDesc(node, fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - auto start_iter = fusion_road.begin(); - auto end_iter = fusion_road.rbegin(); - GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); - - ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); - if (ret != SUCCESS) { - 
GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - var_accelerate_ctrl_->SetVarChanged(node->GetName()); - - GELOGD("Begin to update format info for var %s.", node->GetName().c_str()); - std::set node_set({node}); - if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - // renew var desc if the trans_road is all reshape or reformat - ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - - return SUCCESS; -} - -Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) { - auto var_desc = var->GetOpDesc(); - GE_CHECK_NOTNULL(var_desc); - TransNodeInfo prev_node_info; - prev_node_info.node_type = var->GetType(); - prev_node_info.output = var_desc->GetOutputDesc(0); - // two cases - // fisrt Var->cast->transdata which transdata in fusion road - // the input of transdata is not equal with output of var - // case 1 : suppose input dtype of transdata equal with out dtype - // but not equal with var - // so we make input dtype and output dytpe of transroad equal with var - // case 2: suppose input format of transdata not equal with out format - // and input format not equal with var - // so we make input format equal with var - for (auto &cur_trans : fusion_road) { - if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { - cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); - } - if (cur_trans.input.GetDataType() == cur_trans.output.GetDataType()) { - cur_trans.output.SetDataType(prev_node_info.output.GetDataType()); - } - if (ge::formats::IsShapeEqual(cur_trans.input.GetShape(), cur_trans.output.GetShape())) { - cur_trans.output.SetShape(prev_node_info.output.GetShape()); - } - cur_trans.input = prev_node_info.output; - prev_node_info.output = cur_trans.output; - } - return SUCCESS; -} - -Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) { - bool can_fusion = false; - while (true) { - map> trans_type_to_trans_ops ; - map> trans_type_to_changed_desc; - // record the order of trans op in first path - vector first_path_trans_order; - auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, can_fusion); - if (ret != SUCCESS) { - GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - vector> delete_var_ref_trans_nodes; - ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - GELOGE(FAILED, "get and check trans op of varref failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - ret = UpdateTransRoad(fusion_road, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Update trans road failed"); - return ret; - } - - if (fusion_road.empty()) { - return SUCCESS; - } - - ret = DealFusion(var, fusion_road, trans_type_to_changed_desc, - trans_type_to_trans_ops, 
delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - return ret; - } - } - return SUCCESS; -} - -Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ - vector delete_trans_type; - for (auto &trans_type : first_path_trans_order) { - if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { - continue; - } - bool delete_flag = false; - for (auto &trans_node : trans_type_to_trans_ops[trans_type]) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto out_op_desc = trans_node->GetOpDesc(); - GE_CHECK_NOTNULL(out_op_desc); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - if (!IsTransSupport(trans_node_info)) { - delete_flag = true; - GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str()); - break; - } - } - if (delete_flag) { - delete_trans_type.push_back(trans_type); - } else { - auto &trans_node = *trans_type_to_trans_ops[trans_type].begin(); - auto out_op_desc = trans_node->GetOpDesc(); - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - fusion_road.emplace_back(trans_node_info); - } - } - for (auto &trans_type : delete_trans_type) { - trans_type_to_changed_desc.erase(trans_type); - } - return SUCCESS; -} - -Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes) { - GE_CHECK_NOTNULL(var_node); - GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); - auto graph = var_node->GetOwnerComputeGraph(); - for (auto &trans_type : trans_type_to_changed_desc) { - for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) { - GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), - trans_node->GetType().c_str(), var_node->GetName().c_str()); - if (RenewTransOpDesc(trans_node, true) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - - // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}} - // we should delete {a,b} first , then b->c,then we can delete {b,c} - // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b} - for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) { - auto front_node = iter->first; - auto back_node = iter->second; - if (RenewTransOpDesc(front_node, false) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (front_node->GetOutDataNodes().size() > 1) { - GELOGD("The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - front_node->GetName().c_str(), front_node->GetType().c_str(), 
back_node->GetName().c_str()); - if (ByPassTransNode(front_node, back_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(), - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } else { - GELOGD("The trans node %s type %s connecting with %s has only" - " one output data nodes, isolate and remove it.", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType()); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc; - bool format_changed = false; - bool shape_changed = false; - bool dtype_changed = false; - if (input_desc.GetFormat() != output_desc.GetFormat()) { - format_changed = true; - } - if (input_desc.GetDataType() != output_desc.GetDataType()) { - dtype_changed = true; - } - if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) { - shape_changed = true; - } - auto cur_node = node; - while (TransOpUtil::IsTransOp(cur_node)) { - tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - auto next_node = is_reverse ? NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) : - cur_node->GetOutDataNodes().at(kTransOpOutIndex); - if (!TransOpUtil::IsTransOp(next_node)) { - break; - } - auto prev_desc = next_node->GetOpDesc(); - tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType()); - auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex); - auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index); - GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex)); - GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index)); - if (shape_changed) { - mutable_input_desc->SetShape(renew_desc.GetShape()); - mutable_output_desc->SetShape(renew_desc.GetShape()); - } - if (dtype_changed) { - mutable_input_desc->SetDataType(renew_desc.GetDataType()); - mutable_output_desc->SetDataType(renew_desc.GetDataType()); - } - if (format_changed) { - mutable_input_desc->SetFormat(renew_desc.GetFormat()); - mutable_output_desc->SetFormat(renew_desc.GetFormat()); - } - cur_node = next_node; - } - return SUCCESS; -} - -Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag) { - bool is_match = true; - auto ret = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, is_match); - - if (ret != SUCCESS) { - GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (!is_match) { - flag = false; - GELOGI("trans nodes after variable do not meet the condition"); - return SUCCESS; - } - - flag = true; - return SUCCESS; -} - -Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> 
&trans_type_to_trans_ops, bool &is_match) { - GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str()); - auto ret = GetFisrtPathTransInfo(var, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str()); - return FAILED; - } - - if (first_path_trans_order.empty()) { - GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str()); - is_match = false; - return SUCCESS; - } - - GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str()); - VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match); - - return SUCCESS; -} - -void VariableOpPass::VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - std::stack node_stack; - std::stack> path_stack; - for (auto &out_node : node->GetOutDataNodes()) { - if (!is_match) { - break; - } - if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) { - is_match = false; - break; - } - node_stack.push(out_node); - path_stack.emplace(vector{out_node}); - while (!node_stack.empty() && is_match) { - auto cur_node = node_stack.top(); - auto cur_path = path_stack.top(); - node_stack.pop(); - path_stack.pop(); - if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) { - UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops); - continue; - } - for (auto &next_node : cur_node->GetOutDataNodes()) { - node_stack.push(next_node); - auto next_path = cur_path; - next_path.push_back(next_node); - path_stack.emplace(next_path); - } - } - } -} - -Status VariableOpPass::UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - GELOGD("Begin to update trans info by path"); - std::set trans_op_occured; - for (auto &trans_node : cur_path) { - auto trans_node_type = trans_node->GetType(); - if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() || - !ge::TransOpUtil::IsTransOp(trans_node_type)) { - continue; - } - trans_op_occured.insert(trans_node_type); - auto desc_diff = GetInAndOutDecsDiff(trans_node); - if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() && - desc_diff == trans_type_to_changed_desc[trans_node_type].first) { - trans_type_to_changed_desc[trans_node_type].second = true; - auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(), - trans_type_to_trans_ops[trans_node_type].end(), - trans_node); - if (iter == trans_type_to_trans_ops[trans_node_type].end()) { - trans_type_to_trans_ops[trans_node_type].push_back(trans_node); - } - } - } - std::set delete_trans_types; - for (auto &trans_item : trans_type_to_changed_desc) { - if (!trans_item.second.second) { - delete_trans_types.insert(trans_item.first); - } else { - trans_item.second.second = false; - } - } - for (auto& delete_item : delete_trans_types) { - trans_type_to_changed_desc.erase(delete_item); - } - if (trans_type_to_changed_desc.empty()) { - is_match = false; - } - return SUCCESS; -} - -Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - auto cur_node = var; - while (cur_node->GetOutDataNodesSize() != 0) { - cur_node = cur_node->GetOutDataNodes().at(0); - GE_CHECK_NOTNULL(cur_node); - if 
(!ge::TransOpUtil::IsTransOp(cur_node)) { - break; - } - auto cur_node_type = cur_node->GetType(); - // only get the the first occurrence operator of same type - if (trans_type_to_changed_desc.find(cur_node_type) == trans_type_to_changed_desc.end()) { - auto desc_diff = GetInAndOutDecsDiff(cur_node); - trans_type_to_changed_desc[cur_node->GetType()] = make_pair(desc_diff, false); - trans_type_to_trans_ops[cur_node->GetType()] = vector{cur_node}; - first_path_trans_order.push_back(cur_node->GetType()); - } - } - GELOGD("get var %s first path trans info success", var->GetName().c_str()); - return SUCCESS; -} - -Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes) { - auto iterator = var_and_var_ref_map_.find(var_node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGD("there is no var_ref of node %s", var_node->GetName().c_str()); - return SUCCESS; - } - vector delete_trans_type; - for (auto &trans_type : trans_type_to_changed_desc) { - delete_trans_type.push_back(trans_type.first); - } - for (auto &ref_node : iterator->second) { - GE_CHECK_NOTNULL(ref_node); - auto cur_node = *ref_node->GetInDataNodes().begin(); - auto behind_node = ref_node; - GE_CHECK_NOTNULL(cur_node); - vector tmp_delete_trans_type = delete_trans_type; - while (TransOpUtil::IsTransOp(cur_node)) { - GE_CHECK_NOTNULL(cur_node); - auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType()); - if (iter != tmp_delete_trans_type.end()) { - CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first, - pass_check); - if (!pass_check) { - GELOGD("trans op : %s of var ref %s is illegal", cur_node->GetName().c_str(), ref_node->GetName().c_str()); - return SUCCESS; - } - tmp_delete_trans_type.erase(iter); - delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node)); - } - int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - behind_node = cur_node; - cur_node = cur_node->GetInDataNodes().at(tran_in_index); - } - if (!tmp_delete_trans_type.empty()) { - pass_check = false; - return SUCCESS; - } - } - return SUCCESS; -} - -Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, - bool &is_symmetry){ - auto var_ref_trans_op_desc_diff = GetInAndOutDecsDiff(var_ref_trans_op, true); - is_symmetry = (var_ref_trans_op_desc_diff == desc_diff); - return SUCCESS; -} - -Status VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) { - if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(FAILED, "node or opdesc is nullptr"); - return FAILED; - } - const Format &format = final_output.GetFormat(); - const DataType &data_type = final_output.GetDataType(); - const GeShape &shape = final_output.GetShape(); - GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(), - TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str()); - - auto node_desc = node->GetOpDesc()->GetOutputDesc(0); - CopyVariableFormatDataTypeAndShape(final_output, node_desc); - if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGE(FAILED, "update output desc fail."); - return FAILED; - } - GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.", - 
TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(), - TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(), - node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str()); - - auto iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - auto graph = node->GetOwnerComputeGraph(); - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(), - node->GetOwnerComputeGraph()->GetName().c_str()); - return SUCCESS; - } - - for (const auto &var_ref_node : iterator->second) { - auto var_ref_node_description = var_ref_node->GetOpDesc(); - GE_CHECK_NOTNULL(var_ref_node_description); - - GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(), - shape.GetDims().size(), var_ref_node->GetName().c_str()); - if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateOutputDesc fail."); - } - if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateInputDesc fail."); - } - const auto &input_desc = var_ref_node_description->MutableInputDesc(0); - const auto &output_desc = var_ref_node_description->MutableOutputDesc(0); - GE_CHECK_NOTNULL(input_desc); - GE_CHECK_NOTNULL(output_desc); - GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(), - var_ref_node->GetName().c_str()); - } - - return SUCCESS; -} - -Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) { - std::map names_to_var; - std::map> names_to_refs; - GE_CHECK_NOTNULL(compute_graph); - for (auto &node : compute_graph->GetDirectNode()) { - if (node->GetType() != VARIABLE) { - continue; - } - std::string ref_var_name; - if (!ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) { - names_to_var[node->GetName()] = node; - } else { - names_to_refs[ref_var_name].insert(node); - } - } - - for (auto &name_to_var : names_to_var) { - var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first]; - } - return SUCCESS; -} - -void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, - GeTensorDesc &dst_tensor_desc) { - dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); - dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); - dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); -} - -Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes) { - for (auto &need_set_node : nodes) { - auto ret = UpdateVarAndRefOutputFormatInfo(final_output, need_set_node); - if (ret != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { - GE_CHECK_NOTNULL(graph); - // renew var manager desc - Status ret = SUCCESS; - for (auto &node : graph->GetDirectNode()) { - 
bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); - if (is_var_node) { - if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - continue; - } - GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - ret = ge::VarManager::Instance(graph->GetSessionID())->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) { - // renew var desc if the trans_road is all reshape or reformat - for (auto &road : fusion_road) { - if (road.node_type != RESHAPE && road.node_type != REFORMAT) { - return SUCCESS; - } - } - - if (!ge::VarManager::Instance(session_id)->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - return SUCCESS; - } - GELOGD("var manager exist var node[%s]", node->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - Status ret = ge::VarManager::Instance(session_id)->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - - return SUCCESS; -} - -} // namespace ge diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h deleted file mode 100644 index fccd063b..00000000 --- a/ge/graph/passes/variable_op_pass_bak.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#include -#include -#include -#include "graph/common/transop_util.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/utils/node_utils.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/manager/util/variable_accelerate_ctrl.h" -#include "inc/graph_pass.h" - -namespace ge { -namespace variable_op { -struct NodeDesc { - ge::GeTensorDesc input; - ge::GeTensorDesc output; - bool is_update = false; -}; -} // namespace variable_op -class VariableOpPass : public GraphPass { - public: - explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {} - - ~VariableOpPass() override = default; - - Status Run(ge::ComputeGraphPtr graph) override; - - private: - Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes); - - Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse); - - Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road); - - Status CheckIfCouldBeOptimized(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag); - - Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road); - - Status GetSameTransOP(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status GetFisrtPathTransInfo(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - void VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes); - - Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry); - - Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node); - - Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph); - - void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc); - - Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes); - - Status RenewVarDesc(ge::ComputeGraphPtr &graph); - - Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road); - - map> var_and_var_ref_map_; - - VarAccelerateCtrl *var_accelerate_ctrl_; -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 91fab280..63f1b131 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -49,7 +49,6 @@ #include "graph/passes/for_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" #include 
"graph/passes/merge_pass.h" @@ -935,7 +934,10 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, return PARAM_INVALID; } for (auto &shape_range_str : shape_range_set) { - if (shape_range_str.empty()) { + if (shape_range_str.size() < 3) { + // shape_range_str should be "[2~3,1" + // or ",[2~3,1". because we should trim '[' or ',[' + // so shape_range_str.size() < 3 is invalid continue; } // trim start bytes, after that, single input should be "1~20,3,3~6,-1" @@ -956,7 +958,7 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, // fix dim auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str()); if (range_value < 0) { - range_pair = std::make_pair(0, range_value); + range_pair = std::make_pair(1, range_value); } else { range_pair = std::make_pair(range_value, range_value); } @@ -1017,36 +1019,32 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, return PARAM_INVALID; } for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { - if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { + auto curr_dim = origin_shape.GetDim(i); + auto left_range = current_shape_range_vec.at(i).first; + auto right_range = current_shape_range_vec.at(i).second; + if (left_range == right_range) { // given shape_range is known dim, check is same as origin or not - if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { + if (curr_dim != left_range) { GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", - current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); + left_range, curr_dim); return PARAM_INVALID; } - origin_shape.SetDim(i, current_shape_range_vec.at(i).first); + origin_shape.SetDim(i, left_range); } else { - origin_shape.SetDim(i, -1); + // given shape_range is fix range, check input_shape is in this range or not + if (right_range != UNKNOWN_DIM) { + if ((curr_dim < left_range) || (curr_dim > right_range)) { + GELOGE(PARAM_INVALID, "Given shape range is [%ld~%ld], current dim shape is %ld, out of range.Pleace Check.", + left_range, right_range, curr_dim); + return PARAM_INVALID; + } + } + origin_shape.SetDim(i, UNKNOWN_DIM); } } desc.SetShape(origin_shape); desc.SetShapeRange(current_shape_range_vec); - int64_t dynamic_shape_size = 1; - for (const auto range_pair : range_vec.at(index)) { - FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); - dynamic_shape_size *= range_pair.second; - } - auto data_type_size = GetSizeByDataType(desc.GetDataType()); - if (data_type_size < 0) { - GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", - TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); - return PARAM_INVALID; - } - FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); - dynamic_shape_size *= data_type_size; - GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size); graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); @@ -1666,6 +1664,9 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { auto ge_tensor_desc = ge_tensor_ptr->GetTensorDesc(); int64_t shape_size = ge_tensor_desc.GetShape().GetShapeSize(); auto data_type = ge_tensor_desc.GetDataType(); + if (data_type == DT_STRING) { + return SUCCESS; + } uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, 
diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
index 3b37003f..b1534eb4 100755
--- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
+++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
@@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map aipp_params(new (std::nothrow) domi::AippOpParams());
   ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST,
                          "Data node does not contain param aipp!");
   GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
@@ -644,7 +644,7 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) {
     std::vector aipps;
     GE_RETURN_IF_ERROR(GetAllAipps(data_node, *aipps_or_switchs_or_case.begin(), aipps));
-    GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%u]", data_node->GetName().c_str(),
+    GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%zu]", data_node->GetName().c_str(),
            data_node->GetType().c_str(), aipps.size());
     for (auto aipp_it : aipps) {
diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc
index 5506435e..e43c5dd2 100644
--- a/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -371,7 +371,7 @@ Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(mapGetName().c_str());
+      GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str());
       return FAILED;
     }
     frame_enter[frame_name].emplace_back(node);
@@ -850,19 +850,19 @@ NodePtr MultiBatchGraphCopyer::FindSwitchnNodeForDataEdge(const OutDataAnchorPtr
   if (is_getnext_sink_data) {
     auto output_idx = data_out_anchor->GetIdx();
     size_t referenced_index = 0;
-    GELOGI("The output idx %zu has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size());
+    GELOGI("The output idx %d has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size());
     for (const auto &peer_in_anchor : data_out_anchor->GetPeerInDataAnchors()) {
       if (peer_in_anchor->GetOwnerNode()->GetOpDesc() == nullptr) {
         GELOGE(INTERNAL_ERROR, "Op desc should not be nullptr.");
         return nullptr;
       }
       if (getnext_nodes_to_switchn_.at(output_idx).empty()) {
-        GELOGI("Output idx %zu of %s is static output.", output_idx, data_node->GetName().c_str());
+        GELOGI("Output idx %d of %s is static output.", output_idx, data_node->GetName().c_str());
         return nullptr;
       }
       if (output_idx >= static_cast(getnext_nodes_to_switchn_.size()) ||
           referenced_index >= getnext_nodes_to_switchn_.at(output_idx).size()) {
-        GELOGE(INTERNAL_ERROR, "Output idx is %zu, referenced index is %zu", output_idx, referenced_index);
+        GELOGE(INTERNAL_ERROR, "Output idx is %d, referenced index is %zu", output_idx, referenced_index);
         return nullptr;
       }
       if (peer_in_anchor->GetOwnerNode()->GetOpDesc()->GetName() == origin_node->GetName()) {
@@ -1203,7 +1203,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNAndUpdateMaxShape(const NodePtr &node
   for (size_t i = 0; i < getnext_sink_dynamic_out_mapping_.size(); ++i) {
     if(UpdateMaxShapeToData(node, i) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Failed to update max shape of %zu out anchor", node->GetName().c_str(), i);
+      GELOGE(PARAM_INVALID, "Failed to update %s max shape of %zu out anchor", node->GetName().c_str(), i);
       return PARAM_INVALID;
     }
   }
diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc
index 8aab0981..84f38fa6 100644
--- a/ge/graph/preprocess/multi_batch_options.cc
+++ b/ge/graph/preprocess/multi_batch_options.cc
@@ -435,7 +435,7 @@ Status CheckDynamicParams(const vector> &shapes) {
                     "E10035", {"shapesize", "minshapesize"},
                     {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)});
     GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s "
-           "value size [%zu] must be greater than [%zu].",
+           "value size [%zu] must be greater than [%d].",
           shapes.size(), kMinShapesCount - 1);
     return PARAM_INVALID;
   }
@@ -444,7 +444,7 @@ Status CheckDynamicParams(const vector> &shapes) {
                     "E10036", {"shapesize", "maxshapesize"},
                     {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)});
     GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s "
-           "value size [%zu] must be less than [%zu].",
+           "value size [%zu] must be less than [%d].",
           shapes.size(), kMaxShapesCount + 1);
     return PARAM_INVALID;
   }
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
index a6e00f4a..7f709f03 100644
--- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
+++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
@@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp);
 REGISTER_OP_CREATOR(RandomUniform, HostOp);
 REGISTER_OP_CREATOR(Add, HostOp);
 REGISTER_OP_CREATOR(Mul, HostOp);
+REGISTER_OP_CREATOR(ConcatV2, HostOp);
+REGISTER_OP_CREATOR(Data, HostOp);
+REGISTER_OP_CREATOR(Fill, HostOp);
 }  // namespace host_cpu
 }  // namespace ge
diff --git a/ge/host_cpu_engine/proto/task.proto b/ge/host_cpu_engine/proto/task.proto
index d0c09840..0da5631e 100644
--- a/ge/host_cpu_engine/proto/task.proto
+++ b/ge/host_cpu_engine/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
     LabelSetDef label_set = 37;
     LabelGotoExDef label_goto_ex = 38;
     LabelSwitchByIndexDef label_switch_by_index = 39;
+    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
     uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+    KernelContext context = 1;
+
+    uint64 handle = 10;
+    string dev_func = 11;
+    uint32 block_dim = 12;
+    uint32 args_size = 13;
+    bytes args = 14;
+    bytes sm_desc = 15;
+    string original_kernel_key = 16;
+    string node_info = 17;
+}
+
 message KernelContext {
     uint32 kernel_type = 1;
     uint32 op_id = 2;  // OP type in CCE
diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc
index 32611b03..3037934e 100644
--- a/ge/host_kernels/dynamic_stitch_kernel.cc
+++ b/ge/host_kernels/dynamic_stitch_kernel.cc
@@ -126,10 +126,10 @@ void DynamicStitchKernel::ComputeMergedShape(const vector &inp
   vector merged_dim_vec = {merged_first_dim + 1};
   if (step > 0) {
     merged_dim_vec.emplace_back(step);
-    GELOGD("merged_shape is [ %ld, %ld].", merged_first_dim, step);
+    GELOGD("merged_shape is [ %d, %ld].", merged_first_dim, step);
   }
   merged_shape = GeShape(merged_dim_vec);
-  GELOGD("merged_shape is [ %ld ].", merged_first_dim);
+  GELOGD("merged_shape is [ %d ].", merged_first_dim);
 }

 Status DynamicStitchKernel::GenData(const vector &input, GeTensorPtr &output_ptr) {
@@ -196,14 +196,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec
     // if index repeated, need new data replace old data , so give more allowance
     if (indices_set.find(input_indices[j]) != indices_set.end()) {
       if (ge::CheckInt64AddOverflow(input_indices[j], data_unit) != SUCCESS) {
-        GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit);
+        GELOGW("Check int64 add overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit);
         return NOT_CHANGED;
       }
       allowance += data_unit;
     }
     indices_set.insert(input_indices[j]);
     if (!CheckInt64MulOverflow(input_indices[j], data_unit)) {
-      GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit);
+      GELOGW("Check int64 mul overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit);
       return NOT_CHANGED;
     }
     dst_offset = input_indices[j] * data_unit;
diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc
index 476005ef..bf7a2a1f 100644
--- a/ge/host_kernels/pack_kernel.cc
+++ b/ge/host_kernels/pack_kernel.cc
@@ -124,7 +124,7 @@ Status PackKernel::ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::v
   int64_t num = 1;
   for (auto dim : dst_shape.GetDims()) {
     if (dim < 0) {
-      GELOGW("Invalid dim ld% in the shape %s", dim, formats::ShapeToString(shape).c_str());
+      GELOGW("Invalid dim %ld in the shape %s", dim, formats::ShapeToString(shape).c_str());
       return NOT_CHANGED;
     }
     num *= dim;
diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc
index 1de9478c..b246b976 100755
--- a/ge/host_kernels/rank_kernel.cc
+++ b/ge/host_kernels/rank_kernel.cc
@@ -42,7 +42,7 @@ Status RankKernel::Compute(const NodePtr &node, std::vector &v_outp
   GE_CHECK_NOTNULL(op_desc);
   size_t input_node_size = op_desc->GetInputsSize();
   if (input_node_size != kRankInputSize) {
-    GELOGW("input node size must be %d", kRankInputSize);
+    GELOGW("input node size must be %zu", kRankInputSize);
     return NOT_CHANGED;
   }
diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc
index b1bfb10a..c7e4b2c8 100644
--- a/ge/host_kernels/strided_slice_kernel.cc
+++ b/ge/host_kernels/strided_slice_kernel.cc
@@ -250,16 +250,16 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &lis
   run_flag_ = true;
   listener_ = listener;
   future_ = std::async(std::launch::async, [&]() -> Status {
+    GetThreadLocalContext() = *executor_->GetContext()->ge_context;
     GetContext().SetSessionId(executor_->GetContext()->session_id);
     return RunInternal();
   });
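The hunk directly above copies the caller's GE context into the std::async worker before touching the session id: GetThreadLocalContext() is thread-local state, so it does not follow the task into the new thread and has to be rebound at the top of the lambda. A minimal sketch of the pattern, with a hypothetical thread_local Context type standing in for the GE one:

#include <future>

struct Context { int session_id = 0; };

// Hypothetical accessor; thread-local, like GetThreadLocalContext() in GE.
inline Context &GetThreadLocalCtx() {
  thread_local Context ctx;
  return ctx;
}

std::future<int> LaunchWorker() {
  Context parent_ctx = GetThreadLocalCtx();  // snapshot in the calling thread
  return std::async(std::launch::async, [parent_ctx]() -> int {
    GetThreadLocalCtx() = parent_ctx;  // rebind inside the worker thread
    return GetThreadLocalCtx().session_id;
  });
}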
@@ -220,7 +221,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
     auto &tensor_desc = input_tensor_desc_[input_index];
     tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
     args.input_desc[input_index] = tensor_desc;
-    GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+    GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
     GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
                             "Failed to calc tensor size, index = %zu, shape = [%s]",
                             input_index,
@@ -229,11 +230,15 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
     }

     GE_CHECK_GE(tensor_size, 0);
-    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    AllocationAttr attr;
+    if (GetContext().GetHostExecFlag()) {
+      attr.SetMemType(HOST_DDR);
+    }
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr);
     GE_CHECK_NOTNULL(tensor_buffer);
     args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release()));

-    GELOGD("To copy input data for input[%u]", input_index);
+    GELOGD("To copy input data for input[%zu]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
     auto mem_size = static_cast(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
@@ -242,7 +247,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
                            data_buf.length,
                            mem_size);

-    GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
+    GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]",
            model_->root_runtime_param_.graph_id,
            input_index,
            args.inputs[input_index].GetData(),
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 21d2d033..a69cc45f 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -21,7 +21,7 @@
 #include
 #include "external/ge/ge_api_error_codes.h"
 #include "external/ge/ge_api_types.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "runtime/stream.h"
diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h
index 6299d4ff..6b2e52b4 100644
--- a/ge/hybrid/executor/hybrid_model_executor.h
+++ b/ge/hybrid/executor/hybrid_model_executor.h
@@ -17,7 +17,7 @@
 #ifndef GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_
 #define GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_
 #include "common/thread_pool.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/executor/rt_callback_manager.h"
 #include "hybrid/executor/subgraph_executor.h"
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 171ddaf3..00921705 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -188,6 +188,14 @@ Status NodeState::WaitForPrepareDone() {
   return SUCCESS;
 }

+void NodeState::SetTaskContext(std::shared_ptr &task_context) {
+  task_context_ = task_context;
+}
+
+std::shared_ptr NodeState::GetTaskContext() {
+  return task_context_;
+}
+
 Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
   GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
   HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled");
diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h
index 02a362b4..c68a19ac 100644
--- a/ge/hybrid/executor/node_state.h
+++ b/ge/hybrid/executor/node_state.h
@@ -29,6 +29,7 @@ namespace hybrid {
 class NodeTask;
 struct GraphExecutionContext;
 class SubgraphContext;
+class TaskContext;

 class ShapeFuture {
  public:
@@ -103,6 +104,9 @@ struct NodeState {

   Status AwaitInputTensors(GraphExecutionContext &context) const;

+  void SetTaskContext(std::shared_ptr &task_context);
+  std::shared_ptr GetTaskContext();
+
  private:
   const NodeItem *node_item_ = nullptr;
   std::shared_ptr kernel_task_ = nullptr;
@@ -110,6 +114,7 @@ struct NodeState {
   OpDescPtr op_desc_;
   ShapeInferenceState shape_inference_state_;
   SubgraphContext *subgraph_context_;
+  std::shared_ptr task_context_ = nullptr;
   std::mutex mu_;
 };
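With the two accessors added above, a TaskContext can be created once while a node is being prepared and fetched again when the node is launched, instead of being rebuilt on every launch. A simplified sketch of that handoff (hypothetical types, not the GE classes):

#include <memory>

struct TaskContext { bool force_infer_shape = false; };

struct NodeState {
  void SetTaskContext(std::shared_ptr<TaskContext> &ctx) { task_context_ = ctx; }
  std::shared_ptr<TaskContext> GetTaskContext() { return task_context_; }
 private:
  std::shared_ptr<TaskContext> task_context_;
};

void Prepare(NodeState &state) {
  auto ctx = std::make_shared<TaskContext>();  // created exactly once per node
  state.SetTaskContext(ctx);
}

void Launch(NodeState &state) {
  auto ctx = state.GetTaskContext();  // reused, instead of re-created per launch
  if (ctx != nullptr) {
    ctx->force_infer_shape = true;
  }
}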
diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc
index f7b063c7..f8f122b1 100644
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -231,6 +231,15 @@ Status SubgraphExecutor::PrepareNodes() {
       } else {
         node_state->SetKernelTask(node_item.kernel_task);
       }
+      auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get());
+      GE_CHECK_NOTNULL(unique_task_context);
+      const auto &task = node_state->GetKernelTask();
+      if (task == nullptr) {
+        GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str());
+        return INTERNAL_ERROR;
+      }
+      auto shared_task_context = std::shared_ptr(unique_task_context.release());
+      node_state->SetTaskContext(shared_task_context);
     }
   }

@@ -267,6 +276,19 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
   } else {
     node_state.SetKernelTask(node_item.kernel_task);
   }
+  auto unique_task_context = TaskContext::Create(*node_state.GetNodeItem(), context_, subgraph_context_.get());
+  GE_CHECK_NOTNULL(unique_task_context);
+  const auto &task = node_state.GetKernelTask();
+  if (task == nullptr) {
+    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+  auto shared_task_context = std::shared_ptr(unique_task_context.release());
+  node_state.SetTaskContext(shared_task_context);
+  GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context));
+  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start");
+  GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context));  // update op_desc before alloc ws
+  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end");
   return SUCCESS;
 }

@@ -295,10 +317,9 @@ Status SubgraphExecutor::LaunchTasks() {
     GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone());

     GELOGD("[%s] Start to execute.", node_state->GetName().c_str());
-    auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get());
-    GE_CHECK_NOTNULL(task_context);
-    task_context->SetForceInferShape(force_infer_shape_);
-    auto shared_task_context = std::shared_ptr(task_context.release());
+    auto shared_task_context = node_state->GetTaskContext();
+    GE_CHECK_NOTNULL(shared_task_context);
+    shared_task_context->SetForceInferShape(force_infer_shape_);
     HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
                           "[%s] Execute node failed.",
                           node_state->GetName().c_str());
diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h
index d1949947..4523e2c4 100644
--- a/ge/hybrid/executor/subgraph_executor.h
+++ b/ge/hybrid/executor/subgraph_executor.h
@@ -75,7 +75,7 @@ class SubgraphExecutor {
   Status GetOutputs(std::vector &outputs, std::vector &output_desc);

  private:
-  static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
+  Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);

   static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);

   Status Init(const std::vector &inputs, const std::vector &input_desc);
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index b5de2a70..a6386b27 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
   }

   GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
-  auto op_desc = node->GetOpDesc();
-  std::string op_name = op_desc->GetName();
-  std::string dynamic_model_name = model->GetModelName();
-  uint32_t task_id = context_->GetTaskId();
-  uint32_t stream_id = context_->GetStreamId();
-  TaskDescInfo tmp_task_desc_info;
-  tmp_task_desc_info.model_name = dynamic_model_name;
-  tmp_task_desc_info.op_name = op_name;
-  tmp_task_desc_info.block_dim = 0;
-  auto task_defs = model->GetTaskDefs(node);
-  if (task_defs != nullptr && (*task_defs).size() > 0) {
-    const auto &task_def = (*task_defs)[0];
-    tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
-  }
-  tmp_task_desc_info.task_id = task_id;
-  tmp_task_desc_info.stream_id = stream_id;
-  tmp_task_desc_info.shape_type = "dynamic";
-  tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
-  GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
-         node->GetName().c_str(), task_id, stream_id);
-  task_desc_info.emplace_back(tmp_task_desc_info);
+  task_desc_info = context_->GetProfilingTaskDescInfo();
+  context_->ClearProfilingTaskDescInfo();
+
   return SUCCESS;
 }

@@ -189,22 +171,18 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
   GE_CHECK_NOTNULL(model);

   GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
+  compute_graph_info = context_->GetProfilingGraphDescInfo();
+  context_->ClearProfilingGraphDescInfo();

-  std::string dynamic_model_name = model->GetModelName();
   auto op_desc = node->GetOpDesc();
-  if (op_desc == nullptr) {
-    GELOGE(PARAM_INVALID, "op_desc is nullptr.");
-    return PARAM_INVALID;
-  }
-
-  auto op_mode = static_cast(domi::ImplyType::INVALID);
-  if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
-      op_mode == static_cast(domi::ImplyType::TVM)) {
-    ComputeGraphDescInfo tmp_compute_graph_info;
-    tmp_compute_graph_info.model_name = dynamic_model_name;
-    tmp_compute_graph_info.op_name = op_desc->GetName();
-    tmp_compute_graph_info.op_type = op_desc->GetType();
-
+  GE_CHECK_NOTNULL(op_desc);
+  for (auto &tmp_compute_graph_info : compute_graph_info) {
+    // default
+    if (op_desc->GetAllInputsSize() == 0) {
+      tmp_compute_graph_info.input_format = { FORMAT_NULL };
+      tmp_compute_graph_info.input_shape = { {0} };
+      tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
+    }
     for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
       GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
       if (input_desc == nullptr) {
@@ -215,17 +193,19 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
       tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
     }

+    if (op_desc->GetOutputsSize() == 0) {
+      tmp_compute_graph_info.output_format = { FORMAT_NULL };
+      tmp_compute_graph_info.output_shape = { {0} };
+      tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
+    }
     for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
       GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
       tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
       tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
       tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
     }
-    tmp_compute_graph_info.task_id = context_->GetTaskId();
-    tmp_compute_graph_info.stream_id = context_->GetStreamId();
-    compute_graph_info.emplace_back(tmp_compute_graph_info);
-    GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
   }
+
   return SUCCESS;
 }

@@ -247,7 +227,6 @@ Status NodeDoneCallback::ProfilingReport() {
   GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str());

   std::vector task_desc_info;
-  TaskDescInfo tmp_task_desc_info;
   auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
   if (profiling_ret != RT_ERROR_NONE) {
     GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str());
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc
index 56ae3ea3..46ee6bd6 100755
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -68,7 +68,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
   // Do shape inference
   GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str());
   {
-    std::lock_guard lk(mu_);
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
     GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
                       "Invoke InferShapeAndType failed.");
diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h
index 5349390c..369c732a 100644
--- a/ge/hybrid/hybrid_davinci_model.h
+++ b/ge/hybrid/hybrid_davinci_model.h
@@ -19,7 +19,7 @@

 #include
 #include "external/ge/ge_api_error_codes.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "model/ge_root_model.h"

 namespace ge {
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index 91b6a549..7e5d8fe5 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -17,7 +17,7 @@
 #include "hybrid_model.h"
 #include
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/node_utils.h"
 #include "graph/utils/tensor_utils.h"
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index e521b776..72495cad 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -21,8 +21,8 @@
 #include
 #include
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/data_inputer.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/data_inputer.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/node.h"
 #include "hybrid/common/tensor_value.h"
 #include "hybrid/model/node_item.h"
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index d1f61985..b314c6a7 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -20,8 +20,8 @@
 #include "graph/ge_context.h"
 #include "graph/build/memory/var_mem_assign_util.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/model_utils.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/manager/host_mem_manager.h"
 #include "graph/manager/trans_var_data_utils.h"
@@ -772,7 +772,12 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
                     var_name.c_str(),
                     hybrid_model_.GetSessionId());

-  uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM);
+  rtMemType_t memory_type = RT_MEMORY_HBM;
+  uint32_t mem_type = 0;
+  if (AttrUtils::GetInt(var_node->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) {
+    memory_type = RT_MEMORY_RDMA_HBM;
+  }
+  uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type);
   if (dev_mem == nullptr) {
     GELOGE(INTERNAL_ERROR,
            "Failed to copy var %s from device, cant not get "
@@ -934,7 +939,7 @@ Status HybridModelBuilder::InitVariableTensors() {
       GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
       return MEMALLOC_FAILED;
     }
-    GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size);
+    GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);

     std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
                                                           tensor_size));
@@ -1603,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
   GE_CHECK_NOTNULL(compute_graph);

   NodePtr node_ptr = nullptr;
-  vector task_def_list;
+  map> node_task_map;
   // create fp node
   bool is_insert_fp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
   if (is_insert_fp_profiling_task) {
+    vector task_def_list;
     (void)GenerateFpProfilingTask(op_desc, task_def_list);
     auto fp_desc = MakeShared(kProfilingFpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(fp_desc);
     fp_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(fp_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create fp profiling node success before.");
   }
   // create all reduce start node
@@ -1620,6 +1628,7 @@
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld before", log_id);
@@ -1629,18 +1638,24 @@
     GE_CHECK_NOTNULL(ar_desc_start);
     ar_desc_start->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_start);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce start profiling node success before.");
   }
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
     }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node before.");
   }
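The restructure above replaces the single node_ptr plus one shared task_def_list (which could mix up task defs when both an fp node and an all-reduce start node are created in the same call) with a per-node map. A sketch of the registration loop with hypothetical simplified types:

#include <map>
#include <memory>
#include <vector>

struct Node {};
struct TaskDef {};
using NodePtr = std::shared_ptr<Node>;

void RegisterProfilingNodes(const std::map<NodePtr, std::vector<TaskDef>> &node_task_map,
                            std::map<NodePtr, std::vector<TaskDef>> &model_task_defs) {
  for (const auto &node_task : node_task_map) {
    for (const auto &task_def : node_task.second) {
      model_task_defs[node_task.first].push_back(task_def);  // task defs stay tied to their node
    }
    // ... here the NodeItem for node_task.first would be created and appended to the graph item
  }
}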
@@ -1656,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
   GE_CHECK_NOTNULL(compute_graph);

   NodePtr node_ptr = nullptr;
-  vector task_def_list;
+  map> node_task_map;
   // Create all reduce end node
   bool is_insert_bp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld after", log_id);
@@ -1671,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
     GE_CHECK_NOTNULL(ar_desc_end);
     ar_desc_end->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_end);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce end profiling node success after.");
   }
   // create bp node
   if (!is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     (void) GenerateBpProfilingTask(op_desc, task_def_list);
     auto bp_op_desc = MakeShared(kProfilingBpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(bp_op_desc);
     bp_op_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(bp_op_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create bp profiling node success after.");
   }
   // create end node
   bool is_insert_end_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
   if (is_insert_end_profiling_task) {
+    vector task_def_list;
     (void)GenerateEndProfilingTask(op_desc, task_def_list);
     auto end_desc = MakeShared(kProfilingEndNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(end_desc);
     end_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(end_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create end profiling node success after.");
   }
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
    }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node after.");
   }
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index 55a19b6c..045bf3ef 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -21,7 +21,7 @@
 #include
 #include
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/node.h"
 #include "hybrid/model/hybrid_model.h"
 #include "hybrid/model/node_item.h"
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 2abc5b03..cb5a7d4c 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -182,16 +182,17 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function
     }
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
     GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
+    // save profiling data
     uint32_t task_id = 0;
     uint32_t stream_id = 0;
-    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);  // must be called after Launch kernel
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(rt_ret, "Get task_id and stream_id failed.");
-      return rt_ret;
+      return FAILED;
     }
-    context.SetTaskId(task_id);
-    context.SetStreamId(stream_id);
-    GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
+    GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
+    (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
+    (void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
   }
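The reordering above relies on the runtime reporting the ids of the most recently launched kernel, so the query has to directly follow LaunchKernel. A toy model of that constraint (illustrative names only, not the real rt* API):

#include <cstdint>

namespace toy_rt {
static uint32_t g_last_task_id = 0;
inline void KernelLaunch() { ++g_last_task_id; }          // each launch gets a new id
inline void GetTaskId(uint32_t *task_id) { *task_id = g_last_task_id; }
}  // namespace toy_rt

struct ProfRecord { uint32_t task_id; };

ProfRecord LaunchAndRecord() {
  toy_rt::KernelLaunch();
  uint32_t task_id = 0;
  toy_rt::GetTaskId(&task_id);  // read immediately; a second launch would overwrite it
  return ProfRecord{task_id};
}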
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 80ea579b..f1bd6466 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -19,7 +19,8 @@
 #include "framework/common/debug/log.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
-#include "graph/load/new_model_manager/tbe_handle_store.h"
+#include "graph/load/model_manager/tbe_handle_store.h"
+#include "graph/types.h"

 using optiling::OpRunInfo;

@@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
 Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
   GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
+
+  GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast(INT_MAX));
+  int outputs_size = static_cast(op_desc.GetOutputsSize());
+
+  for (int i = 0; i < outputs_size; ++i) {
+    const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i);
+    if (tensor_desc == nullptr) {
+      GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i);
+      continue;
+    }
+
+    int32_t calc_type = 0;
+    bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+    if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
+      output_indices_to_skip_.push_back(i);
+    }
+  }
   return SUCCESS;
 }

@@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
 }

 Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
-  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces();
+  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces()
+                              - output_indices_to_skip_.size();
   if (tiling_buffer_ != nullptr) {
     ++expected_arg_count;
   }
@@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
   for (int i = 0; i < task_context.NumOutputs(); ++i) {
     const auto output = task_context.GetOutput(i);
     GE_CHECK_NOTNULL(output);
+    if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) {
+      GELOGD("Node:%s output[%d] is optional, its address does not need to be saved.",
+             task_context.GetNodeName(), i);
+      continue;
+    }
     arg_base_[index++] = reinterpret_cast(output->GetData());
   }
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 5818f384..3f350531 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -48,6 +48,8 @@ class AiCoreOpTask {

   bool GetClearAtomic() const {return clear_atomic_;}

+  uint32_t GetBlockDim() const {return block_dim_;}
+
  protected:
   Status UpdateTilingInfo(TaskContext &context);
   virtual std::string GetKeyForOpParamSize() const;
@@ -70,6 +72,7 @@ class AiCoreOpTask {
   uint32_t args_size_ = 0;
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
+  std::vector output_indices_to_skip_;
 };

 class AtomicAddrCleanOpTask : public AiCoreOpTask {
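Per the hunks above, outputs whose memory-size calc type is ALWAYS_EMPTY get no device address slot, so both the expected arg count and the packing loop skip them. A standalone sketch of that filtering under simplified, assumed types:

#include <algorithm>
#include <cstdint>
#include <vector>

// Pack output device addresses into the kernel arg table, skipping optional
// (always-empty) outputs, mirroring the skip-list logic above.
std::vector<uintptr_t> PackOutputArgs(const std::vector<uintptr_t> &output_addrs,
                                      const std::vector<int> &indices_to_skip) {
  std::vector<uintptr_t> args;
  args.reserve(output_addrs.size() - indices_to_skip.size());
  for (int i = 0; i < static_cast<int>(output_addrs.size()); ++i) {
    if (std::find(indices_to_skip.begin(), indices_to_skip.end(), i) != indices_to_skip.end()) {
      continue;  // optional output: no address slot is written for it
    }
    args.push_back(output_addrs[i]);
  }
  return args;
}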
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
index b8acbf0e..e9c7c604 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
@@ -29,8 +29,9 @@ constexpr int64_t kDimEndFlag = INT64_MIN;
 Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
   GELOGI("Node[%s] parse ext info start.", node_name_.c_str());
   if (ext_info.empty()) {
-    GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.",
+           node_name_.c_str());
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   ext_info_len_ = ext_info.size();
@@ -38,8 +39,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
   GE_CHECK_NOTNULL(ext_info_);

   if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) {
-    GELOGE(FAILED, "[%s] Failed to copy ext info", node_name_.c_str());
-    return FAILED;
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to copy ext info", node_name_.c_str());
+    return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }

   input_shape_and_type_.clear();
@@ -72,7 +73,7 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
     offset += aicpu_ext_info->infoLen;
   }

-  GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.",
                          node_name_.c_str(), offset, ext_info_len_);
   GELOGI("Node[%s] parse ext info end.", node_name_.c_str());
@@ -80,13 +81,13 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
 }

 Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
                          node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen);

   auto type = reinterpret_cast(aicpu_ext_info->infoMsg);

-  GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext shape type failed as need %d but %d.",
                          node_name_.c_str(), unknown_type_, *type);
   GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
@@ -95,7 +96,7 @@ Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {

 Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
   auto need_len = input_num_ * sizeof(AicpuShapeAndType);
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext input shape failed as infoLen must be "
                          "input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
                          node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
@@ -116,7 +117,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
     return SUCCESS;
   }
   auto need_len = output_num_ * sizeof(AicpuShapeAndType);
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext output shape failed as infoLen must be "
                          "output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
                          node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
@@ -130,7 +131,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
 }

 Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
                          node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen);
@@ -173,7 +174,7 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const
 }

 Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) {
-  GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR,
+  GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), ACL_ERROR_GE_INTERNAL_ERROR,
                          "Node[%s] is depend_compute type, no need to update output shape and type by ext.",
                          node_name_.c_str());
   GE_CHECK_LE(output_index, output_num_);
@@ -183,7 +184,7 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons
   if (unknown_type_ == DEPEND_SHAPE_RANGE) {
     std::vector> range;
     auto range_ret = output_desc.GetShapeRange(range);
-    GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR,
+    GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, ACL_ERROR_GE_INTERNAL_ERROR,
                            "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.",
                            node_name_.c_str(), range_ret);
     for (size_t k = 0; k < range.size(); ++k) {
@@ -210,9 +211,9 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da
                                                AicpuShapeAndType *shape_and_type) {
   auto dim_num = shape.GetDimNum();
   if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) {
-    GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
           dim_num, aicpu::FWKAdapter::kMaxShapeDims);
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_PARAM_INVALID;
   }
   size_t index = 0;
   for (; index < dim_num; ++index) {
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 63ce65e9..1c160eea 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -18,11 +18,10 @@
 #include "framework/common/taskdown_common.h"
 #include "common/formats/formats.h"
 #include "aicpu/common/aicpu_task_struct.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "graph/utils/node_utils.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/model/hybrid_model.h"
-#include "opskernel_manager/ops_kernel_builder_manager.h"

 namespace ge {
 namespace hybrid {
@@ -190,17 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionnode);
+  if (unknown_type_ == DEPEND_COMPUTE) {
+    GE_CHK_STATUS_RET_NOLOG(SetMemCopyTask((*task_defs)[1]));
+  }
   GELOGI("Node[%s] init end.", node_name_.c_str());
   return SUCCESS;
 }

+Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) {
+  if (node_item_->num_outputs == 0) {
+    GELOGD("Node[%s] type[%s] has no output, no need set mem_copy task.",
+           node_name_.c_str(), node_item_->node_type.c_str());
+    return SUCCESS;
+  }
+
+  GELOGD("Start to set memcpy task for node[%s].", node_name_.c_str());
+  const domi::KernelExDef &kernel_def = task_def.kernel_ex();
+  if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
+    GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
+           sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
+    return PARAM_INVALID;
+  }
+  STR_FWK_OP_KERNEL aicpu_task = {0};
+  auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
+                          kernel_def.args().data(), kernel_def.args_size());
+  if (sec_ret != EOK) {
+    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+    return FAILED;
+  }
+
+  GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_),
+                    "Node[%s] alloc copy task workspace buf failed, size=%u.",
+                    node_name_.c_str(), kernel_def.task_info_size());
+
+  GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(),
+                         kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE));
+
+  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData());
+  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(copy_workspace_buf_->GetData());
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
+
+  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
+                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
+  GELOGD("Set memcpy task for node[%s] successfully.", node_name_.c_str());
+  return SUCCESS;
+}
+
 uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) {
   // get step_id_addr
   auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP);
@@ -407,32 +450,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
                          "Node[%s] has %d outputs but out shape is %zu.",
                          node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size());

-  uint64_t copy_num = 0;
-  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num));
-
-  STR_FWK_OP_KERNEL aicpu_task = {0};
-  std::string task_info;
-  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] Start");
-  GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info));
-  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] End");
-
-  std::unique_ptr kernel_workspace_buf;
-  GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf),
-                    "Node[%s] alloc copy task workspace buf failed, size=%zu.",
-                    node_name_.c_str(), task_info.size());
-
-  GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(),
-                         task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE));
-
-  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData());
-  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData());
-  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
-  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
-
-  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
-                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));

   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
   GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
@@ -445,8 +463,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
 }

 Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
-                                          const std::vector> &out_shape_hbm,
-                                          uint64_t &copy_num) {
+                                          const std::vector> &out_shape_hbm) {
   std::vector copy_input_release_flag;
   std::vector copy_input_data_size;
   std::vector copy_input_src;
@@ -458,34 +475,23 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
               node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size,
               summary.raw_data_ptr, summary.raw_data_size);

-    if (summary.raw_data_size > 0) {
-      auto output = context.GetOutput(i);
-      GE_CHECK_NOTNULL(output);
-      GE_CHECK_NOTNULL(output->GetData());
-      copy_input_release_flag.emplace_back(kReleaseFlag);
-      copy_input_data_size.emplace_back(summary.raw_data_size);
-      copy_input_src.emplace_back(summary.raw_data_ptr);
-      copy_input_dst.emplace_back(reinterpret_cast(output->GetData()));
-    }
-
-    if (summary.shape_data_size > 0) {
-      const auto &shape_buffer = out_shape_hbm[i];
-      GE_CHECK_NOTNULL(shape_buffer);
-      GE_CHECK_NOTNULL(shape_buffer->GetData());
-      copy_input_release_flag.emplace_back(kReleaseFlag);
-      copy_input_data_size.emplace_back(summary.shape_data_size);
-      copy_input_src.emplace_back(summary.shape_data_ptr);
-      copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData()));
-    }
+    auto output = context.GetOutput(i);
+    GE_CHECK_NOTNULL(output);
+    copy_input_release_flag.emplace_back(kReleaseFlag);
+    copy_input_data_size.emplace_back(summary.raw_data_size);
+    copy_input_src.emplace_back(summary.raw_data_ptr);
+    copy_input_dst.emplace_back(reinterpret_cast(output->GetData()));
+
+    const auto &shape_buffer = out_shape_hbm[i];
+    GE_CHECK_NOTNULL(shape_buffer);
+    copy_input_release_flag.emplace_back(kReleaseFlag);
+    copy_input_data_size.emplace_back(summary.shape_data_size);
+    copy_input_src.emplace_back(summary.shape_data_ptr);
+    copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData()));
   }

-  copy_num = copy_input_release_flag.size();
-
-  GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR,
-                         "Node[%s] need copy num is 0", node_name_.c_str());
-
-  // copy task need copy output and output shape
-  const size_t copy_input_buf_len = copy_num * sizeof(uint64_t);
+  // copy task need copy all output_data and output_shape, len is 2 * output_num
+  const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t);

   GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_->GetData(), copy_input_release_flag_dev_->GetSize(),
                          &copy_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE));
@@ -498,15 +504,6 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
   return SUCCESS;
 }

-Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) {
-  static constexpr const char *const kKernelLibName = "aicpu_tf_kernel";
-  auto kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName);
-  GE_CHK_BOOL_RET_STATUS(kernel_builder != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName);
-  auto ret = kernel_builder->GenMemCopyTask(copy_num, task, task_info);
-  GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret);
-  return SUCCESS;
-}
-
 Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context,
                                                const std::vector> &out_shape_hbm) {
   GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs),
@@ -813,9 +810,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model,
     GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID,
                            "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size());
   } else {
-    // The number of tasks of the fourth type operator may be 2
-    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID,
-                           "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2",
+    // The number of tasks of the fourth type operator must be 2
+    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 2, PARAM_INVALID,
+                           "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2",
                            node->GetName().c_str(), (*task_defs).size());
   }
   const auto &task_def = (*task_defs)[0];
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
index 8f0b1d0a..b9cc8256 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
@@ -98,6 +98,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
   Status UpdateIoAddr(TaskContext &context) override;

  private:
+  Status SetMemCopyTask(const domi::TaskDef &task_def);
+
   Status InitForDependComputeTask();

   Status UpdateShapeAndDataByResultSummary(TaskContext &context);
@@ -117,11 +119,9 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
                            const std::vector> &out_shape_hbm);

   Status PrepareCopyInputs(const TaskContext &context,
-                           const std::vector> &out_shape_hbm,
-                           uint64_t &copy_num);
+                           const std::vector> &out_shape_hbm);

   static Status EnsureSessionCreated(uint64_t session_id);
-  static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info);
   static uint64_t GetStepIdAddr(const HybridModel &model);

  private:
   // kernel buf, device mem
@@ -145,6 +145,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
   std::unique_ptr copy_input_src_dev_;
   std::unique_ptr copy_input_dst_dev_;
   bool need_sync_ = false;
+
+  std::unique_ptr copy_workspace_buf_;
 };

 class AicpuNodeTask : public AicpuNodeTaskBase {
+119,9 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { const std::vector> &out_shape_hbm); Status PrepareCopyInputs(const TaskContext &context, - const std::vector> &out_shape_hbm, - uint64_t ©_num); + const std::vector> &out_shape_hbm); static Status EnsureSessionCreated(uint64_t session_id); - static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); private: // kernel buf, device mem @@ -145,6 +145,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { std::unique_ptr copy_input_src_dev_; std::unique_ptr copy_input_dst_dev_; bool need_sync_ = false; + + std::unique_ptr copy_workspace_buf_; }; class AicpuNodeTask : public AicpuNodeTaskBase { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 7f2c6288..0837ffff 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,8 +21,8 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { @@ -126,6 +126,12 @@ Status KnownNodeTask::Init(TaskContext &context) { auto dump_properties = context.GetDumpProperties(); if (dump_properties.IsDumpOpen()) { davinci_model_->SetDumpProperties(dump_properties); + void *global_step = nullptr; + TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); + if (varible_global_step != nullptr) { + global_step = varible_global_step->MutableData(); + } + davinci_model_->SetKnownShapeGlobalStep(global_step); } int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 2dde993b..6e9740ad 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -19,7 +19,7 @@ #include "hybrid/node_executor/node_executor.h" #include "hybrid/model/hybrid_model.h" #include "graph/op_desc.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 94c734ca..5387a176 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -15,23 +15,25 @@ */ #include "hybrid/node_executor/hccl/hccl_node_executor.h" -#include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" #include "common/math/math_util.h" -#include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" #include "graph/runtime_inference_context.h" -#include "hccl/hcom.h" +#include "graph/utils/type_utils.h" +#include "hybrid/executor/hybrid_execution_context.h" +namespace ge { namespace { -const size_t kVarTableDims = 2; -const size_t kVarTableRowCnt = 3; -const size_t kVarTableIdxAddr = 1; -const size_t kVarTableIdxLen = 2; +constexpr size_t 
kVarTableDims = 2; +constexpr size_t kVarTableRowCnt = 3; +constexpr size_t kVarTableIdxAddr = 1; +constexpr size_t kVarTableIdxLen = 2; +const std::set<std::string> kRdmaReadTypes = { HCOMREMOTEREAD, HCOMREMOTEREFREAD }; +const std::set<std::string> kRdmaWriteTypes = { HCOMREMOTEWRITE, HCOMREMOTESCATTERWRITE }; +const std::set<std::string> kRdmaScatterTypes = { HCOMREMOTEREFREAD, HCOMREMOTESCATTERWRITE }; } // namespace -namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, HcclNodeExecutor); @@ -142,11 +144,22 @@ Status RdmaNodeTask::Init(TaskContext &context) { GE_CHECK_NOTNULL(peer_node->GetOpDesc()); remote_index_ = {peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx()}; - if (node_item.node->GetType() == HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(node_item.node->GetType()) > 0) { local_index_ = 0; } else { local_index_ = op_desc->GetInputIndexByName("local"); } + int32_t offset_idx = node_item.op_desc->GetInputIndexByName("local_offset"); + if ((offset_idx != -1) && (node_item.op_desc->GetInputDescPtr(offset_idx) != nullptr)) { + skip_flag_ = true; + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()); + offset_index_ = { + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()->GetId(), + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetIdx() }; + } return SUCCESS; } @@ -158,8 +171,13 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccessAddrInfo> &addr_infos) { GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); auto data = reinterpret_cast<uint64_t *>(remote_tensor.GetData()); if (data == nullptr) { - GELOGE(FAILED, "Tensor data is nullptr."); - return FAILED; + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + GELOGE(FAILED, "Tensor data is nullptr."); + return FAILED; + } } auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { @@ -183,30 +201,63 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(tensor_buffer.release())))); } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) } TensorValue *tv; - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - tv = context.MutableOutput(0); + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { + tv = context.MutableOutput(local_index_); } else { tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); - auto local_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tv->MutableData())); auto row_num = dims.front(); addr_infos.resize(row_num); - auto device_len = tv->GetSize() / row_num; - if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { - GELOGE(FAILED, "Local embedding length is out of range."); - return FAILED; - } + if (skip_flag_) { + int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); + GE_CHECK_NOTNULL(context.GetNodeItem().op_desc->GetInputDescPtr(offset_idx)); + auto data_type =
context.GetNodeItem().op_desc->GetInputDesc(offset_idx).GetDataType(); + + Tensor offset_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) + if (static_cast<int64_t>(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { + GELOGE(PARAM_INVALID, "offset count and remote addr count mismatch, offset size=%zu, remote_addr size=%lld, dtype=%s", + offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); + return PARAM_INVALID; + } - for (auto idx = 0; idx < row_num; ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast<uint32_t>(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - device_len}; - local_addr += device_len; + auto addr_offset = reinterpret_cast<uint64_t *>(offset_tensor.GetData()); + GE_CHECK_NOTNULL(addr_offset); + auto base_addr = reinterpret_cast(tv->MutableData()); + GE_CHECK_NOTNULL(base_addr); + + for (auto idx = 0; idx < row_num; idx++) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast<uint32_t>(data[line_idx]), + data[line_idx + kVarTableIdxAddr], + reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(base_addr + addr_offset[idx])), + data[line_idx + kVarTableIdxLen] }; + } + } else { + auto local_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tv->MutableData())); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range, expected at most %lld, but got %lld.", + data[kVarTableIdxLen], device_len); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast<uint32_t>(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + device_len }; + local_addr += device_len; + } } return SUCCESS; @@ -226,6 +277,10 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do } vector<HcomRemoteAccessAddrInfo> addr_infos; GE_CHK_STATUS_RET(ExtractTensor(context, addr_infos)); + if (addr_infos.empty()) { + done_callback(); + return SUCCESS; + } auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { @@ -235,6 +290,11 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do this->cond_.notify_all(); GELOGI("rdma callback success."); }; + + std::string executor_type = context.GetNodeItem().NodeType(); + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? HCOMREMOTEREAD : HCOMREMOTEWRITE; + } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); @@ -262,7 +322,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const GE_CHK_STATUS_RET(task.Init(context), "hccl node load hccl so failed."); // allocate output mem, output mem or remote read will be calculated when node execute.
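[Editorial aside, not part of the patch] The ExtractTensor hunks above revolve around a small address table: the remote tensor carries one row per transfer, shaped [row_num, kVarTableRowCnt] with columns (type, remote address, length) picked out by kVarTableIdxAddr and kVarTableIdxLen. In the contiguous mode the local buffer is simply cut into row_num slices of device_len bytes; the new skip_flag_ mode replaces the uniform slicing with explicit per-row offsets from the "local_offset" input. A minimal sketch of the contiguous mode; RowAddrInfo and BuildRowAddrInfos are hypothetical stand-ins for the real HcomRemoteAccessAddrInfo and the member logic:

#include <cstdint>
#include <vector>

struct RowAddrInfo {  // hypothetical mirror of HcomRemoteAccessAddrInfo
  uint32_t remote_type;
  uint64_t remote_addr;
  uint64_t local_addr;
  uint64_t length;
};

// One table row is kVarTableRowCnt (3) uint64 values: {type, remote addr, length}.
std::vector<RowAddrInfo> BuildRowAddrInfos(const uint64_t *table, int64_t row_num,
                                           uint64_t local_base, uint64_t device_len) {
  std::vector<RowAddrInfo> infos(static_cast<size_t>(row_num));
  for (int64_t i = 0; i < row_num; ++i) {
    const uint64_t *row = table + i * 3;
    infos[i] = {static_cast<uint32_t>(row[0]), row[1], local_base, device_len};
    local_base += device_len;  // next contiguous local slice
  }
  return infos;
}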
- if (context.GetNodeItem().NodeType() != HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) == 0) { GE_CHK_STATUS_RET(context.AllocateOutputs(), "hccl node task allocate output failed."); } @@ -274,7 +334,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const Status HcclNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { GELOGI("[%s] HcclNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - if (node->GetType() == HCOMREMOTEREAD || node->GetType() == HCOMREMOTEWRITE) { + if ((kRdmaReadTypes.count(node->GetType()) > 0) || (kRdmaWriteTypes.count(node->GetType()) > 0)) { task = MakeShared<RdmaNodeTask>(); } else { task = MakeShared<HcclNodeTask>(); } diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -55,9 +55,11 @@ class RdmaNodeTask : public NodeTask { private: Status ExtractTensor(TaskContext &context, vector<HcomRemoteAccessAddrInfo> &addr_infos); std::pair<int64_t, int32_t> remote_index_; + std::pair<int64_t, int32_t> offset_index_; int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; + bool skip_flag_ = false; }; class HcclNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 01fd391d..d54195d6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -29,8 +29,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status AssignKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto ref_tensor = context.MutableInput(kAssignRefInputIndex); GE_CHECK_NOTNULL(ref_tensor); const auto value_tensor = context.GetInput(kAssignValueInputIndex); @@ -50,7 +48,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc new file mode 100644 index 00000000..e34f601a --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "hybrid/node_executor/host_cpu/kernel_factory.h" + +namespace { +constexpr size_t kDataInputIndex = 0; +constexpr size_t kDataOutputIndex = 0; +} + +namespace ge { +namespace hybrid { +namespace host_cpu { +Status DataKernel::Compute(TaskContext& context) { + auto input = context.MutableInput(kDataInputIndex); + GE_CHECK_NOTNULL(input); + GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), "[%s] Failed to set output.", context.GetNodeName()) + GELOGD("[%s] compute success.", node_->GetName().c_str()); + return SUCCESS; +} + +REGISTER_KERNEL_CREATOR(Data, DataKernel); +} // namespace host_cpu +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h new file mode 100644 index 00000000..ca42d647 --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ +#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ + +#include "hybrid/node_executor/host_cpu/kernel/kernel.h" + +namespace ge { +namespace hybrid { +namespace host_cpu { +class DataKernel : public Kernel { + public: + DataKernel(const NodePtr &node) : Kernel(node) {} + ~DataKernel() override = default; + DataKernel &operator=(const DataKernel &op) = delete; + DataKernel(const DataKernel &op) = delete; + + /** + * @brief compute for node_task. 
+ * @return result + */ + Status Compute(TaskContext& context) override; +}; +} // namespace host_cpu +} // namespace hybrid +} // namespace ge + +#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..b1b4e68c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -23,7 +23,7 @@ namespace ge { namespace hybrid { namespace host_cpu { Status NoOpKernel::Compute(TaskContext& context) { - GELOGI("[%s] no need to compute.", node_->GetName().c_str()); + GELOGD("[%s] no need to compute.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..52d48821 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -30,8 +30,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status RandomUniformKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - int64_t seed = 0; int64_t seed2 = 0; (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); @@ -66,7 +64,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { return UNSUPPORTED; } - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..16738c2a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -23,8 +23,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status VariableKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto tensor = context.GetVariable(node_->GetName()); if (tensor == nullptr) { GELOGE(PARAM_INVALID, "tensor is NULL."); @@ -32,7 +30,7 @@ Status VariableKernel::Compute(TaskContext& context) { } // Constant & Variable Op has and only has one output GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 02427b91..e74256f2 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -38,7 +38,6 @@ const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; @@ -118,11 +117,11 @@ Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executo auto executor_type = ResolveExecutorType(node); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", executor_type); + 
GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", static_cast(executor_type)); return INTERNAL_ERROR; } - GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), executor_type); + GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast(executor_type)); *executor = it->second.get(); return SUCCESS; } @@ -166,7 +165,7 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { TensorUtils::SetSize(output_tensor, output_mem_size); GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast(i), output_tensor), "hccl update output size failed."); - GELOGD("%s output desc[%u], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, + GELOGD("%s output desc[%zu], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, output_tensor.GetShape().GetDimNum(), output_mem_size); } return SUCCESS; @@ -190,14 +189,14 @@ Status NodeExecutorManager::InitializeExecutors() { GE_CHECK_NOTNULL(build_fn); auto executor = std::unique_ptr(build_fn()); if (executor == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", engine_type); + GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", static_cast(engine_type)); return INTERNAL_ERROR; } - GELOGD("Executor of engine type = %d was created successfully", engine_type); + GELOGD("Executor of engine type = %d was created successfully", static_cast(engine_type)); auto ret = executor->Initialize(); if (ret != SUCCESS) { - GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", engine_type); + GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", static_cast(engine_type)); for (auto &executor_it : executors_) { executor_it.second->Finalize(); } diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 6488fbbe..bc318124 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -18,9 +18,11 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/log.h" #include "graph/utils/tensor_utils.h" +#include "graph/types.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_executor.h" +#include "common/profiling/profiling_manager.h" namespace ge { namespace hybrid { @@ -212,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, return SUCCESS; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + outputs_start_[index] = TensorValue(); + return SUCCESS; + } + auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; @@ -498,5 +507,60 @@ bool TaskContext::NeedCallback() { Status TaskContext::Synchronize() { return execution_context_->Synchronize(GetStream()); } + +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, + uint32_t task_type, uint32_t block_dim) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string op_name = op_desc->GetName(); + std::string 
dynamic_model_name = model->GetModelName(); + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_type = task_type; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + tmp_task_desc_info.shape_type = "dynamic"; + tmp_task_desc_info.cur_iter_num = iteration_ + 1; + task_desc_info.emplace_back(tmp_task_desc_info); + } + + return SUCCESS; +} + +Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string dynamic_model_name = model->GetModelName(); + auto op_mode = static_cast(domi::ImplyType::INVALID); + if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && + op_mode == static_cast(domi::ImplyType::TVM)) { + ComputeGraphDescInfo tmp_compute_graph_info; + tmp_compute_graph_info.model_name = dynamic_model_name; + tmp_compute_graph_info.op_name = op_desc->GetName(); + tmp_compute_graph_info.op_type = op_desc->GetType(); + tmp_compute_graph_info.task_id = task_id; + tmp_compute_graph_info.stream_id = stream_id; + compute_graph_info.emplace_back(tmp_compute_graph_info); + } + } + return SUCCESS; +} + } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 6a4bcb8c..e7ee4fc8 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -22,6 +22,7 @@ #include #include "common/properties_manager.h" #include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" #include "hybrid/common/tensor_value.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/rt_callback_manager.h" @@ -108,6 +109,14 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); + void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + + const std::vector& GetProfilingGraphDescInfo() const { return compute_graph_info; } + Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); + void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -127,6 +136,8 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; + std::vector task_desc_info; + std::vector compute_graph_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b81632bd..1a97b6f8 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -37,7 +37,7 @@ #include "graph/common/ge_call_wrapper.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include 
"graph/manager/graph_var_manager.h" diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/offline/proto/task.proto +++ b/ge/offline/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/proto/task.proto b/ge/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/proto/task.proto +++ b/ge/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index c4f8a53b..5a67f7cd 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -29,7 +29,7 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 37b279a2..47073fc0 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -659,7 +659,7 @@ Status ParseOutNodes(const string &out_nodes) { auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); int32_t index = stoi(StringUtils::Trim(key_value_v[1])); - GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); + GELOGD("Get output info: node[%s] and index[%d]", key_value_v[0].c_str(), index); if (iter != domi::GetContext().out_nodes_map.end()) { iter->second.emplace_back(index); } else { @@ -1007,7 +1007,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js } else { ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); - GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. 
Please check --om param."); } if (model.model_data != nullptr) { diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 5d5a299a..3c531747 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,7 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..4f32bd6b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -22,11 +22,11 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" #include "single_op/task/build_task_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { namespace { @@ -57,9 +57,10 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { std::vector task_desc_info; uint32_t task_id = 0; uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; + auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return RT_ERROR_TO_GE_STATUS(rt_ret); } TaskDescInfo tmp_task_desc_info; @@ -70,6 +71,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; + tmp_task_desc_info.task_type = op_task->GetTaskType(); GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index d523d355..ccbdbe3f 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -141,7 +141,7 @@ Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id) auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); if (rt_err != RT_ERROR_NONE) { GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast(rt_err)); - return rt_err; + return RT_ERROR_TO_GE_STATUS(rt_err); } // use current context as resource key instead GELOGI("use context as resource key instead when default stream"); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 2a1a14e6..7d092091 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" @@ -438,8 +438,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { auto task_type = static_cast(task_def.type()); if (task_type == RT_MODEL_TASK_KERNEL) { if (single_op.op_task_ != 
nullptr) { - GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); - return UNSUPPORTED; + GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); + return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6d0109fe..6637271c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,7 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 2a5f968f..6580ea31 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -16,7 +16,7 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include "framework/common/taskdown_common.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "build_task_utils.h" namespace ge { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 1bfbcb3c..a01ee0f0 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -19,8 +19,8 @@ #include "single_op/task/build_task_utils.h" #include "runtime/mem.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) @@ -30,8 +30,8 @@ namespace ge { auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); if (sec_ret != EOK) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "memcpy failed, ret: %d", sec_ret); - return ACL_ERROR_GE_INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } auto io_addr_val = static_cast(reinterpret_cast(io_addr)); @@ -46,7 +46,7 @@ namespace ge { auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, @@ -54,7 +54,7 @@ namespace ge { if (rt_ret != RT_ERROR_NONE) { (void)rtFree(fwk_op_args); GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } *args = fwk_op_args; return SUCCESS; @@ -96,7 +96,7 @@ namespace ge { // get kernel_ext_info auto &kernel_ext_info = kernel_def_.kernel_ext_info(); auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); - GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, ACL_ERROR_GE_PARAM_INVALID, "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), kernel_ext_info_size); 
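[Editorial aside, not part of the patch] A pattern repeated across these single-op hunks: runtime failures now keep the rtError_t value instead of being flattened to FAILED/RT_FAILED, with RT_ERROR_TO_GE_STATUS doing the conversion on return. A compact sketch of the idiom; AllocDeviceBuffer is a made-up helper name, and RT_ERROR_TO_GE_STATUS is assumed to be essentially a numeric cast into GE's Status space (check ge_inner_error_codes.h for the authoritative definition):

#include "framework/common/debug/ge_log.h"
#include "runtime/mem.h"

namespace ge {
Status AllocDeviceBuffer(void *&dev_buf, size_t size) {
  rtError_t rt_ret = rtMalloc(&dev_buf, size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    // Log and return the runtime's own code so callers can tell rtMalloc
    // failures apart from generic GE-internal errors.
    GELOGE(rt_ret, "rtMalloc failed, size = %zu, ret = %d", size, static_cast<int>(rt_ret));
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
}  // namespace ge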
GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed."); diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 071e514b..9e4d55e1 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -17,7 +17,7 @@ #include "single_op/task/build_task_utils.h" #include "runtime/rt.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/type_utils.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index cc63e811..3d001d8b 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -45,7 +45,7 @@ void FreeHbm(void *var) { Status OpTask::OpenDump(rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { - GELOGI("Dump is open in single op,start to set dump info"); + GELOGI("Dump is open in single op, start to set dump info"); std::vector<void *> input_addrs; std::vector<void *> output_adds; auto input_size = op_desc_->GetInputsSize(); @@ -54,10 +54,10 @@ Status OpTask::OpenDump(rtStream_t stream) { size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < input_size + output_size) { - GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", arg_num, input_size + output_size); - return FAILED; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (size_t i = 0; i < input_size; i++) { @@ -120,11 +120,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < all_addresses.size()) { - GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", op_desc_->GetName().c_str(), all_addresses.size(), arg_num); - return INTERNAL_ERROR; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (void *addr : all_addresses) { @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, return UNSUPPORTED; } +uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } +uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -174,8 +178,8 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunch failed.
ret = %d, task = %s", ret, this->stub_name_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); auto status = OpenDump(stream); @@ -195,8 +199,8 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve run_info.block_dim = 0; auto ret = optiling::OpParaCalculate(*node_, run_info); if (ret != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); - return FAILED; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to invoke OpParaCalculate. ret = %u", ret); + return ACL_ERROR_GE_INTERNAL_ERROR; } block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); @@ -219,8 +223,8 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc } else { std::vector storage_shape; if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { - GELOGE(PARAM_INVALID, "Failed to get storage_shape while storage_format was set"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to get storage_shape while storage_format was set"); + return ACL_ERROR_GE_INTERNAL_ERROR; } GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", @@ -269,7 +273,9 @@ Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { std::vector ws_offsets; for (auto ws_size : workspace_sizes) { // alignment and padding should be done in OpParaCalculate - GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + if (CheckInt64AddOverflow(total_size, ws_size) != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } ws_offsets.emplace_back(total_size); total_size += ws_size; } @@ -317,8 +323,9 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { - GELOGE(INTERNAL_ERROR, "[%s] Failed to update kernel args.", node_->GetName().c_str()); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", + node_->GetName().c_str()); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); @@ -356,7 +363,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint num_inputs_, num_outputs_, unknown_type_)); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); if (ret != SUCCESS) { @@ -414,7 +421,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, "Input[%zu] update input shape failed.", input_index); continue; } - GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, "Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), non_const_index); GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), @@ -503,11 +510,11 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto if (input_index < input_is_const_.size() && input_is_const_[input_index]) { // const input no need update addr GE_CHECK_NOTNULL(arg_base); - GELOGD("AICpuTask input[%zu] addr = %u", input_index, *arg_base); + GELOGD("AICpuTask input[%zu] addr = %lu", input_index, *arg_base); arg_base++; continue; } - 
GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), ACL_ERROR_GE_PARAM_INVALID, "Input size is %zu, but get non_const_index is %zu", inputs.size(), non_const_index); auto addr = inputs[non_const_index].data; @@ -557,15 +564,15 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { RT_MEMCPY_HOST_TO_DEVICE_EX, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str()); ret = rtKernelLaunchEx(args_, arg_size_, 0, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunchEx failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); @@ -706,7 +713,7 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output Status AiCpuTask::InitForSummaryAndCopy() { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { - GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); + GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_); return SUCCESS; } @@ -743,9 +750,9 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); - return PARAM_INVALID; + return ACL_ERROR_GE_PARAM_INVALID; } GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), @@ -755,8 +762,8 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_); @@ -802,6 +809,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) { return DoUpdateArgTable(param, false); } +uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } + void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); arg_count = io_addr_host_.size(); @@ -838,7 +847,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { sm_desc, stream, dump_flag_); if (ret != RT_ERROR_NONE) { GELOGE(ret, "Invoke rtCpuKernelLaunch failed.
ret = %d", ret); - return ret; + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 2d0740a6..78e1f6f0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -52,6 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); + virtual uint32_t GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + uint32_t GetTaskType() const override; private: friend class SingleOpModel; @@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam ¶m) override; + uint32_t GetTaskType() const override; + protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetInputConst(); diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 594352aa..6eee61d0 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -20,7 +20,7 @@ #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" @@ -242,7 +242,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } const domi::KernelContext &context = kernel_def_.context(); @@ -261,7 +261,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } } @@ -287,7 +287,7 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); if (rtRet != SUCCESS) { GELOGE(rtRet, "rtGetFunctionByName failed."); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } task.SetStubFunc(stub_name_, stub_func); diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index 3d63aced..e77f817c 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -109,8 +109,13 @@ GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); 
+GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, "Format is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Shape is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Datatype is invalid when transferring shape."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index 20a7e0f9..041fc7ae 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -38,7 +38,12 @@ static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; @@ -49,6 +54,7 @@ static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; + #ifdef __cplusplus } // namespace ge #endif diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 4a32af36..07cd1664 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -38,75 +38,53 @@ extern "C" { enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; class GeLog { -public: + public: + static uint64_t GetTid() { #ifdef __GNUC__ -static pid_t GetTid() { - thread_local static pid_t tid = syscall(__NR_gettid); - return tid; -} + thread_local static uint64_t tid = static_cast(syscall(__NR_gettid)); #else -static int GetTid() { - thread_local static int tid = static_cast(GetCurrentThreadId()); - return tid; -} + thread_local static uint64_t tid = static_cast(GetCurrentThreadId()); #endif + return tid; + } }; inline bool IsLogEnable(int module_name, int log_level) { int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable - if (enable == 1) { - return true; - } - return false; + return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ +#define GELOGE(ERROR_CODE, fmt, ...) \ dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GELOGW(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGI(fmt, ...) 
\ - if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGD(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGW(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ + dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGI(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ + dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGD(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ + dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) + #define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGO(fmt, ...) \ - Dlog(GE_MODULE_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGT(VALUE, fmt, ...) \ - do { \ - TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ + +#define GELOGT(VALUE, fmt, ...) \ + do { \ + TraceStatus stat = VALUE; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + int idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ + ##__VA_ARGS__); \ } while (0) -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GE_LOG_WARN(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_WARN)) dlog_warn(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_INFO(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_INFO)) dlog_info(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_DEBUG(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_DEBUG)) dlog_debug(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_EVENT(MOD_NAME, fmt, ...) dlog_event(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_OPLOG(MOD_NAME, fmt, ...) \ - Dlog(MOD_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GE_LOG_TRACE(MOD_NAME, value, fmt, ...) \ - do { \ - TraceStatus stat = value; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(MOD_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ - } while (0) // print memory when it is greater than 1KB. 
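[Editorial aside, not part of the patch] The macro reflow above keeps two hygiene levels: GELOGT and GE_LOG_TRACE stay wrapped in do { ... } while (0), while GELOGW/GELOGI/GELOGD remain a bare if. The bare-if form is fine for a plain GELOGI(...); statement, but it binds a following else to the macro's hidden if. A toy illustration (LOG_IF_BARE and LOG_IF_SAFE are made-up names):

#include <cstdio>

static bool LogEnabled() { return true; }  // stand-in for IsLogEnable(GE_MODULE_NAME, ...)

#define LOG_IF_BARE(fmt, ...) \
  if (LogEnabled()) std::printf(fmt "\n", ##__VA_ARGS__)

#define LOG_IF_SAFE(fmt, ...)                                \
  do {                                                       \
    if (LogEnabled()) std::printf(fmt "\n", ##__VA_ARGS__);  \
  } while (0)

void Demo(bool cond) {
  if (cond)
    LOG_IF_SAFE("on the cond path, id = %d", 1);
  else
    std::puts("this else binds to if (cond), as intended");
  // With LOG_IF_BARE in the same position, the else would silently attach to
  // the macro's hidden if (LogEnabled()) instead of to if (cond).
}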
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 72dba126..31281cd6 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -261,6 +261,12 @@ ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ } +#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ + { \ + GELOGW("%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ + } + #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ bool b = (expr); \ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index f7e6d679..9ca77f1c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -41,12 +41,7 @@ enum FrameworkType { }; const std::map kFwkTypeToStr = { - {"0", "Caffe"}, - {"1", "MindSpore"}, - {"3", "TensorFlow"}, - {"4", "Android_NN"}, - {"5", "Onnx"} -}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +// profiling data +const uint32_t kTaskTypeAicore = 0; +const uint32_t kTaskTypeAicpu = 1; +const uint32_t kTaskTypeInvalid = 0xFFFF; + // Data cache, including data address and length struct DataBuffer { public: @@ -256,6 +256,7 @@ struct TaskDescInfo { uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; + uint32_t task_type; }; // Profiling info of graph diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 4d4c54d1..2dbb1753 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -437,6 +437,7 @@ REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index 62c9c750..f3b7f00a 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -238,8 +238,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion +// for retinanet scope fusion +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; @@ -370,7 +370,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESC FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; diff --git a/metadef b/metadef index fcd0833c..8ab60be2 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715 +Subproject commit 8ab60be2870b80b1ec952bb21c7f05ae2a624984 diff --git a/parser b/parser index 1601d66b..98f17f4a 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573 +Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc index a6221570..13ddf8bb 100644 --- a/tests/depends/omg/src/omg_stub.cc +++ b/tests/depends/omg/src/omg_stub.cc @@ -315,7 +315,7 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, "open file failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %ld is out of limit: %d.", + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", file_length, MAX_FILE_SIZE_LIMIT); return file_length; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 91a6620d..a1ec8248 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -132,7 +132,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" @@ -140,15 +140,15 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/base.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" 
"${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -254,13 +254,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/aipp_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/omm/csa_interact.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" @@ -386,32 +386,32 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/common/model_parser/base.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + 
"${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_wait_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/hccl_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" @@ -573,7 +573,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/data_dumper_unittest.cc" #"graph/load/new_model_manager_data_inputer_unittest.cc" #"graph/load/new_model_manager_davinci_model_unittest.cc" - #"graph/load/new_model_manager_model_manager_unittest.cc" + "graph/load/new_model_manager_model_manager_unittest.cc" #"graph/load/new_model_manager_task_build_unittest.cc" "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" @@ -589,6 +589,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" + "graph/load/model_utils_unittest.cc" ) set(PASS_TEST_FILES @@ -682,7 +683,7 @@ set(MULTI_PARTS_TEST_FILES "common/format_transfer_nchw_fractalz_unittest.cc" "common/format_transfer_hwcn_fractalz_unittest.cc" "common/format_transfer_nhwc_fractalz_unittest.cc" - #"common/format_transfer_fractal_nz_unittest.cc" + "common/format_transfer_fractal_nz_unittest.cc" "common/format_transfer_fractal_zz_unittest.cc" "common/format_transfer_nhwc_5d_unittest.cc" "common/format_transfer_5d_nchw_unittest.cc" @@ -696,6 +697,7 @@ set(MULTI_PARTS_TEST_FILES "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" "graph/build/mem_assigner_unittest.cc" + "graph/preprocess/graph_preprocess_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git 
a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
index 6e5158df..b0a39396 100644
--- a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
@@ -679,7 +679,7 @@ TEST_F(UtestFormatTransfer5dNhwc, nc1hwc0_to_nhwc_float2) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransfer5dNhwc, invalid_src_format) {
diff --git a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
index e809cf1b..3f195ef2 100644
--- a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
@@ -158,7 +158,7 @@ TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_fp16_success_lt_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_gp16_success_eq_cube) {
diff --git a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
index fe3dd452..70c07d45 100644
--- a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
@@ -249,8 +249,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape1_uint8_3) {
 }
 */
 
-
-TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) {
+/*TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) {
   uint8_t data[32 * 32] = {
       47,  78,  47,  180, 246, 76,  157, 127, 63,  0,   168, 23,  148, 198, 180, 190, 43,  187, 76,  67,  77,  246, 11, 149, 240, 236, 136, 123, 51,  95,  7,   163, 163, 64,  157, 230, 247, 122, 67,  106, 150, 20,  231, 118, 43,  208,
@@ -2157,7 +2156,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape3_fp16) {
   for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) {
     EXPECT_EQ((reinterpret_cast<uint16_t *>(result2.data.get()))[i], data[i]);
   }
-}
+}*/
 
 TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) {
   uint16_t data[2 * 2 * 17 * 4] = {
@@ -2333,7 +2332,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, nd_shape5_fp16) {
@@ -4785,6 +4784,8 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp32) {
   for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) {
     EXPECT_EQ((reinterpret_cast<float *>(result2.data.get()))[i], data[i]);
   }
+  EXPECT_EQ(transfer2.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, nchw_shape4_fp32) {
@@ -9059,7 +9060,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_shape) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) {
@@ -9079,7 +9080,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) {
@@ -9094,8 +9095,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
-  EXPECT_EQ(TransFormat(args, result), UNSUPPORTED);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_dst_shape) {
@@ -9136,6 +9136,24 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type2) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
 }
 
+TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) {
+  uint16_t data[1 * 1 * 1 * 16 * 16] = {0};
+  TransArgs args{reinterpret_cast<uint8_t *>(data),
+                 FORMAT_FRACTAL_NZ,
+                 FORMAT_NHWC,
+                 {1, 1, 1, 16, 16},
+                 {
+                     1,
+                     1,
+                     4,
+                     4,
+                 },
+                 DT_VARIANT};
+  TransResult result;
+  FormatTransferFractalNzND transfer;
+  EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
+}
+
 TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) {
   uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0};
   TransArgs args{reinterpret_cast<uint8_t *>(data),
diff --git a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
index 6278b958..8b1afa24 100644
--- a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
@@ -1894,7 +1894,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16_1) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) {
@@ -2071,7 +2071,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, nd_shape5_fp16) {
@@ -7879,7 +7879,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_shape) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) {
@@ -7899,7 +7899,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) {
@@ -7914,7 +7914,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
   EXPECT_EQ(TransFormat(args, result), UNSUPPORTED);
 }
 
diff --git a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
index 6c18aa34..25caa741 100644
--- a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_eq_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_gt_cube) {
diff --git a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
index 46d3ae86..93160070 100644
--- a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_eq_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_gt_cube) {
diff --git a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
index e468f5ac..1e6b90dd 100644
--- a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
@@ -75,7 +75,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_format_nchw) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format_nc1khkwhwc0) {
@@ -142,7 +142,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_shape3) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, PARAM_INVALID);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format) {
diff --git a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
index 67104bf8..610bd7d3 100644
--- a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
@@ -633,5 +633,14 @@ TEST_F(UtestFormatTransferNchw5d, unsupport_dst_format) {
   TransResult result;
   EXPECT_NE(transfer.TransFormat(args, result), SUCCESS);
 }
+
+TEST_F(UtestFormatTransferNchw5d, invalid_data_format) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferNchwNc1hwc0 transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
index 0944afd7..bc5a8754 100644
--- a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
@@ -719,7 +719,7 @@ TEST_F(UtestFormatTransferNhwc5d, invalid_src_format) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNhwc5d, invalid_dst_shape2) {
@@ -751,5 +751,20 @@ TEST_F(UtestFormatTransferNhwc5d, unsupport_dst_format) {
   FormatTransferNhwcNc1hwc0 transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
 }
+
+TEST_F(UtestFormatTransferNhwc5d, invalid_data_shape) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferNhwcNc1hwc0 transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args2{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_STRING};
+  FormatTransferNhwcNc1hwc0 transfer2;
+  EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
index f6017fb7..a6dfffb0 100644
--- a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
@@ -5353,5 +5353,44 @@ TEST_F(UtestFormatTransferNhwcFz, build_transfer_uint8) {
   auto transfer = BuildFormatTransfer(args);
   EXPECT_NE(transfer, nullptr);
 }
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
+}
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_CHWN, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_shape) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args2{
+      reinterpret_cast<uint8_t *>(data), FORMAT_HWCN, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer2;
+  EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args3{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NCHW, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer3;
+  EXPECT_EQ(transfer3.TransShape(args3.src_format, args3.src_shape, args3.src_data_type, args3.dst_format, args3.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_transpose_unittest.cc b/tests/ut/ge/common/format_transfer_transpose_unittest.cc
index 258b77fc..d56e06c0 100644
--- a/tests/ut/ge/common/format_transfer_transpose_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_transpose_unittest.cc
@@ -4654,5 +4654,27 @@ TEST_F(UtestFormatTranspose, chwn_to_hwcn2) {
     EXPECT_EQ((reinterpret_cast<float *>(result.data.get()))[i], ret[i]);
   }
 }
+
+TEST_F(UtestFormatTranspose, invalid_data_shape) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, std::vector<int64_t>({}), DT_FLOAT16, FORMAT_HWCN, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+}
+
+TEST_F(UtestFormatTranspose, invalid_src_format) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NC1HWC0, std::vector<int64_t>({1, 3, 8, 8}), DT_FLOAT16, FORMAT_HWCN, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
+
+TEST_F(UtestFormatTranspose, invalid_dst_format) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  std::vector<int64_t> src_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, src_shape, DT_FLOAT16, FORMAT_C1HWNC0, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc
index 3d04fd0c..3ef8a750 100644
--- a/tests/ut/ge/graph/ge_executor_unittest.cc
+++ b/tests/ut/ge/graph/ge_executor_unittest.cc
@@ -33,11 +33,11 @@
 #include "common/properties_manager.h"
 #include "common/types.h"
 #include "graph/load/graph_loader.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
-#include "graph/load/new_model_manager/model_manager.h"
-#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
-#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/task_info/kernel_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"
 #include "ge/common/dump/dump_properties.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/utils/graph_utils.h"
diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc
index af9d5a37..54972af7 100644
--- a/tests/ut/ge/graph/graph_load_unittest.cc
+++ b/tests/ut/ge/graph/graph_load_unittest.cc
@@ -24,7 +24,7 @@
 #include "common/helper/model_helper.h"
 #include "common/op/ge_op_utils.h"
 #include "common/types.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/op_desc.h"
 #include "graph/types.h"
 #include "graph/utils/attr_utils.h"
@@ -35,7 +35,7 @@
#include "graph/load/graph_loader.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_manager_utils.h" #include "model/ge_model.h" #undef private diff --git a/tests/ut/ge/graph/load/data_dumper_unittest.cc b/tests/ut/ge/graph/load/data_dumper_unittest.cc index e53b76f4..1866f4eb 100644 --- a/tests/ut/ge/graph/load/data_dumper_unittest.cc +++ b/tests/ut/ge/graph/load/data_dumper_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 0c03c934..47968345 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -20,7 +20,7 @@ #define protected public #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" using namespace std; @@ -46,7 +46,7 @@ class UtestDavinciModel : public testing::Test { } }; -TEST_F(UtestDavinciModel, init_success) { +/*TEST_F(UtestDavinciModel, init_success) { DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -130,7 +130,7 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(outputs.size(), 1); ProfilingManager::Instance().is_load_profiling_ = false; -} +}*/ TEST_F(UtestDavinciModel, init_data_op) { DavinciModel model(0, nullptr); @@ -334,7 +334,7 @@ TEST_F(UtestDavinciModel, Init_variable_op) { EXPECT_EQ(model.InitNodes(graph), SUCCESS); EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); - EXPECT_NE(model.SyncVarData(), SUCCESS); + EXPECT_EQ(model.SyncVarData(), SUCCESS); } TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { diff --git a/tests/ut/ge/graph/load/end_graph_task_unittest.cc b/tests/ut/ge/graph/load/end_graph_task_unittest.cc index 29e7a53a..a66aaaff 100644 --- a/tests/ut/ge/graph/load/end_graph_task_unittest.cc +++ b/tests/ut/ge/graph/load/end_graph_task_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc index 5c056007..6a2468ee 100644 --- a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { class UtestHcclTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc 
b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 443d2975..53436820 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" namespace ge { diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index fe886b49..a3a27a7b 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); diff --git a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc index 9348d49e..1652841d 100644 --- a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" namespace ge { class UtestMemcpyAddrAsyncTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc index 8769ec39..afc04130 100644 --- a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" namespace ge { diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc new file mode 100644 index 00000000..ac886cea --- /dev/null +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define protected public +#define private public +#include "graph/load/model_manager/model_utils.h" +#include "graph/manager/graph_var_manager.h" + +using namespace std; + +namespace ge { +class UtestModelUtils : public testing::Test { + protected: + void TearDown() {} +}; + +// test ModelUtils::GetVarAddr +TEST_F(UtestModelUtils, get_var_addr_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + runtime_param.var_size = 16; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} + +TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(reinterpret_cast(offset), var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc index 56e673f7..43c2ad15 100644 --- a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc @@ -17,7 +17,7 @@ #include -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index 00069930..38a250ad 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -24,29 +24,29 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include #include "runtime/dev.h" #include "runtime/kernel.h" #include "cce/fwk_adpt_struct.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" -#include "graph/load/new_model_manager/task_info/task_info.h" 
-#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index 43e094b5..a68fb307 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -30,9 +30,9 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 1c6e5a10..688e73d4 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -15,24 +15,18 @@ */ #include - -#include +#include #include "common/debug/log.h" -#include "common/model_parser/base.h" -#include "common/properties_manager.h" #include "common/types.h" -#include "common/l2_cache_optimize.h" - +#include "graph/utils/graph_utils.h" #define private 
public #define protected public -#include "graph/load/new_model_manager/model_manager.h" - +#include "graph/load/model_manager/model_manager.h" #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "new_op_test_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #undef private #undef protected @@ -87,7 +81,6 @@ class UtestModelManagerModelManager : public testing::Test { data.model_data = new uint8_t[data.model_len]; uint8_t data_ori[model_len]; memset(data_ori, 10, model_len); - uint32_t out_len; ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->magic = MODEL_FILE_MAGIC_NUM; header->version = MODEL_VERSION; @@ -97,7 +90,7 @@ class UtestModelManagerModelManager : public testing::Test { void LoadStandardModelData(ge::ModelData &data) { static const std::string STANDARD_MODEL_DATA_PATH = - "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; + "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; ge::proto::ModelDef model_def; ReadProtoFromText(STANDARD_MODEL_DATA_PATH.c_str(), &model_def); @@ -113,9 +106,8 @@ class DModelListener : public ge::ModelListener { uint32_t OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t resultCode) { return 0; } }; -shared_ptr UTEST_CALL_BACK_FUN(new DModelListener()); -TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { +/*TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { ModelManager mm; uint32_t model_id = 0; ge::ModelData model; @@ -307,7 +299,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_fail) { } -/* +*//* // test GetInputOutputDescInfo fail TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) { ModelManager manager; @@ -316,7 +308,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) vector output_shape; EXPECT_EQ(ge::PARAM_INVALID, manager.GetInputOutputDescInfoForZeroCopy(2, input_shape, output_shape)); } -*/ +*//* // test Stop TEST_F(UtestModelManagerModelManager, stop_fail) { @@ -347,6 +339,20 @@ TEST_F(UtestModelManagerModelManager, destroy_aicpu_session) { manager.sess_ids_.insert(0); manager.DestroyAicpuSession(0); +}*/ +// test DataInputTensor +TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { + shared_ptr g_label_call_back(nullptr); + auto model = std::make_shared(0, g_label_call_back); + ModelManager mm; + uint32_t model_id = 1; + mm.model_map_[1] = model; + mm.hybrid_model_map_[1] = std::make_shared(); + + auto input_tensor = InputTensorInfo(); + vector inputs; + inputs.emplace_back(input_tensor); + auto ret = mm.DataInputTensor(model_id,inputs); + EXPECT_EQ(ge::UNSUPPORTED, ret); } - } // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc index 620fac09..f10ccd7f 100644 --- a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc @@ -30,7 +30,7 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include 
"common/op/ge_op_utils.h" #include diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 4cbc78ac..984cbfb4 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -40,7 +40,7 @@ #define private public #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/node.h" #include "graph/op_desc.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/output_net_output_unittest.cc b/tests/ut/ge/graph/load/output_net_output_unittest.cc index ecd28fe3..97246dad 100644 --- a/tests/ut/ge/graph/load/output_net_output_unittest.cc +++ b/tests/ut/ge/graph/load/output_net_output_unittest.cc @@ -23,8 +23,8 @@ #define private public #include "common/debug/memory_dumper.h" #include "common/op/ge_op_utils.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "new_op_test_utils.h" #include "proto/om.pb.h" diff --git a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc index a98e14c6..82ffb388 100644 --- a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc +++ b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc @@ -18,7 +18,7 @@ #define protected public #define private public -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "runtime/kernel.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index b51908e2..d6af6de9 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -849,7 +849,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NC1HWC0 || variable_ref_node_data_type != DT_FLOAT || variable_ref_node_shape.size() != 5) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NC1HWC0, DT_FLOAT, 5); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" << std::endl; @@ -918,7 +918,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NCHW || variable_ref_node_data_type != DT_INT32 || variable_ref_node_shape.size() != 4) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NCHW, DT_INT32, 4); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" 
<< std::endl;
diff --git a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc
new file mode 100644
index 00000000..2f149761
--- /dev/null
+++ b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include
+
+#include "common/ge_inner_error_codes.h"
+#include "common/types.h"
+#include "common/util.h"
+#include "graph/passes/graph_builder_utils.h"
+#include "graph/utils/attr_utils.h"
+#include "graph/debug/ge_attr_define.h"
+
+#define private public
+#define protected public
+#include "graph/preprocess/graph_preprocess.h"
+#include "ge/ge_api.h"
+#undef private
+#undef protected
+
+using namespace std;
+namespace ge {
+class UtestGraphPreproces : public testing::Test {
+ protected:
+  void SetUp() {
+  }
+  void TearDown() {
+  }
+};
+
+ComputeGraphPtr BuildGraph1(){
+  auto builder = ut::GraphBuilder("g1");
+  auto data1 = builder.AddNode("data1",DATA,1,1);
+  auto data_opdesc = data1->GetOpDesc();
+  AttrUtils::SetInt(data_opdesc, ATTR_NAME_INDEX, 0);
+  data1->UpdateOpDesc(data_opdesc);
+  return builder.GetGraph();
+}
+
+TEST_F(UtestGraphPreproces, test_dynamic_input_shape_parse) {
+  ge::GraphPrepare graph_prepare;
+  graph_prepare.compute_graph_ = BuildGraph1();
+  // prepare user_input & graph option
+  ge::GeTensorDesc tensor1;
+  tensor1.SetFormat(ge::FORMAT_NCHW);
+  tensor1.SetShape(ge::GeShape({3, 12, 5, 5}));
+  tensor1.SetDataType(ge::DT_FLOAT);
+  GeTensor input1(tensor1);
+  std::vector<GeTensor> user_input = {input1};
+  std::map<string, string> graph_option = {{"ge.exec.dynamicGraphExecuteMode","dynamic_execute"},
+                                           {"ge.exec.dataInputsShapeRange","[3,1~20,2~10,5]"}};
+  auto ret = graph_prepare.UpdateInput(user_input, graph_option);
+  EXPECT_EQ(ret, ge::SUCCESS);
+  // check data node output shape_range and shape
+  auto data_node = graph_prepare.compute_graph_->FindNode("data1");
+  auto data_output_desc = data_node->GetOpDesc()->GetOutputDescPtr(0);
+  vector<int64_t> expect_shape = {3,-1,-1,5};
+  auto result_shape = data_output_desc->GetShape();
+  EXPECT_EQ(result_shape.GetDimNum(), expect_shape.size());
+  for(size_t i =0; i< expect_shape.size(); ++i){
+    EXPECT_EQ(result_shape.GetDim(i), expect_shape.at(i));
+  }
+}
+}
\ No newline at end of file
diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc
index b6b97d89..ab909e11 100644
--- a/tests/ut/ge/single_op/single_op_model_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_model_unittest.cc
@@ -18,7 +18,7 @@
 #include <gtest/gtest.h>
 
 //#include "cce/taskdown_common.hpp"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "runtime/rt.h"
 
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 7fbe9eb4..5b246eed 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++
b/third_party/fwkacllib/inc/runtime/base.h @@ -112,6 +112,12 @@ typedef void *rtEvent_t; */ typedef void *rtLabel_t; +/** + * @ingroup dvrt_base + * @brief model handle. + */ +typedef void *rtModel_t; + /** * @ingroup profiling_base * @brief runtime handle. @@ -217,6 +223,16 @@ typedef void *rtNotify_t; */ RTS_API rtError_t rtLabelCreate(rtLabel_t *label); +/** + * @ingroup dvrt_base + * @brief create label instance + * @param [out] label created label + * @param [in] model label set model + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); + /** * @ingroup dvrt_base * @brief set label and stream instance @@ -314,6 +330,17 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d */ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); +/** + * @ingroup dvrt_base + * @brief labels to dev info + * @param [out] label created label handle + * @param [in] model label bind model + * @param [in] stream label bind stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); + /** * @ingroup dvrt_base * @brief get current thread last stream id and task id diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index d3eadd59..dc16ca58 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -376,7 +376,6 @@ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kern const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); -typedef void *rtModel_t; /** * @ingroup rt_kernel * @brief L1 fusion dump addr transfered to device diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index b72b142d..482486a8 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -278,7 +278,6 @@ typedef struct tagLabelDevInfo_t { uint16_t labelId; }rtLabelDevInfo; -typedef void *rtModel_t; typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); /** diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h index b642cbc8..bef5c05d 100644 --- a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -15,23 +15,23 @@ extern "C" { struct SoftDpProcsessInfo { - uint8_t* inputBuffer; - uint32_t inputBufferSize; + uint8_t* inputBuffer; + uint32_t inputBufferSize; - uint8_t* outputBuffer; - uint32_t outputBufferSize; + uint8_t* outputBuffer; + uint32_t outputBufferSize; - uint32_t outputWidth; - uint32_t outputHeight; + uint32_t outputWidth; + uint32_t outputHeight; - uint32_t reserved; + uint32_t reserved; }; struct DpCropInfo { - uint32_t left; - uint32_t right; - uint32_t up; - uint32_t down; + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; }; /* @@ -49,4 +49,4 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); */ uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); } -#endif // EXTERNALSOFTDP_H +#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index 7c4f7be2..683dabf1 100644 
--- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -381,13 +381,13 @@ DLL_EXPORT void DlogFlush(void); * @ingroup slog * @brief Internal log interface, other modules are not allowed to call this interface */ -void DlogErrorInner(int moduleId, const char *fmt, ...); -void DlogWarnInner(int moduleId, const char *fmt, ...); -void DlogInfoInner(int moduleId, const char *fmt, ...); -void DlogDebugInner(int moduleId, const char *fmt, ...); -void DlogEventInner(int moduleId, const char *fmt, ...); -void DlogInner(int moduleId, int level, const char *fmt, ...); -void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); +void DlogErrorInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogWarnInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogInfoInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogDebugInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogEventInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogInner(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); #ifdef __cplusplus #ifndef LOG_CPP @@ -500,8 +500,8 @@ DLL_EXPORT void DlogFlushForC(void); * @ingroup slog * @brief Internal log interface, other modules are not allowed to call this interface */ -void DlogInnerForC(int moduleId, int level, const char *fmt, ...); -void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); +void DlogInnerForC(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); #ifdef __cplusplus }
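A note on the slog.h hunk above: the `__attribute__((format(printf, N, M)))` annotations tell gcc/clang which parameter is the printf-style format string (N) and where the variadic arguments begin (M), so format-string/argument mismatches, like the `%u` vs `%zu` and `%ld` vs `%llu` fixes made elsewhere in this change, are flagged at compile time under -Wformat. The following is a minimal standalone sketch of the mechanism, not part of the patch; demo_log is a hypothetical stand-in for DlogErrorInner and friends.

#include <cstdarg>
#include <cstdio>

// fmt is parameter 2 and the variadic arguments start at parameter 3,
// mirroring DlogErrorInner(int moduleId, const char *fmt, ...).
void demo_log(int module_id, const char *fmt, ...) __attribute__((format(printf, 2, 3)));

void demo_log(int module_id, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  std::fprintf(stderr, "[module %d] ", module_id);  // prefix with the module id
  std::vfprintf(stderr, fmt, args);                 // forward the varargs to vfprintf
  va_end(args);
}

int main() {
  size_t shape_size = 5;
  demo_log(0, "shape size is %zu\n", shape_size);    // OK: %zu matches size_t
  // demo_log(0, "shape size is %u\n", shape_size);  // with the attribute, the compiler warns here
  return 0;
}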