diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index b451b897..d639433e 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -803,26 +803,29 @@ Status ModelBuilder::CompileSingleOp() { return ge::SUCCESS; } -Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set) { +Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, + std::set &tf_engine_set) { GE_CHECK_NOTNULL(op_desc); std::string aicpu_optype; bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); std::vector tf_optypes; - bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); + bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); if (has_attr_check_cpu && !aicpu_optype.empty()) { GELOGI("Check Aicpu op type %s, op name: %s.", op_desc->GetType().c_str(), op_desc->GetName().c_str()); cpue_check_set.insert(aicpu_optype); } if (has_attr_check_tf && !tf_optypes.empty()) { - GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), op_desc->GetName().c_str(), tf_optypes.size()); + GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), + op_desc->GetName().c_str(), tf_optypes.size()); tf_engine_set.insert(tf_optypes.begin(), tf_optypes.end()); } - + return SUCCESS; } -Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, std::set &aicpu_tf_optype_set) { +Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, + std::set &aicpu_tf_optype_set) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { @@ -838,11 +841,16 @@ Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::setGetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail."); - - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail."); + GELOGI( + "Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, " + "aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), + aicpu_tf_optype_list.size()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, + "Set attr needCheckCpu fail."); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, + "Set attr needCheckTf fail."); return SUCCESS; } } // namespace ge diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index a6c70a78..99b47878 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1532,7 +1532,8 @@ Status ModelManager::EnableExceptionDump(const std::map &options return SUCCESS; } -Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list) { +Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, + std::vector &aicpu_tf_optype_list) { std::string kernel_name = "checkOpType"; GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); @@ -1556,7 +1557,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op size_t aicpu_op_nums = aicpu_optype_list.size(); size_t tf_op_nums = aicpu_tf_optype_list.size(); - size_t op_nums = aicpu_op_nums + tf_op_nums; + size_t op_nums = aicpu_op_nums + tf_op_nums; // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1591,12 +1592,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); - GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); op_info.opLen = op_type.length(); op_info.kernelsType = CPU_KERNEL; req_aicpu_op_info_list.emplace_back(op_info); - //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); } for (const auto &op_type : aicpu_tf_optype_list) { @@ -1609,15 +1609,15 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); - GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); op_info.opLen = op_type.size(); op_info.kernelsType = TF_KERNEL; req_aicpu_op_info_list.emplace_back(op_info); - //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); } GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size()); - GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), + sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); SysOpCheckInfo op_check_info_req; SysOpCheckResp op_check_info_res; @@ -1636,8 +1636,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); - GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT( + rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen), + sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); @@ -1650,7 +1652,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op // Check the response void *d_op_check_info_res = reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen); - GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), + RT_MEMCPY_DEVICE_TO_HOST)); std::function callback = [&]() { for (auto mem : allocated_mem) { GE_CHK_RT(rtFree(mem)); @@ -1670,8 +1673,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op res_ret_code_list.resize(res_op_nums); res_aicpu_op_info_list.clear(); res_aicpu_op_info_list.resize(res_op_nums); - GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); - GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, + reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), + sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, + reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), + sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GE_MAKE_GUARD(release, callback); @@ -1681,14 +1688,19 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); + GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); op_name.resize(kOpNameMaxSize); - GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); - std::string kernel_type = (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; + GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), + aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); + std::string kernel_type = + (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; string op_name_str(op_name.data()); - fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + std::to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + + " ret code:" + std::to_string(static_cast(ret_code)) + + "<0: op_type, 1: format, 2: datatype> \n"; } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); @@ -1706,11 +1718,12 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { std::vector aicpu_tf_optype_list; bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list); - if (!aicpu_need_check && !tf_need_check) { + if (!aicpu_need_check && !tf_need_check) { GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str()); return SUCCESS; } - GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index e0d0913e..b9b8e6d0 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -1579,7 +1579,8 @@ Status HybridModelBuilder::CheckAicpuOpList() { // reset list with set aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); return SUCCESS; } } // namespace hybrid