| @@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||
| const int64_t kDynamicDimValue = -2; | |||
| const int kDefaultDeviceId = 0; | |||
| const int kDefaultJobId = 0; | |||
| const int32_t kFuzzBuildPattern = 1; | |||
| std::map<ge::OpEngineType, std::string> engine_type_map{ | |||
| {ge::ENGINE_SYS, kEngineNameDefault}, | |||
| @@ -296,13 +297,60 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso | |||
| return SUCCESS; | |||
| } | |||
// Collects the fuzz-build result attributes (ATTR_NAME_FUZZ_BUILD_RES_ATTRS)
// for a single op after its model has been built.
// @param op_desc          op that was built; may itself carry the result attrs.
// @param ge_root_model    built root model whose graph nodes are inspected.
// @param fuzz_build_attrs out: collected attrs; may stay empty (still SUCCESS).
// @return SUCCESS, or FAILED when the matching node should have produced a
//         fuzz result but did not.
static Status GetFuzzBuildAttrs(OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
                                GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
  GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str());
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  // The fuzz-build marker is only meaningful during the build; strip it from
  // every node now that the build has finished.
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
    node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
  }
  // Fast path: the op itself already carries the result attrs (split case).
  (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
  if (!fuzz_build_attrs.empty()) {
    GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
    return SUCCESS;
  }
  // Otherwise the result is only usable when EVERY AIcore node carries the
  // attrs. Note: stays false when the graph has no AIcore node at all.
  bool all_aicore_support_dyn = false;
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    if (node->GetOpDesc()->GetOpKernelLibName() != kAIcoreEngine) {
      continue;
    }
    if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) {
      all_aicore_support_dyn = true;
    } else {
      all_aicore_support_dyn = false;
      GELOGD("%s kernel type is %s, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str(),
             node->GetOpDesc()->GetOpKernelLibName().c_str());
      break;
    }
  }
  if (all_aicore_support_dyn) {
    GELOGD("All aicore nodes from %s is support dynamic.", ge_root_model->GetRootGraph()->GetName().c_str());
    // Pull the attrs from the node matching the op by name; that node must
    // have produced a non-empty fuzz result, otherwise it is an error.
    for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
      if (node->GetName() == op_desc->GetName()) {
        (void)AttrUtils::GetListNamedAttrs(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
        if (fuzz_build_attrs.empty()) {
          GELOGE(FAILED, "[Get][ATTR_NAME_FUZZ_BUILD_RES_ATTRS] %s should set fuzz ret.", op_desc->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  // Empty result is tolerated: the caller simply skips attaching the attrs.
  if (fuzz_build_attrs.empty()) {
    GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str());
  }
  return SUCCESS;
}
| class GeGenerator::Impl { | |||
| public: | |||
| Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | |||
| ~Impl() = default; | |||
| Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | |||
| bool HasSetShapeRange(const vector<GeTensor> &inputs); | |||
| Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | |||
| Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); | |||
| @@ -742,7 +790,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||
| Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline) { | |||
| bool is_offline, int32_t compile_flag) { | |||
| GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size()); | |||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
| impl_->is_offline_ = is_offline; | |||
| if (!is_offline) { | |||
| @@ -764,6 +813,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | |||
| GE_CHECK_NOTNULL(op_desc_tmp); | |||
| bool fuzz_compile_flag = false; | |||
| if (!(impl_->HasSetShapeRange(inputs)) && (compile_flag == kFuzzBuildPattern)) { | |||
| fuzz_compile_flag = true; | |||
| } | |||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { | |||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; | |||
| // 1. Create ComputeGraph. | |||
| string name = ge::CurrentTimeInStr() + "_" + model_file_name; | |||
| Graph graph; | |||
| @@ -810,6 +869,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | |||
| GE_CHK_STATUS_RET_NOLOG( | |||
| impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | |||
| } else if (fuzz_compile_flag) { | |||
| GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str()); | |||
| (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag); | |||
| GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs; | |||
| if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) { | |||
| GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| if (!fuzz_build_attrs.empty()) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), | |||
| return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } | |||
| @@ -825,15 +897,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| * @param [in] vector<GeTensor> &inputs: Operator input data description information. | |||
| * @param [in] vector<GeTensor> &outputs: Operator output data description information. | |||
| * @param [in] const string &model_file_name: Offline model filename. | |||
| * @param [in] compile_flag: op build flag from atc | |||
| * @return SUCCESS handle successfully / others handle failed | |||
| */ | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, const string &model_file_name) { | |||
| const vector<GeTensor> &outputs, const string &model_file_name, | |||
| int32_t compile_flag) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| ModelBufferData model_buff; | |||
| OpEngineType engine_type = ENGINE_SYS; | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag); | |||
| GELOGI("Finish build single offline model, status: %u", status); | |||
| return status; | |||
| } | |||
| @@ -850,23 +924,17 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||
| * @return SUCCESS handle successfully / others handle failed | |||
| */ | |||
| // old process will be deleted | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | |||
| ModelBufferData &model_buff) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false, | |||
| compile_flag); | |||
| GELOGI("Finish build single online model, status: %u", status); | |||
| return status; | |||
| } | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | |||
| ModelBufferData &model_buff) { | |||
| return SUCCESS; | |||
| } | |||
| Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) { | |||
| ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | |||
| @@ -976,6 +1044,18 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo | |||
| return SUCCESS; | |||
| } | |||
| bool GeGenerator::Impl::HasSetShapeRange(const vector<GeTensor> &inputs) { | |||
| for (const auto &input : inputs) { | |||
| vector<pair<int64_t, int64_t>> shape_range; | |||
| (void)input.GetTensorDesc().GetShapeRange(shape_range); | |||
| if (!shape_range.empty()) { | |||
| GELOGD("Has set shape range."); | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, | |||
| GeRootModelPtr &ge_root_model) { | |||
| static std::atomic<GraphId> atomic_graph_id(0); | |||
| @@ -863,6 +863,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| } | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | |||
| // set fuzz compile flag after origin graph optimize | |||
| GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed."); | |||
| ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||
| @@ -877,7 +879,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| options_.build_step == BUILD_STEP_AFTER_BUILDER || | |||
| options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | |||
| if (run_after_optimize_subgraph) { | |||
| Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||
| ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||
| return ret; | |||
| @@ -895,6 +897,19 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) { | |||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag); | |||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) { | |||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | |||
| PassManager pass_manager; | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | |||
| @@ -358,6 +358,7 @@ class GraphManager { | |||
| ComputeGraphPtr &compute_graph, | |||
| GeRootModelPtr &ge_root_model, | |||
| uint64_t session_id); | |||
| Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph); | |||
| Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | |||
| Graph2SubGraphInfoList &sub_graph_map, | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| namespace { | |||
| const char *const kEngineNameAiCore = "AIcoreEngine"; | |||
| const int32_t kDynamicState = -2; | |||
| } | |||
| Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) { | |||
| GE_CHECK_NOTNULL(graph); | |||
| if (IsAllAicoreSupportDyn(graph)) { | |||
| if (UpdateNodeShapeToUnknown(graph) != SUCCESS) { | |||
| GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown."); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) { | |||
| bool is_all_aicore_support_dyn = false; | |||
| for (const auto &node : graph->GetAllNodes()) { | |||
| if (node->GetOpDesc() == nullptr) { | |||
| continue; | |||
| } | |||
| if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) { | |||
| GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str()); | |||
| continue; | |||
| } | |||
| if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) { | |||
| GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||
| is_all_aicore_support_dyn = true; | |||
| } else { | |||
| GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||
| is_all_aicore_support_dyn = false; | |||
| break; | |||
| } | |||
| } | |||
| return is_all_aicore_support_dyn; | |||
| } | |||
// Rewrites tensor shapes of every non-const node to the unknown-rank marker
// ({kDynamicState} == {-2}) so the graph is treated as dynamic downstream.
// Inputs fed by Const/Variable producers keep their original shapes.
Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
  GELOGD("Need to update node shape to dynamic when get fuzz build result.");
  for (const auto &node : graph->GetAllNodes()) {
    if (NodeUtils::IsConst(*node)) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      // Skip inputs whose producer is a Const or Variable: their shapes are fixed.
      auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
      if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
        continue;
      }
      GELOGD("Update shape for %s.", node->GetName().c_str());
      auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
      if (input_desc != nullptr) {
        input_desc->SetShape(GeShape({kDynamicState}));
      }
    }
    // All outputs of a non-const node become unknown-rank unconditionally.
    for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
      if (output_desc != nullptr) {
        output_desc->SetShape(GeShape({kDynamicState}));
      }
    }
  }
  return SUCCESS;
}
| } // namespace ge | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| #define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| #include "graph/graph.h" | |||
| #include "inc/graph_pass.h" | |||
| namespace ge { | |||
| class MarkNodeUnknownShapePass : public GraphPass { | |||
| public: | |||
| Status Run(ComputeGraphPtr graph); | |||
| private: | |||
| bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph); | |||
| Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| @@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) { | |||
| GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | |||
| auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | |||
| GE_CHECK_NOTNULL(dst_tensor); | |||
| bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||
| dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||
| src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | |||
| bool is_dynamic = false; | |||
| auto src_tensor_dims = src_tensor->GetShape().GetDims(); | |||
| auto dst_tensor_dims = dst_tensor->GetShape().GetDims(); | |||
| if ((!(std::all_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val >= 0 ; }))) | |||
| || (!(std::all_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val >= 0; })))) { | |||
| GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(), | |||
| dst_node->GetName().c_str()); | |||
| is_dynamic = true; | |||
| } | |||
| bool is_need_insert_reshape = (src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims()) && | |||
| (!is_dynamic); | |||
| if (is_need_insert_reshape) { | |||
| auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | |||
| GE_CHECK_NOTNULL(reshape); | |||
| @@ -225,6 +225,7 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||
| if (node_item.node_type != NETOUTPUT) { | |||
| // only do shape inference and compilation for nodes with dynamic shapes. | |||
| if (node_item.is_dynamic) { | |||
| GELOGD("Need to reinfershape when %s is dynamic.", node_item.NodeName().c_str()); | |||
| auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { | |||
| GetContext().SetSessionId(context_->session_id); | |||
| GetContext().SetContextId(context_->context_id); | |||
| @@ -168,12 +168,12 @@ Status NodeItem::InitInputsAndOutputs() { | |||
// Decides whether this node must run through the dynamic-shape executor.
// ATTR_NAME_FORCE_UNKNOWN_SHAPE forces dynamic; otherwise fall back to the
// node's actual unknown-shape status.
Status NodeItem::ResolveDynamicState() {
  // Attr lookup failure is ignored on purpose: is_dynamic keeps its prior value.
  (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
  GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
  if (!is_dynamic) {
    GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
                      "[%s] Failed to get shape status.",
                      node->GetName().c_str());
  }
  GELOGD("Resolve dynamic state of %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
  return SUCCESS;
}
| @@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); | |||
| DEFINE_string(display_model_info, "0", "Optional; display model info"); | |||
| DEFINE_string(perforemance_mode, "", "Optional; express high compile performance or high execute performance." | |||
| "normal: no need to compile, used saved .o files directly;" | |||
| "high: need to recompile, high execute performance mode."); | |||
| class GFlagUtils { | |||
| public: | |||
| /** | |||
| @@ -330,7 +334,8 @@ class GFlagUtils { | |||
| "Default value: $HOME/atc_data\n" | |||
| " --op_compiler_cache_mode Set the operator compilation cache mode." | |||
| "Options are disable(default), enable and force(force to refresh the cache)\n" | |||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display"); | |||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display" | |||
| "--performance_mode Set high performance mode of compile or execute when op compile"); | |||
| gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | |||
| // Using gflags to analyze input parameters | |||
| @@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) { | |||
| options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | |||
| options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | |||
| options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | |||
| options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); | |||
| } | |||
| domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||
| @@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||
| output_path = FLAGS_output + "/"; | |||
| } | |||
| output_path += param.file_name; | |||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); | |||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag); | |||
| if (ret != SUCCESS) { | |||
| DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | |||
| ret = domi::FAILED; | |||
| @@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() { | |||
| options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | |||
| options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | |||
| options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); | |||
| // set enable scope fusion passes | |||
| SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | |||
| // print atc option map | |||
| @@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format"; | |||
| constexpr char const *kFileSuffix = ".om"; | |||
| constexpr char const *kKeyDynamicInput = "dynamic_input"; | |||
| constexpr char const *kKeyDynamicOutput = "dynamic_output"; | |||
| constexpr char const *kKeyCompileFlag = "compile_flag"; | |||
| constexpr int kDumpJsonIndent = 2; | |||
| constexpr int kShapeRangePairSize = 2; | |||
| constexpr int kShapeRangeLow = 0; | |||
| @@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) { | |||
| } | |||
| void from_json(const Json &j, SingleOpDesc &desc) { | |||
| desc.op = j.at(kKeyOp).get<string>(); | |||
| auto op = j.find(kKeyOp); | |||
| if (op != j.end()) { | |||
| desc.op = j.at(kKeyOp).get<string>(); | |||
| } | |||
| auto input_desc = j.find(kKeyInputDesc); | |||
| if (input_desc != j.end()) { | |||
| @@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) { | |||
| if (attr_field != j.end()) { | |||
| desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | |||
| } | |||
| auto compile_flag = j.find(kKeyCompileFlag); | |||
| if (compile_flag != j.end()) { | |||
| desc.compile_flag = compile_flag->get<int32_t>(); | |||
| } | |||
| } | |||
| Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | |||
| @@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||
| return ret; | |||
| } | |||
| int32_t compile_flag = 0; | |||
| for (const Json &single_op_json : single_op_list_json) { | |||
| SingleOpDesc single_op_desc; | |||
| GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | |||
| single_op_desc = single_op_json; | |||
| GELOGD("Compile flag is %d.", single_op_desc.compile_flag); | |||
| if (single_op_desc.compile_flag == 1) { | |||
| compile_flag = single_op_desc.compile_flag; | |||
| continue; | |||
| } | |||
| if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | |||
| GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!"); | |||
| REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param."); | |||
| @@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| param.compile_flag = compile_flag; | |||
| op_list.emplace_back(param); | |||
| GELOGI("Parse the index[%d] of op success", index); | |||
| @@ -55,6 +55,7 @@ struct SingleOpDesc { | |||
| std::vector<SingleOpTensorDesc> input_desc; | |||
| std::vector<SingleOpTensorDesc> output_desc; | |||
| std::vector<SingleOpAttr> attrs; | |||
| int32_t compile_flag = 0; | |||
| }; | |||
| struct SingleOpBuildParam { | |||
| @@ -62,6 +63,7 @@ struct SingleOpBuildParam { | |||
| std::vector<ge::GeTensor> inputs; | |||
| std::vector<ge::GeTensor> outputs; | |||
| std::string file_name; | |||
| int32_t compile_flag = 0; | |||
| }; | |||
| void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | |||
| @@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32; | |||
| const size_t kDataMemAlignUnit = 2; | |||
| const string kShapeTypeDynamic = "dynamic"; | |||
| const string kShapeTypeStatic = "static"; | |||
| const int64_t kHostMemType = 1; | |||
| const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||
| const uint32_t kAlignBytes = 512; | |||
| size_t GetAlignedSize(size_t size) { | |||
| size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | |||
| @@ -166,15 +169,67 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||
| return SUCCESS; | |||
| } | |||
// Computes, for each host-memory input, its 512-byte-padded size and records
// (input index, padded size) pairs; fails when the padded total exceeds the
// 1 MB device staging buffer (kFuzzDeviceBufferSize).
Status SingleOp::CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
                                      std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
  int64_t total_size = 0;
  size_t index = 0;
  for (auto &input_buffer : inputs) {
    int64_t input_size = 0;
    // Only inputs placed in host memory need staging to device memory.
    if (input_buffer.placement == kHostMemType) {
      input_size = input_buffer.length;
      // input_size pad to 512
      // NOTE(review): this rounds up to the NEXT multiple of 512 even when the
      // size is already aligned (e.g. 512 -> 1024) — confirm the extra slack
      // is intentional; DynamicSingleOp::CalInputsHostMemSize does the same.
      input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
      inputs_size.emplace_back(index, input_size);
      total_size += input_size;
      GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
    }
    index++;
  }
  // Padded total must fit in the fixed 1 MB device buffer.
  if (total_size > kFuzzDeviceBufferSize) {
    GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
    return FAILED;
  }
  return SUCCESS;
}
| Status SingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers) { | |||
| if (stream_resource_->Init() != SUCCESS) { | |||
| GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer."); | |||
| return FAILED; | |||
| } | |||
| void *dst_addr = stream_resource_->GetDeviceBufferAddr(); | |||
| // copy host mem from input_buffer to device mem of dst_addr | |||
| for (const auto &input_size : inputs_size) { | |||
| size_t index = input_size.first; | |||
| auto size = input_size.second; | |||
| GELOGD("SingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.", | |||
| index, dst_addr, size, update_buffers[index].data, update_buffers[index].length); | |||
| GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream_)); | |||
| update_buffers[index].data = dst_addr; | |||
| dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | |||
| const std::vector<DataBuffer> &outputs) { | |||
| GELOGD("Start SingleOp::ExecuteAsync."); | |||
| Status ret = ValidateArgs(inputs, outputs); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| GE_CHECK_NOTNULL(stream_resource_); | |||
| vector<pair<size_t, uint64_t>> inputs_size; | |||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size)); | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| vector<DataBuffer> update_buffers = inputs; | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers)); | |||
| } | |||
| auto current_mem_base = stream_resource_->GetMemoryBase(); | |||
| if (running_param_->mem_base != current_mem_base) { | |||
| running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | |||
| @@ -185,7 +240,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||
| task->GetOpdesc()->GetName().c_str()); | |||
| } | |||
| } | |||
| ret = UpdateArgs(inputs, outputs); | |||
| if (!inputs_size.empty()) { | |||
| ret = UpdateArgs(update_buffers, outputs); | |||
| } else { | |||
| ret = UpdateArgs(inputs, outputs); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -252,11 +311,100 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | |||
| return SUCCESS; | |||
| } | |||
// Records each host-memory input's payload as an ATTR_NAME_VALUE tensor attr
// on the op's corresponding input desc, making the value visible at compile
// time.
// @param inputs_size   (index, padded size) pairs of host-memory inputs.
// @param input_desc    tensor descs, matched to input_buffers by index.
// @param input_buffers raw input payloads; data pointers must remain valid.
Status DynamicSingleOp::UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                                                const vector<GeTensorDesc> &input_desc,
                                                const std::vector<DataBuffer> &input_buffers) {
  auto op_desc = op_task_->GetOpdesc();
  GE_CHECK_NOTNULL(op_desc);
  GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str());
  for (const auto &input_size : inputs_size) {
    size_t index = input_size.first;
    // .at() throws/aborts on an out-of-range index rather than reading junk.
    auto tensor_desc = input_desc.at(index);
    // reconstruct GeTensor by DataBuffer
    // NOTE(review): MakeShared result is not null-checked before use —
    // confirm allocation failure cannot occur here.
    GeTensorPtr ge_tensor = MakeShared<GeTensor>(tensor_desc);
    GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
           index, tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
      return INTERNAL_ERROR;
    }
    auto tensor = op_desc->MutableInputDesc(index);
    GE_CHECK_NOTNULL(tensor);
    if (!AttrUtils::SetTensor(tensor, ATTR_NAME_VALUE, ge_tensor)) {
      GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
// Computes, for each non-const input desc placed in host memory
// (ATTR_NAME_PLACEMENT == kHostMemType), its 512-byte-padded tensor size and
// records (index, padded size); fails when the padded total exceeds the 1 MB
// device staging buffer.
Status DynamicSingleOp::CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc,
                                             std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
  int64_t total_size = 0;
  size_t index = 0;
  for (const auto &tensor_desc : input_desc) {
    int64_t input_size = 0;
    int64_t mem_type = 0;
    // Missing attrs are tolerated: defaults (0 / false) mean "device, not const".
    (void)AttrUtils::GetInt(tensor_desc, ATTR_NAME_PLACEMENT, mem_type);
    bool is_const = false;
    (void)AttrUtils::GetBool(tensor_desc, CONST_ATTR_NAME_INPUT, is_const);
    if (mem_type == kHostMemType && !is_const) {
      graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(tensor_desc, input_size);
      if (graph_status != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed when CalcOutputSize.");
        GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!");
        return FAILED;
      }
      // input_size pad to 512
      // NOTE(review): rounds up to the NEXT multiple of 512 even when already
      // aligned (512 -> 1024) — confirm the extra slack is intended; the
      // SingleOp variant does the same.
      input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
      inputs_size.emplace_back(index, input_size);
      total_size += input_size;
      GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
    }
    index++;
  }
  // Padded total must fit in the fixed 1 MB device buffer.
  if (total_size > kFuzzDeviceBufferSize) {
    GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
    return FAILED;
  }
  return SUCCESS;
}
| Status DynamicSingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers) { | |||
| StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | |||
| GE_CHECK_NOTNULL(stream_resource); | |||
| void *dst_addr = stream_resource->GetDeviceBufferAddr(); | |||
| // copy host mem from input_buffer to device mem of dst_addr | |||
| for (const auto &input_size : inputs_size) { | |||
| size_t index = input_size.first; | |||
| auto size = input_size.second; | |||
| GELOGD("DynamicSingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.", | |||
| index, dst_addr, size, update_buffers[index].data, update_buffers[index].length); | |||
| GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream_)); | |||
| update_buffers[index].data = dst_addr; | |||
| dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| const vector<DataBuffer> &input_buffers, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers) { | |||
| GELOGD("Start DynamicSingleOp::ExecuteAsync."); | |||
| GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | |||
| vector<pair<size_t, uint64_t>> inputs_size; | |||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_desc, inputs_size)); | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| vector<DataBuffer> update_buffers = input_buffers; | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsTensorValue(inputs_size, input_desc, input_buffers)); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers)); | |||
| } | |||
| if (hybrid_model_executor_ != nullptr) { | |||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||
| hybrid::HybridModelExecutor::ExecuteArgs args; | |||
| @@ -274,11 +422,12 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| return hybrid_model_executor_->Execute(args); | |||
| } | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| GE_CHECK_NOTNULL(op_task_); | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | |||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
| return SUCCESS; | |||
| @@ -45,6 +45,10 @@ class SingleOp { | |||
| Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs, | |||
| std::vector<std::pair<size_t, uint64_t>> &inputs_size); | |||
| Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers); | |||
| friend class SingleOpModel; | |||
| StreamResource *stream_resource_ = nullptr; | |||
| @@ -76,7 +80,12 @@ class DynamicSingleOp { | |||
| const std::vector<DataBuffer> &inputs, | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &outputs) const; | |||
| Status CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc, | |||
| std::vector<std::pair<size_t, uint64_t>> &inputs_size); | |||
| Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers); | |||
| Status UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | |||
| std::unique_ptr<OpTask> op_task_; | |||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||
| @@ -85,6 +94,7 @@ class DynamicSingleOp { | |||
| rtStream_t stream_ = nullptr; | |||
| size_t num_inputs_ = 0; | |||
| size_t num_outputs_ = 0; | |||
| ComputeGraphPtr compute_graph_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_SINGLE_OP_SINGLE_OP_H_ | |||
| @@ -83,6 +83,9 @@ StreamResource *SingleOpManager::GetResource(uintptr_t resource_id, rtStream_t s | |||
| if (it == stream_resources_.end()) { | |||
| res = new (std::nothrow) StreamResource(resource_id); | |||
| if (res != nullptr) { | |||
| if (res->Init() != SUCCESS) { | |||
| return nullptr; | |||
| } | |||
| res->SetStream(stream); | |||
| stream_resources_.emplace(resource_id, res); | |||
| } | |||
| @@ -71,10 +71,10 @@ Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) { | |||
| tasks[i].kernel_with_handle().context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| if (infer_depend_flag) { | |||
| flag = true; | |||
| return SUCCESS; | |||
| } | |||
| // if (infer_depend_flag) { | |||
| // flag = true; | |||
| // return SUCCESS; | |||
| // } | |||
| kernel_task_num++; | |||
| if (kernel_task_num > 1) { | |||
| flag = true; | |||
| @@ -304,6 +304,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||
| ParseArgTable(tbe_task, single_op); | |||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||
| if (tbe_task->tiling_buffer_ != nullptr) { | |||
| GELOGD("tiling buffer is not nullptr."); | |||
| tbe_task->stream_resource_ = stream_resource; | |||
| } | |||
| single_op.tasks_.emplace_back(tbe_task); | |||
| @@ -472,7 +473,8 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||
| return BuildTaskList(&resource, single_op); | |||
| } | |||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | |||
| Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def, | |||
| DynamicSingleOp &single_op) { | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||
| task_def.kernel_with_handle().context(); | |||
| @@ -483,6 +485,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
| TbeOpTask *tbe_task = nullptr; | |||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | |||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||
| if (tbe_task->tiling_buffer_ != nullptr) { | |||
| GELOGD("tiling buffer is not nullptr."); | |||
| tbe_task->stream_resource_ = stream_resource; | |||
| } | |||
| single_op.op_task_.reset(tbe_task); | |||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| GELOGD("Building AICPU_CC task"); | |||
| @@ -504,10 +510,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
| return SUCCESS; | |||
| } | |||
| Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) { | |||
| auto ge_model = model_helper_.GetGeModel(); | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| single_op.compute_graph_ = compute_graph; | |||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||
| for (int i = 0; i < tasks.size(); ++i) { | |||
| const TaskDef &task_def = tasks[i]; | |||
| @@ -521,7 +530,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | |||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | |||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op)); | |||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
| if (single_op.op_task_ != nullptr) { | |||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | |||
| @@ -585,6 +594,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||
| GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); | |||
| return SUCCESS; | |||
| } | |||
| return BuildTaskListForDynamicOp(single_op); | |||
| return BuildTaskListForDynamicOp(&resource, single_op); | |||
| } | |||
| } // namespace ge | |||
| @@ -65,12 +65,13 @@ class SingleOpModel { | |||
| void ParseOutputNode(const OpDescPtr &op_desc); | |||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | |||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||
| Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | |||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | |||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | |||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | |||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | |||
| Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | |||
| Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, | |||
| DynamicSingleOp &single_op); | |||
| static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | |||
| void ParseArgTable(OpTask *task, SingleOp &op); | |||
| @@ -22,6 +22,11 @@ | |||
| #include "single_op/single_op_model.h" | |||
| namespace ge { | |||
namespace {
// Size of the per-stream device staging buffer used for host-resident inputs (1M limit).
constexpr uint32_t kFuzzDeviceBufferSize = 1U * 1024U * 1024U;
}  // namespace
// Binds this resource cache to the given resource id. The device staging buffer
// is NOT allocated here; it is allocated separately by Init().
StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
}
| @@ -39,6 +44,17 @@ StreamResource::~StreamResource() { | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||
| } | |||
| } | |||
| if (device_buffer_ != nullptr) { | |||
| auto rt_ret = rtFree(device_buffer_); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||
| } | |||
| } | |||
| Status StreamResource::Init() { | |||
| auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed.")); | |||
| return SUCCESS; | |||
| } | |||
| SingleOp *StreamResource::GetOperator(const uint64_t key) { | |||
| @@ -40,6 +40,7 @@ class StreamResource { | |||
| rtStream_t GetStream() const; | |||
| void SetStream(rtStream_t stream); | |||
| Status Init(); | |||
| SingleOp *GetOperator(const uint64_t key); | |||
| DynamicSingleOp *GetDynamicOperator(const uint64_t key); | |||
| @@ -49,6 +50,7 @@ class StreamResource { | |||
| uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | |||
| uint8_t *MallocWeight(const std::string &purpose, size_t size); | |||
| const uint8_t *GetMemoryBase() const; | |||
| void *GetDeviceBufferAddr() const {return device_buffer_; } | |||
| private: | |||
| uint8_t *DoMallocMemory(const std::string &purpose, | |||
| @@ -65,6 +67,7 @@ class StreamResource { | |||
| rtStream_t stream_ = nullptr; | |||
| std::mutex mu_; | |||
| std::mutex stream_mu_; | |||
| void *device_buffer_ = nullptr; | |||
| }; | |||
| } // namespace ge | |||
| @@ -333,8 +333,8 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||
| std::vector<void *> args; | |||
| for (auto &buffer : input_buffers) { | |||
| args.emplace_back(buffer.data); | |||
| @@ -354,6 +354,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| args.emplace_back(tiling_buffer_); | |||
| } | |||
| GELOGD("Dst addr is %p, dst size is %zu, src addr is %p, src size is %zu.", | |||
| args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)); | |||
| // node with workspace: build can not get size of workspace, need to update arg_size_ when execute | |||
| if (arg_size_ < (args.size() * sizeof(void *))) { | |||
| size_t temp_size = args.size() * sizeof(void *); | |||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||
| args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||
| arg_size_ = temp_size; | |||
| } | |||
| if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||
| @@ -362,13 +371,14 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | |||
| if (handle_ == nullptr) { | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||
| nullptr, stream)); | |||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||
| } else { | |||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | |||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||
| stream, kernel_info.c_str())); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), | |||
| static_cast<uint32_t>(arg_size_), nullptr, stream, kernel_info.c_str())); | |||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||
| } | |||
| @@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| /// @param [in] inputs: input tensors. | |||
| /// @param [in] outputs: output tensors. | |||
| /// @param [in] model_file_name: name of model file. | |||
| /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||
| /// @return SUCCESS or FAILED | |||
| /// | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name); | |||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||
| int32_t compile_flag = 0); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief: Build single Op into model buff. | |||
| @@ -79,8 +81,6 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||
| /// @param [out] model_buff: model buff of op. | |||
| /// @return SUCCESS or FAILED | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| OpEngineType engine_type, ModelBufferData &model_buff); | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff); | |||
| /// | |||
| @@ -100,7 +100,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| ge::ModelBufferData &model, bool is_offline = true); | |||
| Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline = true); | |||
| bool is_offline = true, int32_t compile_flag = 0); | |||
| bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
| void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | |||
| Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | |||
| @@ -123,6 +123,7 @@ struct OmgContext { | |||
| bool need_multi_batch = false; | |||
| std::vector<NodePtr> data_nodes; | |||
| std::vector<NodePtr> getnext_nosink_nodes; | |||
| bool fuzz_compile_flag = false; | |||
| }; | |||
| } // namespace ge | |||
| @@ -276,6 +276,7 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" | |||
| "${GE_CODE_DIR}/ge/model/ge_model.cc" | |||
| "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | |||
| @@ -703,6 +704,7 @@ set(PASS_TEST_FILES | |||
| "graph/passes/link_gen_mask_nodes_pass_unittest.cc" | |||
| "graph/passes/transpose_transdata_pass_unittest.cc" | |||
| "graph/passes/parallel_group_pass_unittest.cc" | |||
| "graph/passes/mark_node_unknown_shape_pass_unittest.cc" | |||
| ) | |||
| set(KERNEL_TEST_FILES | |||
| @@ -791,6 +793,7 @@ set(SINGLE_OP_TEST_FILES | |||
| "single_op/single_op_manager_unittest.cc" | |||
| "single_op/stream_resource_unittest.cc" | |||
| "single_op/single_op_task_unittest.cc" | |||
| "single_op/single_op_unittest.cc" | |||
| ) | |||
| set(PROFILING_MNG_TEST_FILES | |||
| @@ -85,7 +85,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||
| GeGenerator generator; | |||
| generator.Initialize({}); | |||
| ModelBufferData model_buffer; | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); | |||
| } | |||
| TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||
| @@ -103,7 +103,7 @@ TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||
| generator.Initialize({}); | |||
| ModelBufferData model_buffer; | |||
| bool compile_flag = true; | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS); | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), FAILED); | |||
| } | |||
| TEST_F(UtestGeGenerator, test_check_aicore) { | |||
| @@ -0,0 +1,107 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <cstdint> | |||
| #include <memory> | |||
| #include <string> | |||
| #define private public | |||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "inc/pass_manager.h" | |||
| #undef private | |||
| namespace ge { | |||
| class UtestMarkNodeUnknownShapePass : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| public: | |||
| NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
| auto op_desc = std::make_shared<OpDesc>(name, type); | |||
| for (auto i = 0; i < in_num; ++i) { | |||
| op_desc->AddInputDesc(test_desc); | |||
| } | |||
| for (auto i = 0; i < out_num; ++i) { | |||
| op_desc->AddOutputDesc(test_desc); | |||
| } | |||
| return graph->AddNode(op_desc); | |||
| } | |||
| void make_graph(const ComputeGraphPtr &graph) { | |||
| auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); | |||
| { | |||
| auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||
| GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||
| } | |||
| conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); | |||
| AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); | |||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||
| } | |||
| }; | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { | |||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| op_desc->SetOpKernelLibName("GE"); | |||
| graph->AddNode(op_desc); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| } | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { | |||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| op_desc->SetOpKernelLibName("AIcoreEngine"); | |||
| graph->AddNode(op_desc); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| } | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { | |||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||
| make_graph(graph); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| EXPECT_EQ(graph->GetAllNodes().size(), 3); | |||
| for (const auto &node : graph->GetAllNodes()) { | |||
| if (node->GetName() == "conv1") { | |||
| auto op_desc = node->GetOpDesc(); | |||
| EXPECT_NE(op_desc, nullptr); | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
| EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2); | |||
| } | |||
| for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||
| EXPECT_NE(output_desc, nullptr); | |||
| EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,108 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <vector> | |||
| #include "runtime/rt.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #undef private | |||
| #undef protected | |||
| using namespace std; | |||
| using namespace ge; | |||
// Fixture for DynamicSingleOp::ExecuteAsync failure-path tests; holds no shared state.
class UtestSingleOp : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { | |||
| uintptr_t resource_id = 0; | |||
| std::mutex stream_mu; | |||
| rtStream_t stream = nullptr; | |||
| rtStreamCreate(&stream, 0); | |||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
| vector<int64_t> dims_vec_0 = {2}; | |||
| vector<GeTensorDesc> input_desc; | |||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
| // input data from device | |||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); | |||
| input_desc.emplace_back(tensor_desc_0); | |||
| vector<DataBuffer> input_buffers; | |||
| ge::DataBuffer data_buffer; | |||
| data_buffer.data = new char[4]; | |||
| data_buffer.length = 4; | |||
| input_buffers.emplace_back(data_buffer); | |||
| vector<GeTensorDesc> output_desc; | |||
| vector<DataBuffer> output_buffers; | |||
| // UpdateRunInfo failed | |||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); | |||
| } | |||
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { | |||
| uintptr_t resource_id = 0; | |||
| std::mutex stream_mu; | |||
| rtStream_t stream = nullptr; | |||
| rtStreamCreate(&stream, 0); | |||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
| dynamic_single_op.num_inputs_ = 1; | |||
| vector<int64_t> dims_vec_0 = {2}; | |||
| vector<GeTensorDesc> input_desc; | |||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
| // input data from host | |||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); | |||
| input_desc.emplace_back(tensor_desc_0); | |||
| int64_t input_size = 0; | |||
| EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); | |||
| EXPECT_EQ(input_size, 64); | |||
| EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); | |||
| vector<DataBuffer> input_buffers; | |||
| ge::DataBuffer data_buffer; | |||
| data_buffer.data = new char[4]; | |||
| data_buffer.length = 4; | |||
| input_buffers.emplace_back(data_buffer); | |||
| vector<GeTensorDesc> output_desc; | |||
| vector<DataBuffer> output_buffers; | |||
| auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||
| ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| ge::NodePtr node = graph->AddNode(op_desc); | |||
| tbe_task->node_ = node; | |||
| dynamic_single_op.op_task_.reset((OpTask *)(tbe_task)); | |||
| OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1"); | |||
| EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
| dynamic_single_op.op_task_->op_desc_ = desc_ptr; | |||
| // UpdateRunInfo failed | |||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); | |||
| } | |||