| @@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||
| const int64_t kDynamicDimValue = -2; | |||
| const int kDefaultDeviceId = 0; | |||
| const int kDefaultJobId = 0; | |||
| const int32_t kFuzzBuildPattern = 1; | |||
| std::map<ge::OpEngineType, std::string> engine_type_map{ | |||
| {ge::ENGINE_SYS, kEngineNameDefault}, | |||
| @@ -296,13 +297,60 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso | |||
| return SUCCESS; | |||
| } | |||
// Collects the fuzz-build result attributes (ATTR_NAME_FUZZ_BUILD_RES_ATTRS)
// for a single op after its model has been built.
// @param op_desc          op that was built; may itself carry the result attrs.
// @param ge_root_model    built root model whose graph nodes are inspected.
// @param fuzz_build_attrs out: collected attrs; may stay empty (still SUCCESS).
// @return SUCCESS, or FAILED when the matching node should have produced a
//         fuzz result but did not.
static Status GetFuzzBuildAttrs(OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
                                GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
  GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str());
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  // The fuzz-build marker is only meaningful during the build; strip it from
  // every node now that the build has finished.
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
    node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
  }
  // Fast path: the op itself already carries the result attrs (split case).
  (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
  if (!fuzz_build_attrs.empty()) {
    GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
    return SUCCESS;
  }
  // Otherwise the result is only usable when EVERY AIcore node carries the
  // attrs. Note: stays false when the graph has no AIcore node at all.
  bool all_aicore_support_dyn = false;
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    if (node->GetOpDesc()->GetOpKernelLibName() != kAIcoreEngine) {
      continue;
    }
    if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) {
      all_aicore_support_dyn = true;
    } else {
      all_aicore_support_dyn = false;
      GELOGD("%s kernel type is %s, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str(),
             node->GetOpDesc()->GetOpKernelLibName().c_str());
      break;
    }
  }
  if (all_aicore_support_dyn) {
    GELOGD("All aicore nodes from %s is support dynamic.", ge_root_model->GetRootGraph()->GetName().c_str());
    // Pull the attrs from the node matching the op by name; that node must
    // have produced a non-empty fuzz result, otherwise it is an error.
    for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
      if (node->GetName() == op_desc->GetName()) {
        (void)AttrUtils::GetListNamedAttrs(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
        if (fuzz_build_attrs.empty()) {
          GELOGE(FAILED, "[Get][ATTR_NAME_FUZZ_BUILD_RES_ATTRS] %s should set fuzz ret.", op_desc->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  // Empty result is tolerated: the caller simply skips attaching the attrs.
  if (fuzz_build_attrs.empty()) {
    GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str());
  }
  return SUCCESS;
}
| class GeGenerator::Impl { | |||
| public: | |||
| Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | |||
| ~Impl() = default; | |||
| Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | |||
| bool HasSetShapeRange(const vector<GeTensor> &inputs); | |||
| Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | |||
| Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); | |||
| @@ -742,7 +790,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||
| Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline) { | |||
| bool is_offline, int32_t compile_flag) { | |||
| GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size()); | |||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
| impl_->is_offline_ = is_offline; | |||
| if (!is_offline) { | |||
| @@ -764,6 +813,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | |||
| GE_CHECK_NOTNULL(op_desc_tmp); | |||
| bool fuzz_compile_flag = false; | |||
| if (!(impl_->HasSetShapeRange(inputs)) && (compile_flag == kFuzzBuildPattern)) { | |||
| fuzz_compile_flag = true; | |||
| } | |||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { | |||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; | |||
| // 1. Create ComputeGraph. | |||
| string name = ge::CurrentTimeInStr() + "_" + model_file_name; | |||
| Graph graph; | |||
| @@ -810,6 +869,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | |||
| GE_CHK_STATUS_RET_NOLOG( | |||
| impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | |||
| } else if (fuzz_compile_flag) { | |||
| GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str()); | |||
| (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag); | |||
| GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs; | |||
| if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) { | |||
| GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| if (!fuzz_build_attrs.empty()) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), | |||
| return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } | |||
| @@ -825,15 +897,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| * @param [in] vector<GeTensor> &inputs: Operator input data description information. | |||
| * @param [in] vector<GeTensor> &outputs: Operator output data description information. | |||
| * @param [in] const string &model_file_name: Offline model filename. | |||
| * @param [in] compile_flag: op build flag from atc | |||
| * @return SUCCESS handle successfully / others handle failed | |||
| */ | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, const string &model_file_name) { | |||
| const vector<GeTensor> &outputs, const string &model_file_name, | |||
| int32_t compile_flag) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| ModelBufferData model_buff; | |||
| OpEngineType engine_type = ENGINE_SYS; | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag); | |||
| GELOGI("Finish build single offline model, status: %u", status); | |||
| return status; | |||
| } | |||
| @@ -850,23 +924,17 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||
| * @return SUCCESS handle successfully / others handle failed | |||
| */ | |||
| // old process will be deleted | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | |||
| ModelBufferData &model_buff) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false, | |||
| compile_flag); | |||
| GELOGI("Finish build single online model, status: %u", status); | |||
| return status; | |||
| } | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | |||
| ModelBufferData &model_buff) { | |||
| return SUCCESS; | |||
| } | |||
| Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) { | |||
| ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | |||
| @@ -976,6 +1044,18 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo | |||
| return SUCCESS; | |||
| } | |||
| bool GeGenerator::Impl::HasSetShapeRange(const vector<GeTensor> &inputs) { | |||
| for (const auto &input : inputs) { | |||
| vector<pair<int64_t, int64_t>> shape_range; | |||
| (void)input.GetTensorDesc().GetShapeRange(shape_range); | |||
| if (!shape_range.empty()) { | |||
| GELOGD("Has set shape range."); | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, | |||
| GeRootModelPtr &ge_root_model) { | |||
| static std::atomic<GraphId> atomic_graph_id(0); | |||
| @@ -863,6 +863,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| } | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | |||
| // set fuzz compile flag after origin graph optimize | |||
| GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed."); | |||
| ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||
| @@ -877,7 +879,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| options_.build_step == BUILD_STEP_AFTER_BUILDER || | |||
| options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | |||
| if (run_after_optimize_subgraph) { | |||
| Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||
| ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||
| return ret; | |||
| @@ -895,6 +897,19 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) { | |||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag); | |||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) { | |||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | |||
| PassManager pass_manager; | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | |||
| @@ -358,6 +358,7 @@ class GraphManager { | |||
| ComputeGraphPtr &compute_graph, | |||
| GeRootModelPtr &ge_root_model, | |||
| uint64_t session_id); | |||
| Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph); | |||
| Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | |||
| Graph2SubGraphInfoList &sub_graph_map, | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| namespace { | |||
| const char *const kEngineNameAiCore = "AIcoreEngine"; | |||
| const int32_t kDynamicState = -2; | |||
| } | |||
| Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) { | |||
| GE_CHECK_NOTNULL(graph); | |||
| if (IsAllAicoreSupportDyn(graph)) { | |||
| if (UpdateNodeShapeToUnknown(graph) != SUCCESS) { | |||
| GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown."); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) { | |||
| bool is_all_aicore_support_dyn = false; | |||
| for (const auto &node : graph->GetAllNodes()) { | |||
| if (node->GetOpDesc() == nullptr) { | |||
| continue; | |||
| } | |||
| if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) { | |||
| GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str()); | |||
| continue; | |||
| } | |||
| if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) { | |||
| GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||
| is_all_aicore_support_dyn = true; | |||
| } else { | |||
| GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||
| is_all_aicore_support_dyn = false; | |||
| break; | |||
| } | |||
| } | |||
| return is_all_aicore_support_dyn; | |||
| } | |||
// Rewrites tensor shapes of every non-const node to the unknown-rank marker
// ({kDynamicState} == {-2}) so the graph is treated as dynamic downstream.
// Inputs fed by Const/Variable producers keep their original shapes.
Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
  GELOGD("Need to update node shape to dynamic when get fuzz build result.");
  for (const auto &node : graph->GetAllNodes()) {
    if (NodeUtils::IsConst(*node)) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      // Skip inputs whose producer is a Const or Variable: their shapes are fixed.
      auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
      if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
        continue;
      }
      GELOGD("Update shape for %s.", node->GetName().c_str());
      auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
      if (input_desc != nullptr) {
        input_desc->SetShape(GeShape({kDynamicState}));
      }
    }
    // All outputs of a non-const node become unknown-rank unconditionally.
    for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
      if (output_desc != nullptr) {
        output_desc->SetShape(GeShape({kDynamicState}));
      }
    }
  }
  return SUCCESS;
}
| } // namespace ge | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| #define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| #include "graph/graph.h" | |||
| #include "inc/graph_pass.h" | |||
| namespace ge { | |||
| class MarkNodeUnknownShapePass : public GraphPass { | |||
| public: | |||
| Status Run(ComputeGraphPtr graph); | |||
| private: | |||
| bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph); | |||
| Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||
| @@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) { | |||
| GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | |||
| auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | |||
| GE_CHECK_NOTNULL(dst_tensor); | |||
| bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||
| dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||
| src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | |||
| bool is_dynamic = false; | |||
| auto src_tensor_dims = src_tensor->GetShape().GetDims(); | |||
| auto dst_tensor_dims = dst_tensor->GetShape().GetDims(); | |||
| if ((!(std::all_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val >= 0 ; }))) | |||
| || (!(std::all_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val >= 0; })))) { | |||
| GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(), | |||
| dst_node->GetName().c_str()); | |||
| is_dynamic = true; | |||
| } | |||
| bool is_need_insert_reshape = (src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims()) && | |||
| (!is_dynamic); | |||
| if (is_need_insert_reshape) { | |||
| auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | |||
| GE_CHECK_NOTNULL(reshape); | |||
| @@ -225,6 +225,7 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||
| if (node_item.node_type != NETOUTPUT) { | |||
| // only do shape inference and compilation for nodes with dynamic shapes. | |||
| if (node_item.is_dynamic) { | |||
| GELOGD("Need to reinfershape when %s is dynamic.", node_item.NodeName().c_str()); | |||
| auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { | |||
| GetContext().SetSessionId(context_->session_id); | |||
| GetContext().SetContextId(context_->context_id); | |||
| @@ -168,12 +168,12 @@ Status NodeItem::InitInputsAndOutputs() { | |||
// Decides whether this node must run through the dynamic-shape executor.
// ATTR_NAME_FORCE_UNKNOWN_SHAPE forces dynamic; otherwise fall back to the
// node's actual unknown-shape status.
Status NodeItem::ResolveDynamicState() {
  // Attr lookup failure is ignored on purpose: is_dynamic keeps its prior value.
  (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
  GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
  if (!is_dynamic) {
    GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
                      "[%s] Failed to get shape status.",
                      node->GetName().c_str());
  }
  GELOGD("Resolve dynamic state of %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
  return SUCCESS;
}
| @@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); | |||
| DEFINE_string(display_model_info, "0", "Optional; display model info"); | |||
| DEFINE_string(perforemance_mode, "", "Optional; express high compile performance or high execute performance." | |||
| "normal: no need to compile, used saved .o files directly;" | |||
| "high: need to recompile, high execute performance mode."); | |||
| class GFlagUtils { | |||
| public: | |||
| /** | |||
| @@ -330,7 +334,8 @@ class GFlagUtils { | |||
| "Default value: $HOME/atc_data\n" | |||
| " --op_compiler_cache_mode Set the operator compilation cache mode." | |||
| "Options are disable(default), enable and force(force to refresh the cache)\n" | |||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display"); | |||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display" | |||
| "--performance_mode Set high performance mode of compile or execute when op compile"); | |||
| gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | |||
| // Using gflags to analyze input parameters | |||
| @@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) { | |||
| options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | |||
| options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | |||
| options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | |||
| options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); | |||
| } | |||
| domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||
| @@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||
| output_path = FLAGS_output + "/"; | |||
| } | |||
| output_path += param.file_name; | |||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); | |||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag); | |||
| if (ret != SUCCESS) { | |||
| DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | |||
| ret = domi::FAILED; | |||
| @@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() { | |||
| options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | |||
| options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | |||
| options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); | |||
| // set enable scope fusion passes | |||
| SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | |||
| // print atc option map | |||
| @@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format"; | |||
| constexpr char const *kFileSuffix = ".om"; | |||
| constexpr char const *kKeyDynamicInput = "dynamic_input"; | |||
| constexpr char const *kKeyDynamicOutput = "dynamic_output"; | |||
| constexpr char const *kKeyCompileFlag = "compile_flag"; | |||
| constexpr int kDumpJsonIndent = 2; | |||
| constexpr int kShapeRangePairSize = 2; | |||
| constexpr int kShapeRangeLow = 0; | |||
| @@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) { | |||
| } | |||
| void from_json(const Json &j, SingleOpDesc &desc) { | |||
| desc.op = j.at(kKeyOp).get<string>(); | |||
| auto op = j.find(kKeyOp); | |||
| if (op != j.end()) { | |||
| desc.op = j.at(kKeyOp).get<string>(); | |||
| } | |||
| auto input_desc = j.find(kKeyInputDesc); | |||
| if (input_desc != j.end()) { | |||
| @@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) { | |||
| if (attr_field != j.end()) { | |||
| desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | |||
| } | |||
| auto compile_flag = j.find(kKeyCompileFlag); | |||
| if (compile_flag != j.end()) { | |||
| desc.compile_flag = compile_flag->get<int32_t>(); | |||
| } | |||
| } | |||
| Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | |||
| @@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||
| return ret; | |||
| } | |||
| int32_t compile_flag = 0; | |||
| for (const Json &single_op_json : single_op_list_json) { | |||
| SingleOpDesc single_op_desc; | |||
| GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | |||
| single_op_desc = single_op_json; | |||
| GELOGD("Compile flag is %d.", single_op_desc.compile_flag); | |||
| if (single_op_desc.compile_flag == 1) { | |||
| compile_flag = single_op_desc.compile_flag; | |||
| continue; | |||
| } | |||
| if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | |||
| GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!"); | |||
| REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param."); | |||
| @@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| param.compile_flag = compile_flag; | |||
| op_list.emplace_back(param); | |||
| GELOGI("Parse the index[%d] of op success", index); | |||
| @@ -55,6 +55,7 @@ struct SingleOpDesc { | |||
| std::vector<SingleOpTensorDesc> input_desc; | |||
| std::vector<SingleOpTensorDesc> output_desc; | |||
| std::vector<SingleOpAttr> attrs; | |||
| int32_t compile_flag = 0; | |||
| }; | |||
| struct SingleOpBuildParam { | |||
| @@ -62,6 +63,7 @@ struct SingleOpBuildParam { | |||
| std::vector<ge::GeTensor> inputs; | |||
| std::vector<ge::GeTensor> outputs; | |||
| std::string file_name; | |||
| int32_t compile_flag = 0; | |||
| }; | |||
| void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | |||
| @@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32; | |||
| const size_t kDataMemAlignUnit = 2; | |||
| const string kShapeTypeDynamic = "dynamic"; | |||
| const string kShapeTypeStatic = "static"; | |||
| const int64_t kHostMemType = 1; | |||
| const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||
| const uint32_t kAlignBytes = 512; | |||
| size_t GetAlignedSize(size_t size) { | |||
| size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | |||
| @@ -166,15 +169,67 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||
| return SUCCESS; | |||
| } | |||
// Computes, for each host-memory input, its 512-byte-padded size and records
// (input index, padded size) pairs; fails when the padded total exceeds the
// 1 MB device staging buffer (kFuzzDeviceBufferSize).
Status SingleOp::CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
                                      std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
  int64_t total_size = 0;
  size_t index = 0;
  for (auto &input_buffer : inputs) {
    int64_t input_size = 0;
    // Only inputs placed in host memory need staging to device memory.
    if (input_buffer.placement == kHostMemType) {
      input_size = input_buffer.length;
      // input_size pad to 512
      // NOTE(review): this rounds up to the NEXT multiple of 512 even when the
      // size is already aligned (e.g. 512 -> 1024) — confirm the extra slack
      // is intentional; DynamicSingleOp::CalInputsHostMemSize does the same.
      input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
      inputs_size.emplace_back(index, input_size);
      total_size += input_size;
      GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
    }
    index++;
  }
  // Padded total must fit in the fixed 1 MB device buffer.
  if (total_size > kFuzzDeviceBufferSize) {
    GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
    return FAILED;
  }
  return SUCCESS;
}
| Status SingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers) { | |||
| if (stream_resource_->Init() != SUCCESS) { | |||
| GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer."); | |||
| return FAILED; | |||
| } | |||
| void *dst_addr = stream_resource_->GetDeviceBufferAddr(); | |||
| // copy host mem from input_buffer to device mem of dst_addr | |||
| for (const auto &input_size : inputs_size) { | |||
| size_t index = input_size.first; | |||
| auto size = input_size.second; | |||
| GELOGD("SingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.", | |||
| index, dst_addr, size, update_buffers[index].data, update_buffers[index].length); | |||
| GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream_)); | |||
| update_buffers[index].data = dst_addr; | |||
| dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | |||
| const std::vector<DataBuffer> &outputs) { | |||
| GELOGD("Start SingleOp::ExecuteAsync."); | |||
| Status ret = ValidateArgs(inputs, outputs); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| GE_CHECK_NOTNULL(stream_resource_); | |||
| vector<pair<size_t, uint64_t>> inputs_size; | |||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size)); | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| vector<DataBuffer> update_buffers = inputs; | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers)); | |||
| } | |||
| auto current_mem_base = stream_resource_->GetMemoryBase(); | |||
| if (running_param_->mem_base != current_mem_base) { | |||
| running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | |||
| @@ -185,7 +240,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||
| task->GetOpdesc()->GetName().c_str()); | |||
| } | |||
| } | |||
| ret = UpdateArgs(inputs, outputs); | |||
| if (!inputs_size.empty()) { | |||
| ret = UpdateArgs(update_buffers, outputs); | |||
| } else { | |||
| ret = UpdateArgs(inputs, outputs); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -252,11 +311,100 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | |||
| return SUCCESS; | |||
| } | |||
// Records each host-memory input's payload as an ATTR_NAME_VALUE tensor attr
// on the op's corresponding input desc, making the value visible at compile
// time.
// @param inputs_size   (index, padded size) pairs of host-memory inputs.
// @param input_desc    tensor descs, matched to input_buffers by index.
// @param input_buffers raw input payloads; data pointers must remain valid.
Status DynamicSingleOp::UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                                                const vector<GeTensorDesc> &input_desc,
                                                const std::vector<DataBuffer> &input_buffers) {
  auto op_desc = op_task_->GetOpdesc();
  GE_CHECK_NOTNULL(op_desc);
  GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str());
  for (const auto &input_size : inputs_size) {
    size_t index = input_size.first;
    // .at() throws/aborts on an out-of-range index rather than reading junk.
    auto tensor_desc = input_desc.at(index);
    // reconstruct GeTensor by DataBuffer
    // NOTE(review): MakeShared result is not null-checked before use —
    // confirm allocation failure cannot occur here.
    GeTensorPtr ge_tensor = MakeShared<GeTensor>(tensor_desc);
    GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
           index, tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
      return INTERNAL_ERROR;
    }
    auto tensor = op_desc->MutableInputDesc(index);
    GE_CHECK_NOTNULL(tensor);
    if (!AttrUtils::SetTensor(tensor, ATTR_NAME_VALUE, ge_tensor)) {
      GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
// Computes, for each non-const input desc placed in host memory
// (ATTR_NAME_PLACEMENT == kHostMemType), its 512-byte-padded tensor size and
// records (index, padded size); fails when the padded total exceeds the 1 MB
// device staging buffer.
Status DynamicSingleOp::CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc,
                                             std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
  int64_t total_size = 0;
  size_t index = 0;
  for (const auto &tensor_desc : input_desc) {
    int64_t input_size = 0;
    int64_t mem_type = 0;
    // Missing attrs are tolerated: defaults (0 / false) mean "device, not const".
    (void)AttrUtils::GetInt(tensor_desc, ATTR_NAME_PLACEMENT, mem_type);
    bool is_const = false;
    (void)AttrUtils::GetBool(tensor_desc, CONST_ATTR_NAME_INPUT, is_const);
    if (mem_type == kHostMemType && !is_const) {
      graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(tensor_desc, input_size);
      if (graph_status != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed when CalcOutputSize.");
        GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!");
        return FAILED;
      }
      // input_size pad to 512
      // NOTE(review): rounds up to the NEXT multiple of 512 even when already
      // aligned (512 -> 1024) — confirm the extra slack is intended; the
      // SingleOp variant does the same.
      input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
      inputs_size.emplace_back(index, input_size);
      total_size += input_size;
      GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
    }
    index++;
  }
  // Padded total must fit in the fixed 1 MB device buffer.
  if (total_size > kFuzzDeviceBufferSize) {
    GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
    return FAILED;
  }
  return SUCCESS;
}
| Status DynamicSingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers) { | |||
| StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | |||
| GE_CHECK_NOTNULL(stream_resource); | |||
| void *dst_addr = stream_resource->GetDeviceBufferAddr(); | |||
| // copy host mem from input_buffer to device mem of dst_addr | |||
| for (const auto &input_size : inputs_size) { | |||
| size_t index = input_size.first; | |||
| auto size = input_size.second; | |||
| GELOGD("DynamicSingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.", | |||
| index, dst_addr, size, update_buffers[index].data, update_buffers[index].length); | |||
| GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream_)); | |||
| update_buffers[index].data = dst_addr; | |||
| dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| const vector<DataBuffer> &input_buffers, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers) { | |||
| GELOGD("Start DynamicSingleOp::ExecuteAsync."); | |||
| GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | |||
| vector<pair<size_t, uint64_t>> inputs_size; | |||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_desc, inputs_size)); | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| vector<DataBuffer> update_buffers = input_buffers; | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsTensorValue(inputs_size, input_desc, input_buffers)); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers)); | |||
| } | |||
| if (hybrid_model_executor_ != nullptr) { | |||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||
| hybrid::HybridModelExecutor::ExecuteArgs args; | |||
| @@ -274,11 +422,12 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| return hybrid_model_executor_->Execute(args); | |||
| } | |||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
| GE_CHECK_NOTNULL(op_task_); | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| if (!inputs_size.empty()) { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | |||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
| return SUCCESS; | |||
| @@ -45,6 +45,10 @@ class SingleOp { | |||
| Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs, | |||
| std::vector<std::pair<size_t, uint64_t>> &inputs_size); | |||
| Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers); | |||
| friend class SingleOpModel; | |||
| StreamResource *stream_resource_ = nullptr; | |||
| @@ -76,7 +80,12 @@ class DynamicSingleOp { | |||
| const std::vector<DataBuffer> &inputs, | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &outputs) const; | |||
| Status CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc, | |||
| std::vector<std::pair<size_t, uint64_t>> &inputs_size); | |||
| Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| std::vector<DataBuffer> &update_buffers); | |||
| Status UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||
| const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | |||
| std::unique_ptr<OpTask> op_task_; | |||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||
| @@ -85,6 +94,7 @@ class DynamicSingleOp { | |||
| rtStream_t stream_ = nullptr; | |||
| size_t num_inputs_ = 0; | |||
| size_t num_outputs_ = 0; | |||
| ComputeGraphPtr compute_graph_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_SINGLE_OP_SINGLE_OP_H_ | |||
| @@ -83,6 +83,9 @@ StreamResource *SingleOpManager::GetResource(uintptr_t resource_id, rtStream_t s | |||
| if (it == stream_resources_.end()) { | |||
| res = new (std::nothrow) StreamResource(resource_id); | |||
| if (res != nullptr) { | |||
| if (res->Init() != SUCCESS) { | |||
| return nullptr; | |||
| } | |||
| res->SetStream(stream); | |||
| stream_resources_.emplace(resource_id, res); | |||
| } | |||
| @@ -71,10 +71,10 @@ Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) { | |||
| tasks[i].kernel_with_handle().context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| if (infer_depend_flag) { | |||
| flag = true; | |||
| return SUCCESS; | |||
| } | |||
| // if (infer_depend_flag) { | |||
| // flag = true; | |||
| // return SUCCESS; | |||
| // } | |||
| kernel_task_num++; | |||
| if (kernel_task_num > 1) { | |||
| flag = true; | |||
| @@ -304,6 +304,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||
| ParseArgTable(tbe_task, single_op); | |||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||
| if (tbe_task->tiling_buffer_ != nullptr) { | |||
| GELOGD("tiling buffer is not nullptr."); | |||
| tbe_task->stream_resource_ = stream_resource; | |||
| } | |||
| single_op.tasks_.emplace_back(tbe_task); | |||
| @@ -472,7 +473,8 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||
| return BuildTaskList(&resource, single_op); | |||
| } | |||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | |||
| Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def, | |||
| DynamicSingleOp &single_op) { | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||
| task_def.kernel_with_handle().context(); | |||
| @@ -483,6 +485,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
| TbeOpTask *tbe_task = nullptr; | |||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | |||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||
| if (tbe_task->tiling_buffer_ != nullptr) { | |||
| GELOGD("tiling buffer is not nullptr."); | |||
| tbe_task->stream_resource_ = stream_resource; | |||
| } | |||
| single_op.op_task_.reset(tbe_task); | |||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| GELOGD("Building AICPU_CC task"); | |||
| @@ -504,10 +510,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
| return SUCCESS; | |||
| } | |||
| Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) { | |||
| auto ge_model = model_helper_.GetGeModel(); | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| single_op.compute_graph_ = compute_graph; | |||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||
| for (int i = 0; i < tasks.size(); ++i) { | |||
| const TaskDef &task_def = tasks[i]; | |||
| @@ -521,7 +530,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | |||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | |||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op)); | |||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
| if (single_op.op_task_ != nullptr) { | |||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | |||
| @@ -585,6 +594,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||
| GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); | |||
| return SUCCESS; | |||
| } | |||
| return BuildTaskListForDynamicOp(single_op); | |||
| return BuildTaskListForDynamicOp(&resource, single_op); | |||
| } | |||
| } // namespace ge | |||
| @@ -65,12 +65,13 @@ class SingleOpModel { | |||
| void ParseOutputNode(const OpDescPtr &op_desc); | |||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | |||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||
| Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | |||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | |||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | |||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | |||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | |||
| Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | |||
| Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, | |||
| DynamicSingleOp &single_op); | |||
| static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | |||
| void ParseArgTable(OpTask *task, SingleOp &op); | |||
| @@ -22,6 +22,11 @@ | |||
| #include "single_op/single_op_model.h" | |||
| namespace ge { | |||
namespace {
// Size of the per-stream device staging buffer used for host-resident inputs (1M limit).
constexpr uint32_t kFuzzDeviceBufferSize = 1U * 1024U * 1024U;
}  // namespace
// Binds this resource cache to the given resource id. The device staging buffer
// is NOT allocated here; it is allocated separately by Init().
StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
}
| @@ -39,6 +44,17 @@ StreamResource::~StreamResource() { | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||
| } | |||
| } | |||
| if (device_buffer_ != nullptr) { | |||
| auto rt_ret = rtFree(device_buffer_); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||
| } | |||
| } | |||
| Status StreamResource::Init() { | |||
| auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed.")); | |||
| return SUCCESS; | |||
| } | |||
| SingleOp *StreamResource::GetOperator(const uint64_t key) { | |||
| @@ -40,6 +40,7 @@ class StreamResource { | |||
| rtStream_t GetStream() const; | |||
| void SetStream(rtStream_t stream); | |||
| Status Init(); | |||
| SingleOp *GetOperator(const uint64_t key); | |||
| DynamicSingleOp *GetDynamicOperator(const uint64_t key); | |||
| @@ -49,6 +50,7 @@ class StreamResource { | |||
| uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | |||
| uint8_t *MallocWeight(const std::string &purpose, size_t size); | |||
| const uint8_t *GetMemoryBase() const; | |||
| void *GetDeviceBufferAddr() const {return device_buffer_; } | |||
| private: | |||
| uint8_t *DoMallocMemory(const std::string &purpose, | |||
| @@ -65,6 +67,7 @@ class StreamResource { | |||
| rtStream_t stream_ = nullptr; | |||
| std::mutex mu_; | |||
| std::mutex stream_mu_; | |||
| void *device_buffer_ = nullptr; | |||
| }; | |||
| } // namespace ge | |||
| @@ -333,8 +333,8 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream) { | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||
| std::vector<void *> args; | |||
| for (auto &buffer : input_buffers) { | |||
| args.emplace_back(buffer.data); | |||
| @@ -354,6 +354,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| args.emplace_back(tiling_buffer_); | |||
| } | |||
| GELOGD("Dst addr is %p, dst size is %zu, src addr is %p, src size is %zu.", | |||
| args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)); | |||
| // node with workspace: build can not get size of workspace, need to update arg_size_ when execute | |||
| if (arg_size_ < (args.size() * sizeof(void *))) { | |||
| size_t temp_size = args.size() * sizeof(void *); | |||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||
| args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||
| arg_size_ = temp_size; | |||
| } | |||
| if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||
| @@ -362,13 +371,14 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | |||
| if (handle_ == nullptr) { | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||
| nullptr, stream)); | |||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||
| } else { | |||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | |||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||
| stream, kernel_info.c_str())); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), | |||
| static_cast<uint32_t>(arg_size_), nullptr, stream, kernel_info.c_str())); | |||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||
| } | |||
| @@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| /// @param [in] inputs: input tensors. | |||
| /// @param [in] outputs: output tensors. | |||
| /// @param [in] model_file_name: name of model file. | |||
| /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||
| /// @return SUCCESS or FAILED | |||
| /// | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name); | |||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||
| int32_t compile_flag = 0); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief: Build single Op into model buff. | |||
| @@ -79,8 +81,6 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||
| /// @param [out] model_buff: model buff of op. | |||
| /// @return SUCCESS or FAILED | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| OpEngineType engine_type, ModelBufferData &model_buff); | |||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff); | |||
| /// | |||
| @@ -100,7 +100,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| ge::ModelBufferData &model, bool is_offline = true); | |||
| Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline = true); | |||
| bool is_offline = true, int32_t compile_flag = 0); | |||
| bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
| void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | |||
| Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | |||
| @@ -123,6 +123,7 @@ struct OmgContext { | |||
| bool need_multi_batch = false; | |||
| std::vector<NodePtr> data_nodes; | |||
| std::vector<NodePtr> getnext_nosink_nodes; | |||
| bool fuzz_compile_flag = false; | |||
| }; | |||
| } // namespace ge | |||
| @@ -276,6 +276,7 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" | |||
| "${GE_CODE_DIR}/ge/model/ge_model.cc" | |||
| "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | |||
| @@ -703,6 +704,7 @@ set(PASS_TEST_FILES | |||
| "graph/passes/link_gen_mask_nodes_pass_unittest.cc" | |||
| "graph/passes/transpose_transdata_pass_unittest.cc" | |||
| "graph/passes/parallel_group_pass_unittest.cc" | |||
| "graph/passes/mark_node_unknown_shape_pass_unittest.cc" | |||
| ) | |||
| set(KERNEL_TEST_FILES | |||
| @@ -791,6 +793,7 @@ set(SINGLE_OP_TEST_FILES | |||
| "single_op/single_op_manager_unittest.cc" | |||
| "single_op/stream_resource_unittest.cc" | |||
| "single_op/single_op_task_unittest.cc" | |||
| "single_op/single_op_unittest.cc" | |||
| ) | |||
| set(PROFILING_MNG_TEST_FILES | |||
| @@ -85,7 +85,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||
| GeGenerator generator; | |||
| generator.Initialize({}); | |||
| ModelBufferData model_buffer; | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); | |||
| } | |||
| TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||
| @@ -103,7 +103,7 @@ TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||
| generator.Initialize({}); | |||
| ModelBufferData model_buffer; | |||
| bool compile_flag = true; | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS); | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), FAILED); | |||
| } | |||
| TEST_F(UtestGeGenerator, test_check_aicore) { | |||
| @@ -0,0 +1,107 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <cstdint> | |||
| #include <memory> | |||
| #include <string> | |||
| #define private public | |||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "inc/pass_manager.h" | |||
| #undef private | |||
| namespace ge { | |||
| class UtestMarkNodeUnknownShapePass : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| public: | |||
| NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
| auto op_desc = std::make_shared<OpDesc>(name, type); | |||
| for (auto i = 0; i < in_num; ++i) { | |||
| op_desc->AddInputDesc(test_desc); | |||
| } | |||
| for (auto i = 0; i < out_num; ++i) { | |||
| op_desc->AddOutputDesc(test_desc); | |||
| } | |||
| return graph->AddNode(op_desc); | |||
| } | |||
| void make_graph(const ComputeGraphPtr &graph) { | |||
| auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); | |||
| { | |||
| auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||
| GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||
| } | |||
| conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); | |||
| AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); | |||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||
| } | |||
| }; | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { | |||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| op_desc->SetOpKernelLibName("GE"); | |||
| graph->AddNode(op_desc); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| } | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { | |||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| op_desc->SetOpKernelLibName("AIcoreEngine"); | |||
| graph->AddNode(op_desc); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| } | |||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { | |||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||
| make_graph(graph); | |||
| PassManager pass; | |||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
| EXPECT_EQ(graph->GetAllNodes().size(), 3); | |||
| for (const auto &node : graph->GetAllNodes()) { | |||
| if (node->GetName() == "conv1") { | |||
| auto op_desc = node->GetOpDesc(); | |||
| EXPECT_NE(op_desc, nullptr); | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
| EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2); | |||
| } | |||
| for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||
| EXPECT_NE(output_desc, nullptr); | |||
| EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,108 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <vector> | |||
| #include "runtime/rt.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #undef private | |||
| #undef protected | |||
| using namespace std; | |||
| using namespace ge; | |||
// Fixture for DynamicSingleOp::ExecuteAsync failure-path tests; holds no shared state.
class UtestSingleOp : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { | |||
| uintptr_t resource_id = 0; | |||
| std::mutex stream_mu; | |||
| rtStream_t stream = nullptr; | |||
| rtStreamCreate(&stream, 0); | |||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
| vector<int64_t> dims_vec_0 = {2}; | |||
| vector<GeTensorDesc> input_desc; | |||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
| // input data from device | |||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); | |||
| input_desc.emplace_back(tensor_desc_0); | |||
| vector<DataBuffer> input_buffers; | |||
| ge::DataBuffer data_buffer; | |||
| data_buffer.data = new char[4]; | |||
| data_buffer.length = 4; | |||
| input_buffers.emplace_back(data_buffer); | |||
| vector<GeTensorDesc> output_desc; | |||
| vector<DataBuffer> output_buffers; | |||
| // UpdateRunInfo failed | |||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); | |||
| } | |||
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { | |||
| uintptr_t resource_id = 0; | |||
| std::mutex stream_mu; | |||
| rtStream_t stream = nullptr; | |||
| rtStreamCreate(&stream, 0); | |||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
| dynamic_single_op.num_inputs_ = 1; | |||
| vector<int64_t> dims_vec_0 = {2}; | |||
| vector<GeTensorDesc> input_desc; | |||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
| // input data from host | |||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); | |||
| input_desc.emplace_back(tensor_desc_0); | |||
| int64_t input_size = 0; | |||
| EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); | |||
| EXPECT_EQ(input_size, 64); | |||
| EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); | |||
| vector<DataBuffer> input_buffers; | |||
| ge::DataBuffer data_buffer; | |||
| data_buffer.data = new char[4]; | |||
| data_buffer.length = 4; | |||
| input_buffers.emplace_back(data_buffer); | |||
| vector<GeTensorDesc> output_desc; | |||
| vector<DataBuffer> output_buffers; | |||
| auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||
| ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| ge::NodePtr node = graph->AddNode(op_desc); | |||
| tbe_task->node_ = node; | |||
| dynamic_single_op.op_task_.reset((OpTask *)(tbe_task)); | |||
| OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1"); | |||
| EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
| dynamic_single_op.op_task_->op_desc_ = desc_ptr; | |||
| // UpdateRunInfo failed | |||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); | |||
| } | |||