From bed5633f8b08c9d5f11ef7520b302908f3e1583d Mon Sep 17 00:00:00 2001
From: chuxing <chuxing@huawei.com>
Date: Sat, 12 Dec 2020 16:28:13 +0800
Subject: [PATCH 01/23] Calc op running param by GE

---
 ge/hybrid/executor/node_state.cc              |  54 ++++++----
 ge/hybrid/executor/node_state.h               |   3 +-
 ge/hybrid/executor/subgraph_executor.cc       |   9 +-
 ge/hybrid/executor/worker/execution_engine.cc |   9 +-
 .../executor/worker/shape_inference_engine.cc | 100 ++++++++++++++----
 .../executor/worker/shape_inference_engine.h  |   2 +
 ge/hybrid/model/node_item.cc                  |   9 +-
 ge/hybrid/node_executor/task_context.cc       |  22 ++++
 ge/hybrid/node_executor/task_context.h        |   2 +
 9 files changed, 156 insertions(+), 54 deletions(-)
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 033c5304..66eeeba8 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -18,6 +18,7 @@
 #include <chrono>
 #include "framework/common/debug/log.h"
 #include "graph/compute_graph.h"
+#include "graph/utils/tensor_utils.h"
 #include "hybrid_execution_context.h"
 #include "subgraph_context.h"
 
@@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
          this->num_pending_shapes_);
 }
 
-Status ShapeInferenceState::UpdateInputShape(int idx,
-                                             const GeShape &ori_shape,
-                                             const GeShape &shape) {
+Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
   if (node_item.IsInputShapeStatic(idx)) {
     GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
            node_item.NodeName().c_str(),
            idx,
            node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
-           shape.ToString().c_str());
+           target.GetShape().ToString().c_str());
     return SUCCESS;
   }
 
-  GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
+  int64_t tensor_size = -1;
+  (void) TensorUtils::GetSize(target, tensor_size);
+  GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
          node_item.NodeName().c_str(),
          idx,
-         shape.ToString().c_str(),
-         ori_shape.ToString().c_str());
+         target.GetShape().ToString().c_str(),
+         target.GetOriginShape().ToString().c_str(),
+         tensor_size);
 
   std::lock_guard<std::mutex> lk(mu_);
   auto tensor_desc = node_item.MutableInputDesc(idx);
   GE_CHECK_NOTNULL(tensor_desc);
-  tensor_desc->SetShape(shape);
-  tensor_desc->SetOriginShape(ori_shape);
+  tensor_desc->SetShape(target.GetShape());
+  tensor_desc->SetOriginShape(target.GetOriginShape());
+  (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
   if (--num_pending_shapes_ == 0) {
     ready_cv_.notify_all();
   }
@@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
   for (auto &p : shape_futures) {
     auto idx = p.first;
     auto &future = p.second;
-    GeShape shape;
-    GeShape ori_shape;
     RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
-    GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
-                      "[%s] Get shape failed. index = %u",
-                      node_item.NodeName().c_str(),
-                      idx);
+    auto src_tensor_desc = future.GetTensorDesc();
+    GE_CHECK_NOTNULL(src_tensor_desc);
     RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);
 
+    auto input_desc = node_item.MutableInputDesc(idx);
+    GE_CHECK_NOTNULL(input_desc);
+    int64_t tensor_size = -1;
+    (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
     GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
            node_item.NodeName().c_str(),
            idx,
-           shape.ToString().c_str(),
-           ori_shape.ToString().c_str());
-    auto input_desc = node_item.MutableInputDesc(idx);
-    GE_CHECK_NOTNULL(input_desc);
-    input_desc->SetShape(std::move(shape));
-    input_desc->SetOriginShape(ori_shape);
+           src_tensor_desc->GetShape().ToString().c_str(),
+           src_tensor_desc->GetOriginShape().ToString().c_str(),
+           tensor_size);
+    input_desc->SetShape(src_tensor_desc->GetShape());
+    input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
+    (void) TensorUtils::SetSize(*input_desc, tensor_size);
   }
 
   return SUCCESS;
@@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
   GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
   return SUCCESS;
 }
+
+GeTensorDescPtr ShapeFuture::GetTensorDesc() {  // waits for src_node_, then returns its output desc [src_index_]
+  GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
+  if (!subgraph_context_->Await(src_node_)) {  // Await() reported failure: log it and signal with nullptr
+    GELOGE(INTERNAL_ERROR, "cancelled");
+    return nullptr;
+  }
+  return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);  // NOTE(review): GetOpDesc() is not null-checked
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h
index 04f1ee4b..312e177f 100644
--- a/ge/hybrid/executor/node_state.h
+++ b/ge/hybrid/executor/node_state.h
@@ -35,6 +35,7 @@ class ShapeFuture {
   ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
   ~ShapeFuture() = default;
   Status Get(GeShape &ori_shape, GeShape &shape);
+  GeTensorDescPtr GetTensorDesc();
 
  private:
   NodePtr src_node_;
@@ -45,7 +46,7 @@ class ShapeFuture {
 struct ShapeInferenceState {
   explicit ShapeInferenceState(const NodeItem &node_item);
 
-  Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);
+  Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);
 
   void UpdateInputShapeFuture(int idx, ShapeFuture &&future);
 
diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc
index 5a464f8e..4b6dddab 100644
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
       GE_CHECK_NOTNULL(tensor_desc);
       auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
       GE_CHECK_NOTNULL(node_state);
-      node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
+      node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
     }
   }
 
@@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
   } else {
     node_state.SetKernelTask(node_item.kernel_task);
   }
-
-  GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
-  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
-  GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
-                    "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
-  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
-  GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index e6729352..0d9c7a69 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -20,12 +20,9 @@
 #include "graph/utils/tensor_adapter.h"
 #include "graph/debug/ge_attr_define.h"
 #include "hybrid/node_executor/node_executor.h"
-#include "common/dump/dump_manager.h"
+#include "hybrid/executor//worker//shape_inference_engine.h"
 #include "common/dump/dump_op.h"
-#include "common/types.h"
-#include "common/ge_types.h"
 #include "common/profiling/profiling_manager.h"
-#include "runtime/base.h"
 
 namespace ge {
 namespace hybrid {
@@ -349,6 +346,10 @@ Status NodeDoneCallback::OnNodeDone() {
   }
 
   GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
+  if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
+    // update output tensor sizes
+    GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
+  }
   // PropagateOutputs for type == DEPEND_COMPUTE
   if (node_item.shape_inference_type == DEPEND_COMPUTE) {
     if (graph_context_->trace_enabled) {
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc
index 1d813526..02b3a50b 100755
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -17,9 +17,15 @@
 #include "hybrid/executor/worker/shape_inference_engine.h"
 #include "graph/shape_refiner.h"
 #include "graph/utils/node_utils.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/utils/type_utils.h"
+#include "common/math/math_util.h"
 #include "hybrid/node_executor/node_executor.h"
 
 namespace ge {
+namespace {
+const int kAlignment = 32;  // tensor sizes computed in this file are rounded up to a multiple of this value
+}
 namespace hybrid {
 ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
     : execution_context_(execution_context),
@@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
   }
 
   if (node_item.fused_subgraph != nullptr) {
-    return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
+    GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
+    GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
+    return SUCCESS;
   }
 
   // Skip shape inference for node of type DEPEND_COMPUTE
@@ -66,18 +74,12 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
         "Invoke InferShapeAndType failed.");
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
   }
-  // Check again to make sure shape is valid after shape inference
-  if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
-    bool is_unknown_shape = false;
-    GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
-                      "Failed to get shape status. node = %s",
-                      node_item.NodeName().c_str());
-
-    GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
-                           INTERNAL_ERROR,
-                           "[%s] Shape is still unknown after shape inference.",
-                           node_item.NodeName().c_str());
-  }
+
+  // update output tensor sizes after shape inference
+  // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
+  RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
+  GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
+  RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
 
   GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
          node_item.NodeName().c_str(),
@@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
   // propagate each output
   for (int i = 0; i < node_item.num_outputs; ++i) {
     auto output_desc = node_item.op_desc->MutableOutputDesc(i);
-    const auto &shape = output_desc->MutableShape();
-    const auto &ori_shape = output_desc->GetOriginShape();
     auto &output_nodes = node_item.outputs[i];
 
     // propagate output to all sub-inputs
@@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
         infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
                                            std::move(future));
       } else {
-        GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
-                                                             ori_shape,
-                                                             shape));
+        GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
       }
     }
   }
@@ -230,5 +228,71 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
   }
   return SUCCESS;
 }
+
+Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
+  auto op_desc = node_item.GetOpDesc();
+  for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
+    auto tensor_desc = op_desc->MutableOutputDesc(output_index);
+    GE_CHECK_NOTNULL(tensor_desc);
+    const auto &shape = tensor_desc->MutableShape();
+    auto dims = shape.GetDims();
+    auto dim_num = dims.size();
+    if (shape.IsUnknownShape()) {
+      if (!fallback_with_range) {
+        GELOGE(INTERNAL_ERROR, "[%s] Shape of output[%zu] is still unknown after shape inference. shape = [%s]",
+               node_item.NodeName().c_str(),
+               output_index,
+               shape.ToString().c_str());
+        return INTERNAL_ERROR;
+      }
+
+      GELOGD("[%s] Calc output[%zu] size by range", node_item.NodeName().c_str(), output_index);
+      std::vector<std::pair<int64_t, int64_t>> shape_range;
+      GE_CHK_GRAPH_STATUS_RET(tensor_desc->GetShapeRange(shape_range),
+                              "[$s] Failed to get shape range for output: %zu",
+                              node_item.NodeName().c_str(),
+                              output_index);
+      if (shape_range.size() != dim_num) {
+        GELOGE(INTERNAL_ERROR, "[%s] Number of shape ranges (%zu) mismatches that of dims (%zu), index = %zu",
+               node_item.NodeName().c_str(),
+               shape_range.size(),
+               dim_num,
+               output_index);
+        return INTERNAL_ERROR;
+      }
+
+      for (size_t dim_index = 0; dim_index < dim_num; ++dim_index) {
+        if (dims[dim_index] == ge::UNKNOWN_DIM) {
+          dims[dim_index] = shape_range[dim_index].second;
+        }
+      }
+    }
+
+    uint32_t type_size = 0;
+    if (!TypeUtils::GetDataTypeLength(tensor_desc->GetDataType(), type_size)) {
+      GELOGE(INTERNAL_ERROR, "Failed to get data type size");
+      return INTERNAL_ERROR;
+    }
+    int64_t tensor_size = type_size;
+    for (const auto &dim : dims) {
+      GE_CHECK_GE(dim, 0);
+      GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
+                        "[%s] Shape size overflow, shape = [%s]",
+                        node_item.NodeName().c_str(),
+                        shape.ToString().c_str());
+      tensor_size *= dim;
+    }
+
+    GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
+                      "[%s] Output[%zu] Tensor size too large, shape = [%s]",
+                      node_item.NodeName().c_str(),
+                      output_index,
+                      shape.ToString().c_str());
+    tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
+    (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
+  }
+
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h
index 7bb9269c..9401ead2 100644
--- a/ge/hybrid/executor/worker/shape_inference_engine.h
+++ b/ge/hybrid/executor/worker/shape_inference_engine.h
@@ -34,6 +34,8 @@ class ShapeInferenceEngine {
 
   Status PropagateOutputShapes(const NodeItem &node_item);
 
+  static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);
+
  private:
   static Status UpdatePeerNodeShape(const Node &node);
   Status AwaitDependentNodes(NodeState &node_state);
diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc
index 69cf334d..1fd8fe31 100644
--- a/ge/hybrid/model/node_item.cc
+++ b/ge/hybrid/model/node_item.cc
@@ -22,6 +22,7 @@
 #include "graph/debug/ge_attr_define.h"
 #include "graph/utils/node_utils.h"
 #include "hybrid/node_executor/node_executor.h"
+#include "hybrid/executor/worker/shape_inference_engine.h"
 
 namespace ge {
 namespace hybrid {
@@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
     GE_CHECK_NOTNULL(dst_op_desc);
     auto in_idx = node_and_anchor.second->GetIdx();
     auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
-    fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
+    fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
     GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
   }
 
@@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
     return FAILED;
   }
 
-  fused_subgraph.output_mapping.emplace(parent_index, op_desc);
+  fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
   return SUCCESS;
 }
 
@@ -175,6 +176,10 @@ Status NodeItem::Init() {
       }
     }
 
+    if (is_output_shape_static) {
+      GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
+    }
+
     if (IsControlOp() || node_type == PARTITIONEDCALL) {
       shape_inference_type = DEPEND_COMPUTE;
     } else {
diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index 77004f99..d6291c60 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() {
 }
 
 Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
+  if (callback_fun == nullptr) {
+    GELOGW("[%s] Callback is NULL", GetNodeName());
+    return SUCCESS;
+  }
   auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
   if (ret != SUCCESS) {
     GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const {
   return node_item_->NodeName().c_str();
 }
 
+void TaskContext::ReleaseInputsAndOutputs() {
+  for (int i = 0; i < node_item_->num_inputs; ++i) {
+    auto tensor = inputs_start_ + i;
+    tensor->Destroy();
+    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index);
+  }
+
+  for (int i = 0; i < node_item_->num_outputs; ++i) {
+    auto tensor = outputs_start_ + i;
+    tensor->Destroy();
+    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), index);
+  }
+}
+
 void TaskContext::ReleaseInput(int index) {
   auto input_tensor = MutableInput(index);
   if (input_tensor != nullptr) {
@@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
 const DumpProperties &TaskContext::GetDumpProperties() const {
   return execution_context_->dump_properties;
 }
+
+bool TaskContext::NeedCallback() {  // true when the node has an observer, dump is enabled or profiling_level > 0
+  return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 0549a1dc..34754a14 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -50,6 +50,8 @@ class TaskContext {
   ConstGeTensorDescPtr GetOutputDesc(int index) const;
   GeTensorDescPtr MutableInputDesc(int index) const;
   GeTensorDescPtr MutableOutputDesc(int index) const;
+  void ReleaseInputsAndOutputs();
+  bool NeedCallback();
   void ReleaseInput(int index);
   const TensorValue *GetInput(int index) const;
   const TensorValue *GetOutput(int index) const;

From f28de88aaf367068ca2998466bec5d9999f67cfe Mon Sep 17 00:00:00 2001
From: chuxing <chuxing@huawei.com>
Date: Sat, 12 Dec 2020 16:51:15 +0800
Subject: [PATCH 02/23] fix compile error

---
 ge/hybrid/node_executor/task_context.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index d6291c60..f16bfb2f 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -392,13 +392,13 @@ void TaskContext::ReleaseInputsAndOutputs() {
   for (int i = 0; i < node_item_->num_inputs; ++i) {
     auto tensor = inputs_start_ + i;
     tensor->Destroy();
-    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index);
+    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
   }
 
   for (int i = 0; i < node_item_->num_outputs; ++i) {
     auto tensor = outputs_start_ + i;
     tensor->Destroy();
-    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), index);
+    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
   }
 }
 

From d942b4a57860e9fc8e4d3358b0c0a8f0e6b626b1 Mon Sep 17 00:00:00 2001
From: chuxing <chuxing@huawei.com>
Date: Sat, 12 Dec 2020 18:00:28 +0800
Subject: [PATCH 03/23] fix static checks

---
 ge/hybrid/executor/node_state.cc              |   2 +-
 .../executor/worker/shape_inference_engine.cc | 125 ++++++++++--------
 .../executor/worker/shape_inference_engine.h  |   2 +
 ge/hybrid/model/node_item.cc                  |  88 +++++++-----
 ge/hybrid/model/node_item.h                   |   5 +
 5 files changed, 134 insertions(+), 88 deletions(-)

diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 66eeeba8..ceed40b0 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -122,7 +122,8 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
     GE_CHECK_NOTNULL(input_desc);
     int64_t tensor_size = -1;
     (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
-    GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
+    // The trailing log argument is tensor_size (int64_t): label it "size" and print it with %ld.
+    GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], size = %ld",
            node_item.NodeName().c_str(),
            idx,
            src_tensor_desc->GetShape().ToString().c_str(),
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc
index 02b3a50b..66d0ede2 100755
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -71,7 +71,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
     std::lock_guard<std::mutex> lk(mu_);
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
     GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
-        "Invoke InferShapeAndType failed.");
+                      "Invoke InferShapeAndType failed.");
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
   }
 
@@ -229,66 +229,87 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
   return SUCCESS;
 }
 
+Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,  // source of the shape and its range
+                                               std::vector<int64_t> &shape,  // in/out: dims rewritten in place
+                                               bool fallback_with_range) {  // false: unknown shape is an error
+  const auto &tensor_shape = tensor_desc.MutableShape();
+  if (tensor_shape.IsUnknownShape()) {  // known shapes are left untouched
+    if (!fallback_with_range) {
+      GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
+             tensor_shape.ToString().c_str());
+      return INTERNAL_ERROR;
+    }
+
+    GELOGD("Calc output size by range");
+    std::vector<std::pair<int64_t, int64_t>> shape_range;
+    GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
+    if (shape_range.size() != shape.size()) {  // expects one range entry per dim
+      GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
+             shape_range.size(),
+             shape.size());
+      return INTERNAL_ERROR;
+    }
+
+    for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) {
+      if (shape[dim_index] == ge::UNKNOWN_DIM) {  // only unknown dims are replaced
+        shape[dim_index] = shape_range[dim_index].second;  // NOTE(review): .second is presumably the upper bound
+      }
+    }
+
+    GELOGD("After canonicalization, shape = [%s], before = [%s]",
+           GeShape(shape).ToString().c_str(),
+           tensor_shape.ToString().c_str());
+  }
+
+  return SUCCESS;
+}
+
+Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,  // type length * product(dims), then aligned
+                                            const std::vector<int64_t> &shape,
+                                            int64_t &tensor_size) {  // out: the computed size
+  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
+  uint32_t type_size;  // NOTE(review): uninitialized here; presumably set by GetDataTypeLength on success
+  if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
+    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
+    return INTERNAL_ERROR;
+  }
+
+  tensor_size = type_size;
+  for (const auto &dim : shape) {
+    GE_CHECK_GE(dim, 0);  // NOTE(review): presumably fails on a negative (still unknown) dim - confirm macro
+    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),  // checked before the multiply below
+                      "Shape size overflow, shape = [%s]",
+                      GeShape(shape).ToString().c_str());
+    tensor_size *= dim;
+  }
+
+  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
+                    "Tensor size is too large: %ld, shape = [%s]",
+                    tensor_size,
+                    GeShape(shape).ToString().c_str());
+  tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;  // round up to a multiple of kAlignment
+  return SUCCESS;
+}
+
 Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
   auto op_desc = node_item.GetOpDesc();
   for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
     auto tensor_desc = op_desc->MutableOutputDesc(output_index);
     GE_CHECK_NOTNULL(tensor_desc);
     const auto &shape = tensor_desc->MutableShape();
+    // modify on copy
     auto dims = shape.GetDims();
-    auto dim_num = dims.size();
-    if (shape.IsUnknownShape()) {
-      if (!fallback_with_range) {
-        GELOGE(INTERNAL_ERROR, "[%s] Shape of output[%zu] is still unknown after shape inference. shape = [%s]",
-               node_item.NodeName().c_str(),
-               output_index,
-               shape.ToString().c_str());
-        return INTERNAL_ERROR;
-      }
-
-      GELOGD("[%s] Calc output[%zu] size by range", node_item.NodeName().c_str(), output_index);
-      std::vector<std::pair<int64_t, int64_t>> shape_range;
-      GE_CHK_GRAPH_STATUS_RET(tensor_desc->GetShapeRange(shape_range),
-                              "[$s] Failed to get shape range for output: %zu",
-                              node_item.NodeName().c_str(),
-                              output_index);
-      if (shape_range.size() != dim_num) {
-        GELOGE(INTERNAL_ERROR, "[%s] Number of shape ranges (%zu) mismatches that of dims (%zu), index = %zu",
-               node_item.NodeName().c_str(),
-               shape_range.size(),
-               dim_num,
-               output_index);
-        return INTERNAL_ERROR;
-      }
-
-      for (size_t dim_index = 0; dim_index < dim_num; ++dim_index) {
-        if (dims[dim_index] == ge::UNKNOWN_DIM) {
-          dims[dim_index] = shape_range[dim_index].second;
-        }
-      }
-    }
-
-    uint32_t type_size = 0;
-    if (!TypeUtils::GetDataTypeLength(tensor_desc->GetDataType(), type_size)) {
-      GELOGE(INTERNAL_ERROR, "Failed to get data type size");
-      return INTERNAL_ERROR;
-    }
-    int64_t tensor_size = type_size;
-    for (const auto &dim : dims) {
-      GE_CHECK_GE(dim, 0);
-      GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
-                        "[%s] Shape size overflow, shape = [%s]",
-                        node_item.NodeName().c_str(),
-                        shape.ToString().c_str());
-      tensor_size *= dim;
-    }
+    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
+                      "[%s] Failed to canonicalize shape for output %zu",
+                      node_item.NodeName().c_str(),
+                      output_index);
 
-    GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
-                      "[%s] Output[%zu] Tensor size too large, shape = [%s]",
+    int64_t tensor_size;
+    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
+                      "[%s] Failed to calc tensor size for output %zu",
                       node_item.NodeName().c_str(),
-                      output_index,
-                      shape.ToString().c_str());
-    tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
+                      output_index);
+    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
     (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
   }
 
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h
index 9401ead2..b946577f 100644
--- a/ge/hybrid/executor/worker/shape_inference_engine.h
+++ b/ge/hybrid/executor/worker/shape_inference_engine.h
@@ -37,6 +37,8 @@ class ShapeInferenceEngine {
   static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);
 
  private:
+  static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
+  static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
   static Status UpdatePeerNodeShape(const Node &node);
   Status AwaitDependentNodes(NodeState &node_state);
 
diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc
index 1fd8fe31..eb00f509 100644
--- a/ge/hybrid/model/node_item.cc
+++ b/ge/hybrid/model/node_item.cc
@@ -127,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
   return SUCCESS;
 }
 
-Status NodeItem::Init() {
-  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
-  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
-  num_inputs = static_cast<int>(op_desc->GetInputsSize());
-  num_outputs = static_cast<int>(op_desc->GetOutputsSize());
-
+void NodeItem::ResolveOptionalInputs() {
   if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
     has_optional_inputs = true;
     for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -144,7 +139,18 @@ Status NodeItem::Init() {
       }
     }
   }
+}
 
+Status NodeItem::InitInputsAndOutputs() {  // caches input/output counts as int, then resolves optional inputs
+  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);  // bounds check ahead of the static_cast<int> below
+  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
+  num_inputs = static_cast<int>(op_desc->GetInputsSize());
+  num_outputs = static_cast<int>(op_desc->GetOutputsSize());
+  ResolveOptionalInputs();
+  return SUCCESS;
+}
+
+Status NodeItem::ResolveDynamicState() {
   (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
   GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
   if (!is_dynamic) {
@@ -152,42 +158,54 @@ Status NodeItem::Init() {
                       "[%s] Failed to get shape status.",
                       node->GetName().c_str());
   }
+  return SUCCESS;
+}
 
-  if (is_dynamic) {
-    for (int i = 0; i < num_inputs; ++i) {
-      const auto &input_desc = MutableInputDesc(i);
-      GE_CHECK_NOTNULL(input_desc);
-      if (input_desc->MutableShape().IsUnknownShape()) {
-        is_input_shape_static_.push_back(false);
-      } else {
-        num_static_input_shapes++;
-        is_input_shape_static_.push_back(true);
-        GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
-               NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
-      }
+Status NodeItem::ResolveStaticInputsAndOutputs() {
+  for (int i = 0; i < num_inputs; ++i) {
+    const auto &input_desc = MutableInputDesc(i);
+    GE_CHECK_NOTNULL(input_desc);
+    if (input_desc->MutableShape().IsUnknownShape()) {
+      is_input_shape_static_.push_back(false);
+    } else {
+      num_static_input_shapes++;
+      is_input_shape_static_.push_back(true);
+      GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
+             NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
     }
+  }
 
-    for (int i = 0; i < num_outputs; ++i) {
-      const auto &output_desc = op_desc->MutableOutputDesc(i);
-      GE_CHECK_NOTNULL(output_desc);
-      if (output_desc->MutableShape().IsUnknownShape()) {
-        is_output_shape_static = false;
-        break;
-      }
+  for (int i = 0; i < num_outputs; ++i) {
+    const auto &output_desc = op_desc->MutableOutputDesc(i);
+    GE_CHECK_NOTNULL(output_desc);
+    if (output_desc->MutableShape().IsUnknownShape()) {
+      is_output_shape_static = false;
+      break;
     }
+  }
 
-    if (is_output_shape_static) {
-      GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
-    }
+  if (is_output_shape_static) {
+    GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
+  }
+  return SUCCESS;
+}
 
-    if (IsControlOp() || node_type == PARTITIONEDCALL) {
-      shape_inference_type = DEPEND_COMPUTE;
-    } else {
-      int32_t unknown_shape_type_val = 0;
-      (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
-      shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
-    }
+void NodeItem::ResolveUnknownShapeType() {
+  if (IsControlOp() || node_type == PARTITIONEDCALL) {
+    shape_inference_type = DEPEND_COMPUTE;
+  } else {
+    int32_t unknown_shape_type_val = 0;
+    (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
+    shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
+  }
+}
 
+Status NodeItem::Init() {
+  GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
+  GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
+  if (is_dynamic) {
+    ResolveUnknownShapeType();
+    GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
     GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
   }
 
diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h
index 8fbdc648..99f0d83c 100644
--- a/ge/hybrid/model/node_item.h
+++ b/ge/hybrid/model/node_item.h
@@ -103,6 +103,11 @@ struct NodeItem {
  private:
   explicit NodeItem(NodePtr node);
   Status Init();
+  Status InitInputsAndOutputs();
+  void ResolveOptionalInputs();
+  Status ResolveDynamicState();
+  Status ResolveStaticInputsAndOutputs();
+  void ResolveUnknownShapeType();
 
   std::vector<bool> is_input_shape_static_;
   std::vector<uint32_t> input_desc_indices_;

From 54b6ce9eea0f78ea3bad270fa3711e7da2155381 Mon Sep 17 00:00:00 2001
From: l00444296 <lixiwen1@huawei.com>
Date: Mon, 14 Dec 2020 21:30:42 +0800
Subject: [PATCH 04/23] Feature: Get default op format from ge graph

---
 ge/ir_build/ge_ir_build.cc | 110 +++++++++++++++++++++++++++++++------
 1 file changed, 93 insertions(+), 17 deletions(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index f181170c..34e612a2 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -226,9 +226,11 @@ class Impl {
   };
   ~Impl() { (void)generator_.Finalize(); };
   graphStatus CheckOptions(const std::map<std::string, std::string> &options);
+  graphStatus CheckInputFormat(const string &input_format);
   graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
-  graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape);
-  graphStatus UpdateDataOpAttr(const Graph &graph);
+  graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag);
+  graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format);
+  graphStatus UpdateDataOpAttr(const Graph &graph);
   graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
   graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
                          ModelBufferData &ge_models);
@@ -321,7 +323,62 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
   return GRAPH_SUCCESS;
 }
 
-graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) {
+graphStatus Impl::CheckInputFormat(const string &input_format) {
+  if (!input_format.empty()) {
+    auto iter = ge::input_format_str_to_geformat.find(input_format);
+    if (iter == ge::input_format_str_to_geformat.end()) {
+      GELOGE(GRAPH_PARAM_INVALID, "Input format %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.",
+             input_format.c_str());
+      return GRAPH_PARAM_INVALID;
+    }
+  }
+  return GRAPH_SUCCESS;
+}
+
+graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_format) {
+  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
+  GE_CHECK_NOTNULL(compute_graph);
+  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
+    GE_CHECK_NOTNULL(input_node);
+    ge::OpDescPtr op = input_node->GetOpDesc();
+    GE_CHECK_NOTNULL(op);
+    if (op->GetType() == DATA) {
+      string data_op_name = op->GetName();
+      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
+      ge::GeTensorDesc tensor = op->GetInputDesc(0);
+      ge::GeShape data_shape = tensor.GetShape();
+      GELOGD("Data op get shape from InputDesc in ge ir graph.");
+
+      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
+      if (tmp_shape.empty()) {
+        GELOGD("Data op: %s has zero shape dims!", data_op_name.c_str());
+        continue;
+      }
+
+      bool is_dynamic_input = false;
+      for (auto tmp_dim : tmp_shape) {
+        if (tmp_dim < 0) {
+          is_dynamic_input = true;
+        }
+      }
+
+      if (is_dynamic_input) {
+        string tmp_data_format = ge::TypeUtils::FormatToSerialString(tensor.GetFormat());
+        if (!default_format.empty() && tmp_data_format!=default_format) {
+          GELOGE(GRAPH_PARAM_INVALID, "All data op with dynamic shape has no default format!");
+          return GRAPH_PARAM_INVALID;
+        } else if (default_format.empty()) {
+          default_format.assign(tmp_data_format);
+        }
+        GELOGD("Data op name: %s,  data format: %s.", data_op_name.c_str(), default_format.c_str());
+      }
+    }
+  }
+  GELOGI("Get default data op format: %s from ge ir graph.", default_format.c_str());
+  return GRAPH_SUCCESS;
+}
+
+graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
   auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
   GE_CHECK_NOTNULL(compute_graph);
   for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
@@ -335,21 +392,30 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape
       ge::GeShape data_shape = tensor.GetShape();
       GELOGD("Data op get shape from InputDesc in ge ir graph.");
 
-      string tmp_shape_str;
       const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
       if (tmp_shape.empty()) {
         GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str());
-      } else {
-        tmp_shape_str += data_op_name + ":";
-        for (auto tmp_dim : tmp_shape) {
-          tmp_shape_str += to_string((long)tmp_dim) + ",";
+        continue;
+      }
+
+      string tmp_shape_str;
+      bool is_dynamic_input = false;
+
+      tmp_shape_str += data_op_name + ":";
+      for (auto tmp_dim : tmp_shape) {
+        if (tmp_dim < 0) {
+          is_dynamic_input = true;
         }
-        tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);
-        tmp_shape_str += ";";
-        default_shape += tmp_shape_str;
+        tmp_shape_str += to_string((long)tmp_dim) + ",";
       }
+      tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);
+      tmp_shape_str += ";";
 
-      GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
+      if (is_dynamic_input) {
+        dynamic_shape_flag = true;
+        default_shape += tmp_shape_str;
+        GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
+      }
     }
   }
   default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1));
@@ -378,14 +444,24 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
   GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID);
   options_[ge::ir_option::LOG_LEVEL] = log;
 
-  string input_shape;
-  if (options_.find("input_shape") == options_.end()) {
-    GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS,
+  string default_input_shape;
+  bool dynamic_shape_flag = false;
+  string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"];
+  if (input_shape.empty()) {
+    GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, default_input_shape, dynamic_shape_flag) == ge::SUCCESS,
                      return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!");
-  } else {
-    input_shape = options_["input_shape"];
+    input_shape.assign(default_input_shape);
   }
+
+  string default_input_format;
   string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"];
+  if (!input_format.empty()) {
+    GE_CHK_BOOL_RET_STATUS_NOLOG(CheckInputFormat(input_format) == GRAPH_SUCCESS, GRAPH_PARAM_INVALID);
+  } else if (dynamic_shape_flag) {
+    GE_CHK_BOOL_EXEC(GetDefaultInputFormat(graph, default_input_format) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID,
+                     "Get default data op format from graph failed!");
+    input_format.assign(default_input_format);
+  }
   string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"];
   string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end()
                                   ? ""

From 7370fb9c6319ad8e5c95293f70bf42db9cb6afba Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Mon, 14 Dec 2020 22:43:44 +0800
Subject: [PATCH 05/23] update thirdparty includes

---
 CMakeLists.txt                                |   8 +-
 ge/CMakeLists.txt                             |  15 +
 .../aicpu/aicpu_schedule/aicpu_op_type_list.h |  60 ++++
 .../inc/aicpu/common/aicpu_task_struct.h      |   4 +-
 third_party/fwkacllib/inc/cce/aicpu_engine.h  |  16 +-
 .../fwkacllib/inc/cce/aicpu_engine_struct.h   |   8 +-
 .../fwkacllib/inc/cce/fwk_adpt_struct.h       |  17 +-
 third_party/fwkacllib/inc/hccl/base.h         |  30 +-
 third_party/fwkacllib/inc/hccl/hcom.h         | 214 +++++------
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h   |  58 +--
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h     |   6 +
 third_party/fwkacllib/inc/runtime/base.h      | 332 ++----------------
 third_party/fwkacllib/inc/runtime/config.h    |  23 +-
 third_party/fwkacllib/inc/runtime/dev.h       |   4 +-
 third_party/fwkacllib/inc/runtime/rt.h        |   2 +-
 third_party/fwkacllib/inc/tdt/status.h        |   7 +
 third_party/fwkacllib/inc/tdt/tsd_client.h    |  28 +-
 .../fwkacllib/inc/toolchain/prof_callback.h   | 135 +++++++
 .../fwkacllib/inc/toolchain/prof_reporter.h   |  26 +-
 third_party/fwkacllib/inc/toolchain/slog.h    |  25 ++
 20 files changed, 522 insertions(+), 496 deletions(-)
 create mode 100644 third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
 create mode 100644 third_party/fwkacllib/inc/toolchain/prof_callback.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86d0184b..bea12fcc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,7 +80,7 @@ if (ENABLE_OPEN_SRC)
         find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
         find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
         find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
-        find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
+        #find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
         #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
     else()
         find_module(slog libslog.so ${ASCEND_ATC_DIR})
@@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC)
             find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
             find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             if(PRODUCT STREQUAL "flr3")
                 message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
@@ -115,7 +115,7 @@ if (ENABLE_OPEN_SRC)
                 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
             endif()
         elseif(PLATFORM STREQUAL "all")
-            find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR})
+            find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
             find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
             find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
             find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC)
             find_module(resource libresource.so ${ASCEND_ATC_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR})
+            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
         else()
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 59b804d8..90c341d5 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -603,6 +603,21 @@ set(INFER_SRC_LIST
     "analyzer/analyzer.cc"
 )
 
+if (ENABLE_OPEN_SRC)
+    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
+    if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
+        execute_process(
+            COMMAND ar x  ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object    
+        )
+        file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
+    else()
+        file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "")
+        set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc)
+    endif()
+    add_library(msprofiler_fwk OBJECT ${msprof_file})
+endif()
+
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_runner.so ############
 add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
new file mode 100644
index 00000000..7e0f94a8
--- /dev/null
+++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
@@ -0,0 +1,60 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AICPU_OP_TYPE_LIST_H_
+#define AICPU_OP_TYPE_LIST_H_
+
+enum OpKernelType {
+    TF_KERNEL,
+    CPU_KERNEL
+};
+
+enum ReturnCode {
+    OP_TYPE_NOT_SUPPORT,
+    FORMAT_NOT_SUPPORT,
+    DTYPE_NOT_SUPPORT
+};
+
+#pragma pack(push, 1)
+//One byte alignment
+struct SysOpInfo {
+    uint64_t opLen;
+    uint64_t opType;
+    OpKernelType kernelsType;
+};
+
+struct OpParamInfo {
+    uint64_t num;
+    uint64_t dtypeList;
+    uint64_t formatList;
+};
+
+struct SysOpCheckInfo {
+    uint64_t opListNum;
+    uint64_t offSetLen;
+    uint64_t sysOpInfoList;
+    uint64_t opParamInfoList;
+};
+
+struct SysOpCheckResp {
+    uint64_t opListNum;
+    bool isWithoutJson;
+    uint64_t returnCodeList;
+    uint64_t sysOpInfoList;
+    uint64_t opParamInfoList;
+};
+#pragma pack(pop)
+#endif  // AICPU_OP_TYPE_LIST_H_
diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
index c3672663..72e21f6f 100644
--- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
+++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
@@ -21,13 +21,15 @@
 
 namespace aicpu {
 
+#pragma pack(push, 1)
 struct AicpuParamHead
 {
     uint32_t        length;                    // Total length: include cunstom message
     uint32_t        ioAddrNum;                 // Input and output address number
     uint32_t        extInfoLength;             // extInfo struct Length
     uint64_t        extInfoAddr;               // extInfo address
-} __attribute__ ((packed));
+};
+#pragma pack(pop)
 
 }  // namespace aicpu
 
diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h
index 740f1200..b83731a8 100644
--- a/third_party/fwkacllib/inc/cce/aicpu_engine.h
+++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h
@@ -13,10 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #ifndef AICPU_ENGINE_H__
 #define AICPU_ENGINE_H__
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -36,12 +37,23 @@ typedef enum {
 /**
  * @ingroup aicpu engine
  * @brief aeCallInterface:
- *          a interface to call  a function in a op kernfel lib
+ *          an interface to call a function in an op kernel lib
  * @param [in] addr     void *,  should be STR_KERNEL * format
  * @return aeStatus_t
  */
 aeStatus_t aeCallInterface(void *addr);
 
+/**
+ * @ingroup aicpu engine
+ * @brief aeBatchLoadKernelSo:
+ *          an interface to load kernel so
+ * @param [in] loadSoNum  load so number
+ * @param [in] soPaths    load so paths
+ * @param [in] soNames    load so names
+ * @return aeStatus_t
+ */
+aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
index a5f43be9..8c0c1847 100644
--- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
+++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
@@ -33,18 +33,22 @@ typedef enum {
   FMK_KERNEL_TYPE_RESERVED
 } FwkkernelType_t;
 
+#pragma pack(push, 1)
 typedef struct {
   uint32_t fwkKernelType;  // FwkkernelType_t
   union {
     ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
   } fwkKernelBase;
-} __attribute__((packed)) STR_FWK_OP_KERNEL;
+} STR_FWK_OP_KERNEL;
+#pragma pack(pop)
 
+#pragma pack(push, 1)
 struct SessionInfo {
   uint64_t sessionId;
   uint64_t kernelId;
   bool sessFlag;
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 #ifdef __cplusplus
 }
diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
index 79d94023..50b39d91 100644
--- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
+++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
@@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType {
   FWK_ADPT_UPDATE_INPUT_OUTPUT
 };
 
+#pragma pack(push, 1)
 // API Parameter Structure
 struct StrFWKKernel {
   FWKOperateType opType;
@@ -89,31 +90,39 @@ struct StrFWKKernel {
 
   uint64_t extInfoLen;         // extend info total length
   uint64_t extInfoAddr;        // extend info addr, ExtInfo structure
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 typedef StrFWKKernel FWKOperateParam;
 
 // Extent info ShapeAndType
 const uint32_t kMaxShapeDims = 8;
+#pragma pack(push, 1)
 struct ShapeAndType {
   int32_t type;
   int64_t dims[kMaxShapeDims];
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 // Extend info structure for extInfoAddr
 const uint32_t kExtInfoHeadSize = 8;
+
+#pragma pack(push, 1)
 struct ExtInfo {
   int32_t  infoType;    // extend type
   uint32_t infoLen;     // length for infoMsg
   char     infoMsg[0];  // extend value
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
+#pragma pack(push, 1)
 struct ResultSummary {
   uint64_t shape_data_ptr;   // shape data addr, need convert to void*
   uint64_t shape_data_size;  // num of dims
   uint64_t raw_data_ptr;     // raw data addr,  need convert to void*
   uint64_t raw_data_size;    // size of raw data
-} __attribute__((packed));
+};
+#pragma pack(pop)
 }  // end  namespace FWKAdapter
 }  // namespace aicpu
 
diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h
index 8194097e..9facd20c 100644
--- a/third_party/fwkacllib/inc/hccl/base.h
+++ b/third_party/fwkacllib/inc/hccl/base.h
@@ -22,7 +22,8 @@
 
 #ifndef HCCL_BASE_H_
 #define HCCL_BASE_H_
-
+#include <hccl/hccl_types.h>
+#include <string>
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
@@ -95,6 +96,33 @@ typedef void *rtStream_t;
 */
 typedef void *rtModel_t;
 
+struct HcomOperation {
+    std::string hcclType;
+    void *inputPtr;
+    void *outputPtr;
+    u64 count;
+    HcclDataType dataType;
+    HcclReduceOp opType;
+    u32 root;
+
+    HcomOperation()
+    {
+        inputPtr = nullptr;
+        outputPtr = nullptr;
+        count = 0;
+        dataType = HCCL_DATA_TYPE_RESERVED;
+        opType = HCCL_REDUCE_RESERVED;
+        root = 0;
+    }
+};
+
+struct HcomRemoteAccessAddrInfo {
+    u32 remotetRankID;
+    u64 remoteAddr;  // host embedding table address
+    u64 localAddr;  // device HBM address
+    u64 length;   // Memory Length in Bytes 
+};
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus
diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h
index 90b96ac7..e491d43f 100644
--- a/third_party/fwkacllib/inc/hccl/hcom.h
+++ b/third_party/fwkacllib/inc/hccl/hcom.h
@@ -24,145 +24,96 @@
 
 #include <hccl/base.h>
 #include <hccl/hccl_types.h>
+#include <functional>
+#include <vector>
 
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
 
-/**
- * @brief Initialize HCOM.
- *
- * @param rank_table A string identifying the rank table file path, include file name.
- * @param identify A string identifying the identify for the rank.
- * @return HcclResult
- * @see hcom_destroy()
- */
-extern HcclResult hcom_init(const char *rank_table, const char *identify);
 
-/**
- * @brief Destroy HCOM
- *
- * @return HcclResult
- * @see hcom_init()
- */
-extern HcclResult hcom_destroy(void);
-
-/**
- * @brief Bind the model.
- *
- * @param model A pointer identifying the model information.
- * @param stream A pointer identifying the stream information.
- * @return HcclResult
- * @see hcom_unbind_model()
- */
-extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream);
 
 /**
- * @brief Unbind the model.
+ * @brief Get the rank number in the group.
  *
- * @param model An pointer identifying the model information.
- * @return HcclResult
- * @see hcom_unbind_model()
+ * @param group A string identifying the group name.
+ * @param rankSize A pointer identifying the rank number.
+ * @return HcclResult 
  */
-extern HcclResult hcom_unbind_model(rtModel_t model);
+HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
 
 /**
- * @brief All-gather operator.
+ * @brief Get the rank number in the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param inputCount An integer(u64) identifying the number of the input data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param rankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount,
-                                  HcclDataType dataType, const char *group, rtStream_t stream);
+HcclResult HcomGetRankSize(const char *group, u32 *rankSize);
 
 /**
- * @brief All-reduce operator.
+ * @brief Get the rank number of this rank's server within the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the output data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param localRankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count,
-                                  HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
+HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
 
 /**
- * @brief Broadcast operator.
+ * @brief Get the rank number of this rank's server within the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param ptr A pointer identifying the data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param root An integer(u32) identifying the the root rank in the operator.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param localRankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root,
-                                   const char *group, rtStream_t stream);
+HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);
 
 /**
- * @brief Reduce-scatter operator.
+ * @brief Get the rank id of this rank.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param rankId A pointer identifying the rank id.
  * @return HcclResult 
  */
-extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count,
-                                      HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
+HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
 
 /**
- * @brief Get the rank number in the group.
+ * @brief Get the rank id of this rank.
  *
  * @param group A string identifying the group name.
- * @param rankSize A pointer identifying the rank number.
+ * @param rankId A pointer identifying the rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
+HcclResult HcomGetRankId(const char *group, u32 *rankId);
 
 /**
- * @brief Get the rank number of this rank's server within the group.
+ * @brief Get the local rank id of this rank's server within the group.
  *
  * @param group A string identifying the group name.
- * @param localRankSize A pointer identifying the rank number.
+ * @param localRankId A pointer identifying the local rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
+HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
 
 /**
- * @brief Get the rank id of this rank.
+ * @brief Get the local rank id of this rank's server within the group.
  *
  * @param group A string identifying the group name.
- * @param rankId A pointer identifying the rank id.
+ * @param localRankId A pointer identifying the local rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
+HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);
 
 /**
- * @brief Get the local rank id of this rank's server within the group.
+ * @brief Get the world rank id according to the group rank id.
  *
  * @param group A string identifying the group name.
- * @param localRankId A pointer identifying the local rank id.
+ * @param groupRank An integer(u32) identifying the group rank id.
+ * @param worldRank A pointer identifying the world rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
+HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
 
 /**
  * @brief Get the world rank id according to the group rank id.
@@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
  * @param worldRank A pointer identifying the world rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
+HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);
 
 /**
  * @brief Get the group rank id according to the world rank id.
@@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank,
  */
 HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);
 
+/**
+ * @brief Get the group rank id according to the world rank id.
+ *
+ * @param worldRank An integer(u32) identifying the world rank id.
+ * @param group A string identifying the group name.
+ * @param groupRank A pointer identifying the group rank id.
+ * @return HcclResult 
+ */
+HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);
+
 /**
  * @brief Create group.
  *
@@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group,
 HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);
 
 /**
- * @brief Destroy group
+ * @brief Create group.
  *
  * @param group A string identifying the group name.
+ * @param rankNum An integer(u32) identifying the number of ranks in the group.
+ * @param rankIds A list identifying the ranks in the group.
  * @return HcclResult 
  */
-HcclResult hcom_destroy_group(const char *group);
+HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);
 
 /**
- * @brief Send operator.
+ * @brief Destroy group
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param destRank An integer identifying the destination rank.
- * @param srTag An integer identifying the send/recv message tag.
- * The message will be send by the receive operator with the same "sr_tag".
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
  * @return HcclResult 
  */
-HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType,
-    u32 destRank, u32 srTag, const char *group, rtStream_t stream);
+HcclResult hcom_destroy_group(const char *group);
 
 /**
- * @brief Receive operator.
+ * @brief Destroy group
  *
- * @param tag A string identifying the tag of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param srcRank An integer identifying the source rank.
- * @param srTag An integer identifying the send/recv message tag. 
- * The message will be send by the send operator with the same "sr_tag".
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
  * @return HcclResult 
  */
-HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType,
-    u32 srcRank, u32 srTag, const char *group, rtStream_t stream);
+HcclResult HcomDestroyGroup(const char *group);
 
 /**
- * @brief Get the gradient split strategy with in the group.
+ * @brief Set the gradient split strategy within the group, according to gradient index.
  *
  * @param group A string identifying the group name.
- * @param feature A pointer identifying the feature of the model.
- * @param maxSegmentNum An integer(u32) identifying the max segments of gradients.
- * @param segmentNum A pointer identifying the segments number of gradients.
- * @param segmentIdx A list identifying the index of end gradient in each segment.
- * @return HcclResult 
+ * @param segmentNum An integer(u32) identifying the segments number of gradients.
+ * @param IdxList A list identifying the index of end gradient in each segment.
+ * @return HcclResult
  */
-HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum,
-    u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE,
-    OriginalGraphShapeType shapeType = KNOWN_SHAPE);
+extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
 
 /**
  * @brief Set the gradient split strategy with in the group, according to gradient index.
@@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature
  * @param IdxList A list identifying the index of end gradient in each segment.
  * @return HcclResult
  */
-extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
+extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);
 
 /**
  * @brief Set the gradient split strategy with in the group, according to gradient data size.
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen
  */
 extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);
 
+/**
+ * @brief Set the gradient split strategy within the group, according to gradient data size.
+ *
+ * @param group A string identifying the group name.
+ * @param segmentNum An integer(u32) identifying the segments number of gradients.
+ * @param sizeList A list identifying the percent of each segment.
+ * @return HcclResult
+ */
+extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);
+
 /**
  * @brief Register memories and init resources for remote access.
  *
@@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment
  */
 extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);
 
+/**
+ * @brief Register memories and init resources for remote access.
+ *
+ * @param addrList memory addresses for remote access.
+ * @param count number of remote memory addresses.
+ * @return HcclResult
+ */
+extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);
+
+HcclResult HcomExecInitialize();
+
+HcclResult HcomExecFinalize();
+
+HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);
+
+HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
+                               const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
+                               std::function<void(HcclResult status)> callback);
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
index ea51f497..ad48f70b 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
@@ -215,6 +215,10 @@ typedef struct {
 #define S_IWRITE S_IWUSR
 #endif
 
+#define mm_no_argument        no_argument
+#define mm_required_argument  required_argument
+#define mm_optional_argument  optional_argument
+
 #define M_FILE_RDONLY O_RDONLY
 #define M_FILE_WRONLY O_WRONLY
 #define M_FILE_RDWR O_RDWR
@@ -227,6 +231,7 @@ typedef struct {
 #define M_BINARY O_RDONLY
 #define M_TRUNC O_TRUNC
 #define M_IRWXU S_IRWXU
+#define M_APPEND O_APPEND
 
 #define M_IN_CREATE IN_CREATE
 #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE
@@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
 MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
-                            VOID *sendMsg,
-                            INT32 sendLen,
-                            UINT32 sendFlag,
-                            const mmSockAddr* addr,
-                            INT32 tolen);
+                                          VOID *sendMsg,
+                                          INT32 sendLen,
+                                          UINT32 sendFlag,
+                                          const mmSockAddr* addr,
+                                          INT32 tolen);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
-                                  VOID *recvBuf,
-                                  mmSize recvLen,
-                                  UINT32 recvFlag,
-                                  mmSockAddr* addr,
-                                  mmSocklen_t *FromLen);
+                                                VOID *recvBuf,
+                                                mmSize recvLen,
+                                                UINT32 recvFlag,
+                                                mmSockAddr* addr,
+                                                mmSocklen_t *FromLen);
 MMPA_FUNC_VISIBILITY INT32 mmSAStartup();
 MMPA_FUNC_VISIBILITY INT32 mmSACleanup();
 MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode);
@@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
 MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
 MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
 MMPA_FUNC_VISIBILITY CHAR *mmDlerror();
-MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period);
+MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle,
+                                               mmUserBlock_t *timerBlock,
+                                               UINT milliSecond,
+                                               UINT period);
 MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
 MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
 MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
@@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);
 // Poll related interface
 MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort();
 MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
-MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP,
-                                    pmmPollData polledData, mmPollBack pollBack);
+MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds,
+                                  INT32 fdCount,
+                                  INT32 timeout,
+                                  mmCompletionHandle handleIOCP,
+                                  pmmPollData polledData,
+                                  mmPollBack pollBack);
 MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode();
 MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
 MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
@@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
 MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
 MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
 MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
-MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts,
-                                          INT32 *longIndex);
+MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc,
+                                        char *const *argv,
+                                        const char *opts,
+                                        const mmStructOption *longOpts,
+                                        INT32 *longIndex);
 
 MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
 MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);
@@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
 MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
 MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
 MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
-MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile,
-                                            mmProcess *id);
-
-MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock,
-                                                        const mmThreadAttr *threadAttr);
+MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName,
+                                           const mmArgvEnv *env,
+                                           const char *stdoutRedirectFile,
+                                           mmProcess *id);
+
+MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle,
+                                                      const mmUserBlock_t *funcBlock,
+                                                      const mmThreadAttr *threadAttr);
 MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
 MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
 MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
index 5db6bbf8..cecdd4a7 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
@@ -237,6 +237,11 @@ typedef struct {
 } mmThreadAttr;
 
 typedef VOID (*mmPf)(VOID);
+
+#define mm_no_argument        0
+#define mm_required_argument  1
+#define mm_optional_argument  2
+
 #define M_FILE_RDONLY GENERIC_READ
 #define M_FILE_WRONLY GENERIC_WRITE
 #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE)
@@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID);
 #define M_CREAT _O_CREAT
 #define M_BINARY _O_BINARY
 #define M_TRUNC _O_TRUNC
+#define M_APPEND _O_APPEND
 
 #define M_IREAD _S_IREAD
 #define M_IRUSR _S_IREAD
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 4e735438..b9b2cbe5 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -18,6 +18,7 @@
 #define __CCE_RUNTIME_BASE_H__
 
 #include <stdint.h>
+#include "toolchain/prof_callback.h"
 
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
@@ -32,309 +33,8 @@ extern "C" {
 #endif
 #endif
 
-/**
- * @ingroup dvrt_base
- * @brief runtime error numbers.
- */
-typedef enum tagRtError {
-    RT_ERROR_NONE = 0x0,                    // success
-    
-    RT_ERROR_DEVICE_BASE                    = 0x07010000,
-    RT_ERROR_DEVICE_NULL,
-    RT_ERROR_DEVICE_NEW,
-    RT_ERROR_DEVICE_ID,
-    RT_ERROR_DEVICE_CHIPTYPE,
-    RT_ERROR_DEVICE_DEPLOY,
-    RT_ERROR_DEVICE_RETAIN,
-    RT_ERROR_DEVICE_PLATFORM,
-    RT_ERROR_DEVICE_LOADER,
-    RT_ERROR_DEVICE_LIMIT,
-    RT_ERROR_DEVICE_PROC_HANG_OUT,
-    RT_ERROR_DEVICE_POWER_UP_FAIL,
-    RT_ERROR_DEVICE_POWER_DOWN_FAIL,
-    RT_ERROR_DEVICE_INVALID,
-
-    RT_ERROR_DRV_BASE                       = 0x07020000,
-    RT_ERROR_DRV_NULL,
-    RT_ERROR_DRV_NEW,
-    RT_ERROR_DRV_MEMORY,
-    RT_ERROR_DRV_INPUT,
-    RT_ERROR_DRV_PTRNULL,
-    RT_ERROR_DRV_OPEN_AICPU,
-    RT_ERROR_DRV_CLOSE_AICPU,
-    RT_ERROR_DRV_SYM_AICPU,
-    RT_ERROR_DRV_OPEN_TSD,
-    RT_ERROR_DRV_CLOSE_TSD,
-    RT_ERROR_DRV_SYM_TSD,
-    RT_ERROR_DRV_SOURCE,
-    RT_ERROR_DRV_REPORT,
-    RT_ERROR_DRV_COMMAND,
-    RT_ERROR_DRV_OCCUPY,
-    RT_ERROR_DRV_ERR,
-
-    RT_ERROR_STREAM_BASE                    = 0x07030000,
-    RT_ERROR_STREAM_NULL,
-    RT_ERROR_STREAM_NEW,
-    RT_ERROR_STREAM_CONTEXT,
-    RT_ERROR_STREAM_INVALID,
-    RT_ERROR_STREAM_MODEL,
-    RT_ERROR_STREAM_FUSION,
-    RT_ERROR_STREAM_FULL,
-    RT_ERROR_STREAM_EMPTY,
-    RT_ERROR_STREAM_NOT_COMPLETE,
-    RT_ERROR_STREAM_SYNC,
-    RT_ERROR_STREAM_NO_CB_REG,
-    RT_ERROR_STREAM_DUPLICATE,
-    RT_ERROR_STREAM_NOT_EXIST,
-    RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE,
-    RT_ERROR_SQID_FULL,
-
-    RT_ERROR_MODEL_BASE                     = 0x07040000,
-    RT_ERROR_MODEL_NULL,
-    RT_ERROR_MODEL_NEW,
-    RT_ERROR_MODEL_CONTEXT,
-    RT_ERROR_MODEL_ENDGRAPH,
-    RT_ERROR_MODEL_STREAM,
-    RT_ERROR_MODEL_EXCUTOR,
-    RT_ERROR_MODEL_SETUP,
-    RT_ERROR_MODEL_ID,
-    RT_ERROR_MODEL_EXE_FAILED,
-    RT_ERROR_END_OF_SEQUENCE,               // end of sequence
-    RT_ERROR_MODEL_EXIT,
-    RT_ERROR_MODEL_EXIT_STREAM_UNBIND,
-    RT_ERROR_MODEL_EXIT_ID,
-    RT_ERROR_MODEL_ABORT_NORMAL,
-
-    RT_ERROR_EVENT_BASE                     = 0x07050000,
-    RT_ERROR_EVENT_NULL,
-    RT_ERROR_EVENT_NEW,
-    RT_ERROR_EVENT_RECORDER_NULL,
-    RT_ERROR_EVENT_TIMESTAMP_INVALID,
-    RT_ERROR_EVENT_TIMESTAMP_REVERSAL,
-    RT_ERROR_EVENT_NOT_COMPLETE,
-
-    RT_ERROR_NOTIFY_BASE                    = 0x07060000,
-    RT_ERROR_NOTIFY_NULL,
-    RT_ERROR_NOTIFY_NEW,
-    RT_ERROR_NOTIFY_TYPE,
-    RT_ERROR_NOTIFY_NOT_COMPLETE,
-
-    RT_ERROR_CONTEXT_BASE                   = 0x07070000,
-    RT_ERROR_CONTEXT_NULL,
-    RT_ERROR_CONTEXT_NEW,
-    RT_ERROR_CONTEXT_DEL,
-    RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL,
-    RT_ERROR_CONTEXT_ONLINE_STREAM_NULL,
-
-    RT_ERROR_KERNEL_BASE                    = 0x07080000,
-    RT_ERROR_KERNEL_NULL,
-    RT_ERROR_KERNEL_NEW,
-    RT_ERROR_KERNEL_LOOKUP,
-    RT_ERROR_KERNEL_NAME,
-    RT_ERROR_KERNEL_TYPE,
-    RT_ERROR_KERNEL_OFFSET,
-    RT_ERROR_KERNEL_DUPLICATE,
-    RT_ERROR_KERNEL_UNREGISTERING,
-
-    RT_ERROR_PROGRAM_BASE                   = 0x07090000,
-    RT_ERROR_PROGRAM_NULL,
-    RT_ERROR_PROGRAM_NEW,
-    RT_ERROR_PROGRAM_DATA,
-    RT_ERROR_PROGRAM_SIZE,
-    RT_ERROR_PROGRAM_MEM_TYPE,
-    RT_ERROR_PROGRAM_MACHINE_TYPE,
-    RT_ERROR_PROGRAM_USEOUT,
-
-    RT_ERROR_MODULE_BASE                    = 0x070a0000,
-    RT_ERROR_MODULE_NULL,
-    RT_ERROR_MODULE_NEW,
-
-    RT_ERROR_INSTANCE_BASE                  = 0x070b0000,
-    RT_ERROR_INSTANCE_NULL,
-    RT_ERROR_INSTANCE_NEW,
-    RT_ERROR_INSTANCE_VERSION,
-
-    RT_ERROR_API_BASE                       = 0x070c0000,
-    RT_ERROR_API_NULL,
-    RT_ERROR_API_NEW,
-
-    RT_ERROR_DATADUMP_BASE                  = 0x070d0000,
-    RT_ERROR_DATADUMP_NULL,
-    RT_ERROR_DATADUMP_NEW,
-    RT_ERROR_DATADUMP_TIME,
-    RT_ERROR_DATADUMP_FILE,
-    RT_ERROR_DATADUMP_ADDRESS,
-    RT_ERROR_DATADUMP_LOAD_FAILED,
-    RT_ERROR_DUMP_ADDR_SET_FAILED,
-
-    RT_ERROR_PROF_BASE                      = 0x070e0000,
-    RT_ERROR_PROF_NULL,
-    RT_ERROR_PROF_NEW,
-    RT_ERROR_PROF_START,
-    RT_ERROR_PROF_DEVICE_MEM,
-    RT_ERROR_PROF_HOST_MEM,
-    RT_ERROR_PROF_SET_DIR,
-    RT_ERROR_PROF_OPER,
-    RT_ERROR_PROF_FULL,
-    RT_ERROR_PROF_NAME,
-
-    RT_ERROR_PCTRACE_BASE                   = 0x070f0000,
-    RT_ERROR_PCTRACE_NULL,
-    RT_ERROR_PCTRACE_NEW,
-    RT_ERROR_PCTRACE_TIME,
-    RT_ERROR_PCTRACE_FILE,
-
-    RT_ERROR_TASK_BASE                      = 0x07100000,
-    RT_ERROR_TASK_NULL,
-    RT_ERROR_TASK_NEW,
-    RT_ERROR_TASK_TYPE,
-    RT_ERROR_TASK_ALLOCATOR,
-
-    RT_ERROR_COMMON_BASE                    = 0x07110000,
-    RT_ERROR_INVALID_VALUE,             // RT_ERROR_INPUT_INVALID
-    RT_ERROR_MEMORY_ADDRESS_UNALIGNED,
-    RT_ERROR_SEC_HANDLE,
-    RT_ERROR_OS_HANDLE,
-    RT_ERROR_MUTEX_LOCK,
-    RT_ERROR_MUTEX_UNLOCK,
-    RT_ERROR_CALLOC,
-    RT_ERROR_POOL_RESOURCE,
-    RT_ERROR_TRANS_ARGS,
-    RT_ERROR_METADATA,
-    RT_ERROR_LOST_HEARTBEAT,
-    RT_ERROR_REPORT_TIMEOUT,
-    RT_ERROR_FEATURE_NOT_SUPPROT,
-    RT_ERROR_MEMORY_ALLOCATION,
-    RT_ERROR_MEMORY_FREE,
-    RT_ERROR_INVALID_MEMORY_TYPE,
-
-    RT_ERROR_DEBUG_BASE                     = 0x07120000,
-    RT_ERROR_DEBUG_NULL,
-    RT_ERROR_DEBUG_NEW,
-    RT_ERROR_DEBUG_SIGNAL,
-    RT_ERROR_DEBUG_OPEN,
-    RT_ERROR_DEBUG_WRITE,
-    RT_ERROR_DEBUG_REGISTER_FAILED,
-    RT_ERROR_DEBUG_UNREGISTER_FAILED,
-
-    RT_ERROR_ENGINE_BASE                    = 0x07130000,
-    RT_ERROR_ENGINE_NULL,
-    RT_ERROR_ENGINE_NEW,
-    RT_ERROR_ENGINE_THREAD,
-
-    RT_ERROR_LABEL_BASE                     = 0x07140000,
-    RT_ERROR_LABEL_NULL,
-    RT_ERROR_LABEL_NEW,
-    RT_ERROR_LABEL_CONTEXT,
-    RT_ERROR_LABEL_STREAM,
-    RT_ERROR_LABEL_MODEL,
-    RT_ERROR_LABEL_ALLOCATOR,
-    RT_ERROR_LABEL_FREE,
-    RT_ERROR_LABEL_SET,
-    RT_ERROR_LABEL_ID,
-
-    RT_ERROR_TSFW_BASE                      = 0x07150000,
-    RT_ERROR_TSFW_UNKNOWN,
-    RT_ERROR_TSFW_NULL_PTR,
-    RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID,
-    RT_ERROR_TSFW_ILLEGAL_PARAM,
-    RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL,
-    RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL,
-    RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED,
-    RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED,
-    RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE,
-    RT_ERROR_TSFW_L2_MALLOC_FAILED,
-    RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED,
-    RT_ERROR_TSFW_MEMCPY_OP_FAILED,
-    RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED,
-    RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL,
-    RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY,
-    RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED,
-    RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE,
-    RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND,
-    RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_SQNODE_NOT_ENOUGH,
-    RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_CQ_REPORT_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS,
-    RT_ERROR_TSFW_SYS_DMA_RESET_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL,
-    RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TIMER_EVENT_FULL,
-    RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH,
-    RT_ERROR_TSFW_AICORE_TIMEOUT,
-    RT_ERROR_TSFW_AICORE_EXCEPTION,
-    RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION,
-    RT_ERROR_TSFW_AICPU_TIMEOUT,
-    RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL,
-    RT_ERROR_TSFW_AICPU_EXCEPTION,
-    RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR,
-    RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR,
-    RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM,
-    RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT,
-    RT_ERROR_TSFW_DEBUG_INVALID_SQCQ,
-    RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE,
-    RT_ERROR_TSFW_DEBUG_CMD_PROCESS,
-    RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS,
-    RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS,
-    RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS,
-    RT_ERROR_TSFW_DEBUG_TASK_EMPTY,
-    RT_ERROR_TSFW_DEBUG_TASK_FULL,
-    RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_FULL,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT,
-    RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL,
-    RT_ERROR_TSFW_DEBUG_READ_ERROR,
-    RT_ERROR_TSFW_DEBUG_WRITE_FAIL,
-    RT_ERROR_TSFW_QUEUE_FULL,
-    RT_ERROR_TSFW_QUEUE_EMPTY,
-    RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL,
-    RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH,
-    RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE,
-    RT_ERROR_TSFW_INVLD_CPY_DIR,
-    RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES,
-    RT_ERROR_TSFW_PCIE_DMA_CPY_ERR,
-    RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY,
-    RT_ERROR_TSFW_PROFILE_BUFF_FULL,
-    RT_ERROR_TSFW_PROFILE_MODE_CONFLICT,
-    RT_ERROR_TSFW_PROFILE_OTHER_PID_ON,
-    RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED,
-    RT_ERROR_TSFW_TSCPU_CLOSE_FAILED,
-    RT_ERROR_TSFW_EXPECT_FAIL,
-    RT_ERROR_TSFW_REPEAT_MODEL_STREAM,
-    RT_ERROR_TSFW_STREAM_MODEL_UNBIND,
-    RT_ERROR_TSFW_MODEL_EXE_FAILED,
-    RT_ERROR_TSFW_IPC_SEND_FAILED,
-    RT_ERROR_TSFW_IPC_PROC_REG_FAILED,
-    RT_ERROR_TSFW_STREAM_FULL,
-    RT_ERROR_TSFW_END_OF_SEQUENCE,
-    RT_ERROR_TSFW_SWITCH_STREAM_LABEL,
-    RT_ERROR_TSFW_TRANS_SQE_FAIL,
-    RT_ERROR_TSFW_RESERVED,
-
-    RT_ERROR_SUBSCRIBE_BASE                = 0x07160000,
-    RT_ERROR_SUBSCRIBE_NULL,
-    RT_ERROR_SUBSCRIBE_NEW,
-    RT_ERROR_SUBSCRIBE_STREAM,
-    RT_ERROR_SUBSCRIBE_THREAD,
-    RT_ERROR_SUBSCRIBE_GROUP,
-
-    RT_ERROR_GROUP_BASE                    = 0x07170000,
-    RT_ERROR_GROUP_NOT_SET,
-    RT_ERROR_GROUP_NOT_CREATE,
-
-    RT_ERROR_RESERVED                      = 0x07ff0000,
-  }rtError_t;
+typedef int32_t rtError_t;
+static const int32_t RT_ERROR_NONE = 0; // success
 
 /**
  * @ingroup dvrt_base
@@ -387,10 +87,20 @@ typedef struct rtExceptionInfo {
     uint32_t deviceid;
 } rtExceptionInfo;
 
+typedef struct rtTaskFailInfo {
+    uint32_t taskid;
+    uint32_t streamid;
+    uint32_t tid;
+    uint32_t deviceid;
+    uint32_t retcode;
+} rtTaskFailInfo;
+
 typedef void (*rtErrorCallback)(rtExceptionType);
 
 typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);
 
+typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo);
+
 typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);
 
 /**
@@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t*
  */
 RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);
 
+/**
+ * @ingroup profiling_base
+ * @brief ts set profiling reporter callback.
+ */
+RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);
+
 /**
  * @ingroup dvrt_base
  * @brief Returns the last error from a runtime call.
@@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback);
  */
 RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback);
 
+/**
+ * @ingroup dvrt_base
+ * @brief register callback for fail task 
+ * @param [in] moduleName unique register name, can't be null
+ * @param [in] callback fail task callback function
+ * @param [out] NA
+ * @return RT_ERROR_NONE for ok
+ */
+RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback);
+
 /**
  * @ingroup dvrt_base
  * @brief notify handle.
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index f1a70eaa..12a407d7 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig {
 
 typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t;
 
-/**
- * @ingroup
- * @brief get platform
- * @param [in] platForm
- * @return platForm
- */
-RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm);
-
 /**
  * @ingroup
  * @brief get AI core count
@@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
  */
 RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);
 
-/**
- * @ingroup
- * @brief set platform in gen ctx
- * @param [in] platForm
- * @return RT_ERROR_NONE for ok, errno for failed
- */
-RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
 
 /**
  * @ingroup
@@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
  */
 RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
 
+/**
+ * @ingroup
+ * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020.
+ * @param [out] runtimeVersion
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h
index b378e3b0..d1a91a9b 100644
--- a/third_party/fwkacllib/inc/runtime/dev.h
+++ b/third_party/fwkacllib/inc/runtime/dev.h
@@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
                     FEATURE_TYPE_MEMCPY = 0,
                     FEATURE_TYPE_RSV,
                } rtFeatureType_t;
- * @param [in] infoType   info type
+ * @param [in] featureInfo  info type
                typedef enum tagMemcpyInfo {
                     MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
                     MEMCPY_INFO _RSV,
                } rtMemcpyInfo_t;
- * @param [out] value   the capability info
+ * @param [out] value  the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT
  * @return RT_ERROR_NONE for ok
  */
 RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);
diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h
index d3d5956f..83cafa3c 100644
--- a/third_party/fwkacllib/inc/runtime/rt.h
+++ b/third_party/fwkacllib/inc/runtime/rt.h
@@ -28,4 +28,4 @@
 #include "rt_model.h"
 #include "stream.h"
 
-#endif  // __CCE_RUNTIME_RT_H__
\ No newline at end of file
+#endif  // __CCE_RUNTIME_RT_H__
diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h
index d30564b8..d5050f35 100644
--- a/third_party/fwkacllib/inc/tdt/status.h
+++ b/third_party/fwkacllib/inc/tdt/status.h
@@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t;
 typedef uint32_t TDT_StatusT;
 #endif
 
+#define LINUX 0
+#define WINDOWS 1
+
 #ifndef TDT_LIB_EXPORT
+#if(TARGET_SYSTEM_NAME == WINDOWS)
+#define TDT_LIB_EXPORT __declspec(dllexport)
+#else
 #define TDT_LIB_EXPORT __attribute__((visibility("default")))
 #endif
+#endif
 /**
  * @ingroup  tdt status.
  *
diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h
index 6066a12e..665c8b82 100644
--- a/third_party/fwkacllib/inc/tdt/tsd_client.h
+++ b/third_party/fwkacllib/inc/tdt/tsd_client.h
@@ -23,6 +23,7 @@
 #include <mutex>
 #include "tdt/status.h"
 #include "tdt/data_common.h"
+#include "toolchain/prof_callback.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -37,7 +38,7 @@ extern "C" {
 * Used for the Framework process to communicate with the TSDDaemon process,
 * and notify TSD to complete the initialization of other processes
 *
-* @param phyDeviceId [IN] type #unsigned int. Physical device ID
+* @param logicDeviceId [IN] type #unsigned int. Logic device ID
 * @param rankSize [IN] type #unsigned int. The rankSize of the training.
 * The default value is 1. When rankSize is greater than 1,
 * HCCP will be pulled to perform set communication related operations.
@@ -49,7 +50,7 @@ extern "C" {
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize);
+TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
 
 /**
 * @ingroup Close
@@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
+TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
 
 /**
 * @ingroup UpdateProfilingMode
@@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag);
+TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);
+
+/**
+* @ingroup TsdSetMsprofReporterCallback
+* @brief Sets the aicpu profiling callback function for inference scenarios
+*
+* @par Function
+* Sets the profiling callback function of the aicpu_sd process in offline mode
+*
+* @param callback [IN] type #MsprofReporterCallback. The callback function
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined
+*/
+TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
 
 /**
 * @ingroup CreateCmdParameterObj
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
new file mode 100644
index 00000000..3fad74bc
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -0,0 +1,135 @@
+/**
+ * Copyright 2020-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @file prof_callback.h
+ * @brief declaration of profiling callbacks
+ */
+
+#ifndef MSPROFILER_PROF_CALLBACK_H_
+#define MSPROFILER_PROF_CALLBACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+
+#include "stddef.h"
+#include "stdint.h"
+
+/**
+ * @name  MsprofErrorCode
+ * @brief error code
+ */
+enum MsprofErrorCode {
+    MSPROF_ERROR_NONE = 0,
+    MSPROF_ERROR_MEM_NOT_ENOUGH,
+    MSPROF_ERROR_GET_ENV,
+    MSPROF_ERROR_CONFIG_INVALID,
+    MSPROF_ERROR_ACL_JSON_OFF,
+    MSPROF_ERROR,
+};
+
+#define MSPROF_ENGINE_MAX_TAG_LEN (31)
+
+/**
+ * @name  ReporterData
+ * @brief struct of data to report
+ */
+struct ReporterData {
+    char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1];  // the sub-type of the module, data with different tag will be written
+    int deviceId;                             // the index of device
+    size_t dataLen;                           // the length of send data
+    unsigned char *data;                      // the data content
+};
+
+/**
+ * @name  MsprofReporterModuleId
+ * @brief module id of data to report
+ */
+enum MsprofReporterModuleId {
+    MSPROF_MODULE_DATA_PREPROCESS = 0,    // DATA_PREPROCESS
+    MSPROF_MODULE_HCCL,                   // HCCL
+    MSPROF_MODULE_ACL,                    // AclModule
+    MSPROF_MODULE_FRAMEWORK,              // Framework
+    MSPROF_MODULE_RUNTIME                 // runtime
+};
+
+/**
+ * @name  MsprofReporterCallbackType
+ * @brief reporter callback request type
+ */
+enum MsprofReporterCallbackType {
+    MSPROF_REPORTER_REPORT = 0,           // report data
+    MSPROF_REPORTER_INIT,                 // init reporter
+    MSPROF_REPORTER_UNINIT,               // uninit reporter
+};
+
+/**
+ * @name  MsprofReporterCallback
+ * @brief callback to start reporter/stop reporter/report data
+ * @param moduleId  [IN] enum MsprofReporterModuleId
+ * @param type      [IN] enum MsprofReporterCallbackType
+ * @param data      [IN] callback data (nullptr on INIT/UNINIT)
+ * @param len       [IN] callback data size (0 on INIT/UNINIT)
+ * @return enum MsprofErrorCode
+ */
+typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);
+
+
+#define MSPROF_OPTIONS_DEF_LEN_MAX (2048)
+
+/**
+ * @name  MsprofGeOptions
+ * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS
+ */
+struct MsprofGeOptions {
+    char jobId[MSPROF_OPTIONS_DEF_LEN_MAX];
+    char options[MSPROF_OPTIONS_DEF_LEN_MAX];
+};
+
+/**
+ * @name  MsprofCtrlCallbackType
+ * @brief ctrl callback request type
+ */
+enum MsprofCtrlCallbackType {
+    MSPROF_CTRL_INIT_ACL_ENV = 0,           // start profiling with acl env
+    MSPROF_CTRL_INIT_ACL_JSON,              // start profiling with acl.json
+    MSPROF_CTRL_INIT_GE_OPTIONS,            // start profiling with ge env and options
+    MSPROF_CTRL_FINALIZE                    // stop profiling
+};
+
+/**
+ * @name  MsprofCtrlCallback
+ * @brief callback to start/stop profiling
+ * @param type      [IN] enum MsprofCtrlCallbackType
+ * @param data      [IN] callback data
+ * @param len       [IN] callback data size
+ * @return enum MsprofErrorCode
+ */
+typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);
+
+/**
+ * @name  MsprofSetDeviceCallback
+ * @brief callback to notify set/reset device
+ * @param devId     [IN] device id
+ * @param isOpenDevice  [IN] true: set device, false: reset device
+ */
+typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MSPROFILER_PROF_CALLBACK_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
index c734380c..ff91351b 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
@@ -16,7 +16,17 @@
 
 #ifndef MSPROF_ENGINE_PROF_REPORTER_H_
 #define MSPROF_ENGINE_PROF_REPORTER_H_
+#ifndef OS_TYPE
+#define OS_TYPE 0
+#endif // OS_TYPE
+
+#if (OS_TYPE != LINUX)
+#define MSVP_PROF_API __declspec(dllexport)
+#else
 #define MSVP_PROF_API __attribute__((visibility("default")))
+#endif
+
+#include "prof_callback.h"
 
 /**
  * @file prof_reporter.h
@@ -25,20 +35,6 @@
  */
 namespace Msprof {
 namespace Engine {
-/// the max tag length
-#define MSPROF_ENGINE_MAX_TAG_LEN (31)
-/**
- * @ingroup reporter
- * @brief struct ReporterData
- * the sturct of the data send to libmsprof
- */
-struct ReporterData {
-  char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1];  ///< the sub-type of the module, data with different tag will be writen
-  int deviceId;                             ///< the physical id of device
-  size_t dataLen;                           ///< the length of send data
-  unsigned char *data;                      ///< the data content
-};
-
 /**
  * @ingroup reporter
  * @brief class Reporter
@@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter {
 }  // namespace Engine
 }  // namespace Msprof
 
-#endif  // MSPROF_ENGINE_PROF_REPORTER_H_
\ No newline at end of file
+#endif  // MSPROF_ENGINE_PROF_REPORTER_H_
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index bce58f32..5faca0ae 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -18,7 +18,9 @@
 #define D_SYSLOG_H_
 
 #ifdef __cplusplus
+#ifndef LOG_CPP
 extern "C" {
+#endif
 #endif // __cplusplus
 
 #ifndef LINUX
@@ -105,6 +107,7 @@ extern "C" {
 #define SECURITY_LOG_MASK   (0x00100000)
 #define RUN_LOG_MASK        (0x01000000)
 #define OPERATION_LOG_MASK  (0x10000000)
+#define RESERVERD_LENGTH 52
 
 typedef struct tagDCODE {
   const char *cName;
@@ -116,6 +119,18 @@ typedef struct tagKV {
   char *value;
 } KeyValue;
 
+typedef enum {
+    APPLICATION = 0,
+    SYSTEM
+} ProcessType;
+
+typedef struct {
+    ProcessType type;
+    unsigned int pid;
+    unsigned int deviceId;
+    char reserved[RESERVERD_LENGTH];
+} LogAttr;
+
 /**
  * @ingroup slog
  *
@@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
  */
 DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);
 
+/**
+ * @ingroup slog
+ * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
+ * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
+
 /**
  * @ingroup slog
  * @brief dlog_error: print error log
@@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
 void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);
 
 #ifdef __cplusplus
+#ifndef LOG_CPP
 }
+#endif // LOG_CPP
 #endif // __cplusplus
 #endif // D_SYSLOG_H_

From c85ad855e08b95e30866011e0a0228cd495e768e Mon Sep 17 00:00:00 2001
From: l00444296 <lixiwen1@huawei.com>
Date: Tue, 15 Dec 2020 10:19:42 +0800
Subject: [PATCH 06/23] Feature: Get default op format from ge graph

---
 ge/ir_build/ge_ir_build.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 34e612a2..1b00b334 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -230,7 +230,7 @@ class Impl {
   graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
   graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag);
   graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format);
-  const Graph &graph, string &default_shape, string &input_fo graphStatus UpdateDataOpAttr(const Graph &graph);
+  graphStatus UpdateDataOpAttr(const Graph &graph);
   graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
   graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
                          ModelBufferData &ge_models);
@@ -378,7 +378,7 @@ graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_form
   return GRAPH_SUCCESS;
 }
 
-graphStatus Impl::(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
+graphStatus Impl::Init(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
   auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
   GE_CHECK_NOTNULL(compute_graph);
   for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {

From 842d3f271689834fbe4a70ff144a8cb1ae794df8 Mon Sep 17 00:00:00 2001
From: l00444296 <lixiwen1@huawei.com>
Date: Tue, 15 Dec 2020 10:27:10 +0800
Subject: [PATCH 07/23] Feature: Get default op format from ge graph

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 1b00b334..7ae6cd49 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -378,7 +378,7 @@ graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_form
   return GRAPH_SUCCESS;
 }
 
-graphStatus Impl::Init(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
+graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
   auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
   GE_CHECK_NOTNULL(compute_graph);
   for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {

From 9a43c9afb2a47d5aee4830f867b54f758182f175 Mon Sep 17 00:00:00 2001
From: l00444296 <lixiwen1@huawei.com>
Date: Tue, 15 Dec 2020 10:45:45 +0800
Subject: [PATCH 08/23] Feature: Get default op format from ge graph

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 7ae6cd49..3a1a9fb9 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -414,7 +414,7 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape
       if (is_dynamic_input) {
         dynamic_shape_flag = true;
         default_shape += tmp_shape_str;
-        GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str(),);
+        GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
       }
     }
   }

From d23b490946ef326da4748b6188602d9a598189dc Mon Sep 17 00:00:00 2001
From: weiyang <yangwei79@huawei.com>
Date: Mon, 30 Nov 2020 19:44:33 +0800
Subject: [PATCH 09/23] support known aicpu

---
 .../load/new_model_manager/davinci_model.cc   |  31 +++--
 .../load/new_model_manager/davinci_model.h    |  12 +-
 .../load/new_model_manager/model_manager.cc   |   2 +-
 .../task_info/kernel_task_info.cc             | 118 ++++++++----------
 .../task_info/kernel_task_info.h              |   2 +
 5 files changed, 85 insertions(+), 80 deletions(-)

diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index bc755e07..720c3c28 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
   return SUCCESS;
 }
 
-Status DavinciModel::UpdateKnownZeroCopyAddr() {
-  for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
-    auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
+Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
+  for (size_t i = 0; i < total_io_addrs.size(); ++i) {
+    auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
     if (it_in != knonw_input_data_info_.end()) {
-      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
-             knonw_input_data_info_.at(total_io_addrs_[i]));
-      total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
+      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
+             knonw_input_data_info_.at(total_io_addrs[i]));
+      total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
     }
-    auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
+    auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
     if (it_out != knonw_output_data_info_.end()) {
-      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
-             knonw_output_data_info_.at(total_io_addrs_[i]));
-      total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
+      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
+             knonw_output_data_info_.at(total_io_addrs[i]));
+      total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
     }
   }
   GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
   } else {
     total_io_addrs_ = orig_total_io_addrs_;
   }
-  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
+  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
 
   if (total_args_size_ == 0) {
     GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() {
     GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-
+  // malloc dynamic and static hybrid memory
+  if (total_hybrid_args_size_ != 0) {
+    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+  }
   // malloc fixed addr memory, eg: rts op
   if (total_fixed_addr_size_ != 0) {
     GELOGI("Begin to allocate fixed addr.");
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 19888e1f..27bd4de5 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -476,6 +476,14 @@ class DavinciModel {
   void SetTotalIOAddrs(vector<void *> &io_addrs) {
     total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
   }
+  void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
+  uint32_t GetHybridArgsSize() {
+    return total_hybrid_args_size_;
+  }
+  void *GetCurrentHybridArgsAddr(uint32_t offset) {
+    void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
+    return cur_args;
+  }
   void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
   int64_t GetFixedAddrsSize(string tensor_name);
   void *GetCurrentFixedAddr(int64_t offset) const {
@@ -494,7 +502,7 @@ class DavinciModel {
   Status MallocKnownArgs();
   Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
   Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
-  Status UpdateKnownZeroCopyAddr();
+  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
   void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }
 
   Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -977,6 +985,8 @@ class DavinciModel {
   void *args_ = nullptr;
   void *args_host_ = nullptr;
   void *fixed_addrs_ = nullptr;
+  void *hybrid_addrs_ = nullptr;
+  uint32_t total_hybrid_args_size_ = 0;
   int64_t total_fixed_addr_size_ = 0;
   std::map<const void *, void *> knonw_input_data_info_;
   std::map<const void *, void *> knonw_output_data_info_;
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index b595ac39..da4856d3 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1214,7 +1214,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
 
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
-                         "Invalid model id %u, check weather model has been loaded or not.", model_id);
+                         "Invalid model id %u, check whether model has been loaded or not.", model_id);
 
   if (davinci_model->NeedDestroyAicpuKernel()) {
     GELOGI("Start to destroy specified aicpu kernel.");
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
index 74faeb24..364c7ac2 100755
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
 Status KernelTaskInfo::Distribute() {
   GELOGD("KernelTaskInfo Distribute Start.");
   if (davinci_model_->IsKnownNode()) {
-    args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+    if (kernel_type_ == ccKernelType::TE) {
+      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+    } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+      args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
+    }
     GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
   }
   rtError_t rt_ret = RT_ERROR_NONE;
@@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() {
   const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
   vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
   vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
-  vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
 
   vector<void *> io_addrs;
-  if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
-    io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
-    io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
+  io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
+  io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
+  if (kernel_type_ == ccKernelType::TE) {
+    vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
     io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
-  } else {
-    string peer_input_name;
-    if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
-      uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
-      if (output_index > output_data_addrs.size()) {
-        GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
-               output_data_addrs.size(), output_index);
-        return FAILED;
-      }
-      io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
-      for (size_t i = 0; i < output_data_addrs.size(); ++i) {
-        if (i == output_index) {
-          void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
-          io_addrs.emplace_back(fixed_addr);
-          continue;
-        }
-        io_addrs.emplace_back(output_data_addrs[i]);
-      }
-      io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
+    davinci_model_->SetTotalIOAddrs(io_addrs);
+  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+    davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
+    uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
+    auto addrs_size = sizeof(uint64_t) * io_addrs.size();
+    errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
+    if (sec_ret != EOK) {
+      GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+      return FAILED;
+    }
+    // copy args to device
+    rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
     }
   }
 
-  davinci_model_->SetTotalIOAddrs(io_addrs);
   GELOGI("KernelTaskInfo::UpdateArgs success.");
   return SUCCESS;
 }
@@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
 }
 
 Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
-  domi::KernelDef kernel_def = task_def.kernel();
-  uint32_t args_size = kernel_def.args_size();
-  args_offset_ = davinci_model->GetTotalArgsSize();
-  davinci_model->SetTotalArgsSize(args_size);
-  GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
-
-  // get opcontext stored in model
+  const domi::KernelDef &kernel_def = task_def.kernel();
   const domi::KernelContext &context = kernel_def.context();
-  // get opdesc
-  op_desc_ = davinci_model->GetOpByIndex(context.op_index());
-  GE_CHECK_NOTNULL(op_desc_);
-  // alloc fixed addr
-  string peer_input_name;
-  if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
-    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
-    if (output_index > op_desc_->GetOutputsSize()) {
-      GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
-             output_index);
-      return FAILED;
-    }
-    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
-    auto tensor_desc = op_desc_->GetOutputDesc(output_index);
-    int64_t tensor_size = 0;
-    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
-    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
-    GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
-           fixed_addr_offset_);
+  kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
+  if (kernel_type_ == ccKernelType::TE) {
+    uint32_t args_size = kernel_def.args_size();
+    args_offset_ = davinci_model->GetTotalArgsSize();
+    davinci_model->SetTotalArgsSize(args_size);
+    GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
+  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+    hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
+    davinci_model->SetHybridArgsSize(kernel_def.args_size());
+    GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
   }
   return SUCCESS;
 }
@@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
   }
 
   // copy args to new host memory
-  std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
+  args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
   GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
   errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
   if (sec_ret != EOK) {
@@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
     return FAILED;
   }
 
-  const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
+  auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
+  const auto &ext_info = kernel_def.kernel_ext_info();
+  auto init_ret = InitAicpuTaskExtInfo(ext_info);
+  if (init_ret != SUCCESS) {
+    GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
+    return init_ret;
+  }
+  GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
+         op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);
 
+  aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
+  aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());
+
+  if (davinci_model_->IsKnownNode()) {
+    return SUCCESS;
+  }
+  const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
   vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
   vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
   vector<void *> io_addrs;
@@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
     }
   }
 
-  auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
-  const auto &ext_info = kernel_def.kernel_ext_info();
-  auto init_ret = InitAicpuTaskExtInfo(ext_info);
-  if (init_ret != SUCCESS) {
-    GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
-    return init_ret;
-  }
-  GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
-         op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);
-
-  aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
-  aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());
-
   // malloc device memory for args
   rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
index 1f90ede1..7717edd3 100644
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
@@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo {
   OpDescPtr op_desc_;
   DavinciModel *davinci_model_;
   uint32_t args_offset_ = 0;
+  uint32_t hybrid_args_offset_ = 0;
   int64_t fixed_addr_offset_ = 0;
+  std::unique_ptr<uint8_t[]> args_addr = nullptr;
   bool call_save_dump_ = false;
 
   // aicpu ext_info device mem

From f25a8fa11e0d19eaf650e2426915cda7264f8544 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 00:35:36 +0800
Subject: [PATCH 10/23] update submodule metadef parser

---
 metadef | 2 +-
 parser  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadef b/metadef
index dba83744..97f45957 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299
+Subproject commit 97f4595760f034bd06fca6c8e9459039413fbe2f
diff --git a/parser b/parser
index ce574894..6420c719 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a
+Subproject commit 6420c71989f6f7b36154b226bd8aea7790266ad1

From ccb4443e9d5f703d85982674f06173ab08f4d1eb Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 10:51:10 +0800
Subject: [PATCH 11/23] update cmakelist

---
 ge/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 90c341d5..073ca05c 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -606,12 +606,14 @@ set(INFER_SRC_LIST
 if (ENABLE_OPEN_SRC)
     file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
     if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
+        message(status "libmsprofiler_fwk.a has been found!")
         execute_process(
             COMMAND ar x  ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a
             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object    
         )
         file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
     else()
+        message(status "libmsprofiler_fwk.a can not be found!")
         file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "")
         set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc)
     endif()

From 6e5a4cc4d0b88c1a4f23a5924a66efe13c738c7b Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 11:22:15 +0800
Subject: [PATCH 12/23] update cmakelist

---
 CMakeLists.txt    |  2 +-
 ge/CMakeLists.txt | 32 ++++++++++++++++----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bea12fcc..0bd4b913 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC)
             find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
             find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             if(PRODUCT STREQUAL "flr3")
                 message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 073ca05c..015e7a1d 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -603,22 +603,22 @@ set(INFER_SRC_LIST
     "analyzer/analyzer.cc"
 )
 
-if (ENABLE_OPEN_SRC)
-    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
-    if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
-        message(status "libmsprofiler_fwk.a has been found!")
-        execute_process(
-            COMMAND ar x  ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a
-            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object    
-        )
-        file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
-    else()
-        message(status "libmsprofiler_fwk.a can not be found!")
-        file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "")
-        set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc)
-    endif()
-    add_library(msprofiler_fwk OBJECT ${msprof_file})
-endif()
+#if (ENABLE_OPEN_SRC)
+#    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
+#    if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
+#        message(status "libmsprofiler_fwk.a has been found!")
+#        execute_process(
+#            COMMAND ar x  ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a
+#            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object    
+#        )
+#        file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
+#    else()
+#        message(status "libmsprofiler_fwk.a can not be found!")
+#        file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "")
+#        set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc)
+#    endif()
+#    add_library(msprofiler_fwk OBJECT ${msprof_file})
+#endif()
 
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_runner.so ############

From b1b30afd4e47113d02cc2552582d501ad547ba95 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 14:48:51 +0800
Subject: [PATCH 13/23] update cmakelist

---
 CMakeLists.txt    | 2 +-
 ge/CMakeLists.txt | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bd4b913..bea12fcc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC)
             find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
             find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             if(PRODUCT STREQUAL "flr3")
                 message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 015e7a1d..6fe43a1c 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -620,6 +620,12 @@ set(INFER_SRC_LIST
 #    add_library(msprofiler_fwk OBJECT ${msprof_file})
 #endif()
 
+if (ENABLE_OPEN_SRC)
+    add_library(msprofiler_fwk STATIC IMPORTED)
+    set_target_properties(msprofiler_fwk PROPERTIES
+           IMPORTED_LOCATION ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
+endif()
+
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_runner.so ############
 add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)

From af5190136b84f59ad947a3b44b74f084a55c63d8 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 15:41:04 +0800
Subject: [PATCH 14/23] update cmakelist

---
 CMakeLists.txt    |  6 +++---
 ge/CMakeLists.txt | 28 ++++------------------------
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bea12fcc..7416a130 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,7 +80,7 @@ if (ENABLE_OPEN_SRC)
         find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
         find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
         find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
-        #find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
+        find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
         #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
     else()
         find_module(slog libslog.so ${ASCEND_ATC_DIR})
@@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC)
             find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
             find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             if(PRODUCT STREQUAL "flr3")
                 message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
@@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC)
             find_module(resource libresource.so ${ASCEND_ATC_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
-            #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
         else()
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 6fe43a1c..e18e3a1d 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -603,32 +603,9 @@ set(INFER_SRC_LIST
     "analyzer/analyzer.cc"
 )
 
-#if (ENABLE_OPEN_SRC)
-#    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
-#    if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
-#        message(status "libmsprofiler_fwk.a has been found!")
-#        execute_process(
-#            COMMAND ar x  ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a
-#            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object    
-#        )
-#        file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
-#    else()
-#        message(status "libmsprofiler_fwk.a can not be found!")
-#        file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "")
-#        set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc)
-#    endif()
-#    add_library(msprofiler_fwk OBJECT ${msprof_file})
-#endif()
-
-if (ENABLE_OPEN_SRC)
-    add_library(msprofiler_fwk STATIC IMPORTED)
-    set_target_properties(msprofiler_fwk PROPERTIES
-           IMPORTED_LOCATION ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a)
-endif()
-
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_runner.so ############
-add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
+add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})
 
 target_compile_definitions(ge_runner PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -674,6 +651,9 @@ target_link_libraries(ge_runner
     ge_memory
     adump_server
     static_mmpa
+    -Wl,--whole-archive
+    msprofiler_fwk
+    -Wl,--no-whole-archive
     -Wl,--no-as-needed
     graph
     ge_common

From f2821fbf25dc70c3cf4ed2d368a22fbba1c3bfc3 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 17:49:02 +0800
Subject: [PATCH 15/23] target ge_runner link

---
 ge/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index e18e3a1d..b2186530 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -646,7 +646,7 @@ target_include_directories(ge_runner PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
-target_link_libraries(ge_runner
+target_link_libraries(ge_runner PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     ge_memory
     adump_server

From 60c780ba524920b1ad8892b1888916f7feb3790a Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 17:57:54 +0800
Subject: [PATCH 16/23] update ascendcl cmakelist

---
 ge/CMakeLists.txt | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index b2186530..753ae871 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -767,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE
     -Wl,--allow-multiple-definition
     -Wl,-z,muldefs
     -Wl,-Bsymbolic
-    -Wl,--exclude-libs,ALL
+    -Wl,--exclude-libs,libascend_protobuf.a
+    -Wl,--exclude-libs,libge_executor.a
+    -Wl,--exclude-libs,libge_common.a
+    -Wl,--exclude-libs,libgraph.a
+    -Wl,--exclude-libs,libmmpa.a
+    -Wl,--exclude-libs,libregister.a
+    -Wl,--exclude-libs,liberror_manager.a
+    -Wl,--exclude-libs,libadump_server.a
 )
 target_link_libraries(opensrc_ascendcl PRIVATE
                      -Wl,--whole-archive

From d2ef3006a2cdc8b9788187a09a363563a22e771c Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 19:33:49 +0800
Subject: [PATCH 17/23] update ge_compiler cmake

---
 ge/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 753ae871..26a7ee99 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -713,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
-target_link_libraries(ge_compiler
+target_link_libraries(ge_compiler PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     ge_memory
     static_mmpa

From 7bdcefaf5e7014ddac2e51153ed7492e87ae7e09 Mon Sep 17 00:00:00 2001
From: lichun <lichun30@hisilicon.com>
Date: Wed, 16 Dec 2020 20:29:01 +0800
Subject: [PATCH 18/23] support load om through LoadRootModel

---
 ge/graph/load/new_model_manager/model_manager.cc | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index b595ac39..d19ca643 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
   mmTimespec timespec = mmGetTickCount();
 
   ModelHelper model_helper;
-  Status ret = model_helper.LoadModel(model);
+  Status ret = model_helper.LoadRootModel(model);
+  if (model_helper.GetModelType()) {
+    bool is_shape_unknown = false;
+    GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
+                      "CheckIsUnknownShape failed, model id:%u",
+                      model_id);
+    if (is_shape_unknown || GetContext().GetHostExecFlag()) {
+      return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
+    }
+  }
   if (ret != SUCCESS) {
     GELOGE(ret, "load model failed.");
     return ret;

From d4bf8cc6acf983a9393951d4a9b8c128b01eeeab Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Sat, 12 Dec 2020 15:50:47 +0800
Subject: [PATCH 19/23] provide option to download third party software from
 cache server

---
 CMakeLists.txt                            |  13 +-
 cmake/external_libs/gflags.cmake          |   1 +
 cmake/external_libs/gtest.cmake           |   8 +-
 cmake/external_libs/json.cmake            |  19 +-
 cmake/external_libs/onnx.cmake            |   6 +-
 cmake/external_libs/protobuf_shared.cmake |   1 +
 cmake/external_libs/protobuf_static.cmake |   1 +
 cmake/external_libs/protoc.cmake          | 231 +++++++++++-----------
 cmake/external_libs/securec.cmake         |  13 +-
 9 files changed, 161 insertions(+), 132 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86d0184b..49724b41 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,8 +16,11 @@ endif()
 
 if(DEFINED ENV{D_PKG_SERVER})
     set(GE_PB_PKG $ENV{D_PKG_SERVER})
-    message("Download packages from PKG server")
-endif()
+    message("Download packages from DPKG server")
+elseif(DEFINED ENV{MSLIBS_SERVER})
+    set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081")
+    message("Download packages from MSPKG server")
+endif ()
 
 set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
 set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
@@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC)
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
             find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
-	        #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
+            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
             if(PRODUCT STREQUAL "flr3")
             elseif(PRODUCT STREQUAL "flr1")
                 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -127,10 +130,10 @@ if (ENABLE_OPEN_SRC)
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
         else()
-	    message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
+            message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
         endif()
 
-	if (ENABLE_GE_COV OR ENABLE_GE_UT)
+        if (ENABLE_GE_COV OR ENABLE_GE_UT)
             add_subdirectory(tests)
         endif()
 
diff --git a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake
index f3f0f0ef..50cfb2bc 100755
--- a/cmake/external_libs/gflags.cmake
+++ b/cmake/external_libs/gflags.cmake
@@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
                     BUILD_COMMAND $(MAKE)
                     INSTALL_COMMAND $(MAKE) install
diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake
index 96ea84b4..c5edcd72 100755
--- a/cmake/external_libs/gtest.cmake
+++ b/cmake/external_libs/gtest.cmake
@@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
     message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
 endif()
 
-if (ENABLE_GITEE)
+if (GE_PB_PKG)
+    set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz")
+    set(MD5 "")
+elseif (ENABLE_GITEE)
     set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
     set(MD5 "")
 else()
@@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
 set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
 ExternalProject_Add(gtest_build
                     URL ${REQ_URL}
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-		    -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
+                -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
                     BUILD_COMMAND $(MAKE)
                     INSTALL_COMMAND $(MAKE) install
                     EXCLUDE_FROM_ALL TRUE 
diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake
index c4a52843..3c1cd012 100755
--- a/cmake/external_libs/json.cmake
+++ b/cmake/external_libs/json.cmake
@@ -5,19 +5,24 @@ endif()
 include(ExternalProject)
 
 set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
-#if (ENABLE_GITEE)
+if (GE_PB_PKG)
+    set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
+    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
+    set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
+#elseif (ENABLE_GITEE)
 #    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
 #    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
-#else()
-set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
-set(MD5 "0dc903888211db3a0f170304cd9f3a89")
-set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#endif ()
+#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
+else()
+    set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
+    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
+    set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
+endif ()
 ExternalProject_Add(json_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/cloud_code/pkg/include.zip
                     SOURCE_DIR  ${JSON_SRC_DIR}
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ""
                     BUILD_COMMAND ""
                     INSTALL_COMMAND ""
diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake
index 9dadb544..1ee80d2d 100755
--- a/cmake/external_libs/onnx.cmake
+++ b/cmake/external_libs/onnx.cmake
@@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
 set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
 file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})
 
-if (ENABLE_GITEE)
+if (GE_PB_PKG)
+    set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
+    set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
+elseif (ENABLE_GITEE)
     set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
     set(MD5 "1bdbcecdd68ea8392630467646776e02")
 else()
@@ -19,6 +22,7 @@ ExternalProject_Add(onnx
                     #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
                     #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
                     #SOURCE_DIR ${ONNX_SRC_DIR}
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ""
                     BUILD_COMMAND ""
                     #INSTALL_COMMAND "" 
diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake
index c9c6b7d9..6334c8a3 100755
--- a/cmake/external_libs/protobuf_shared.cmake
+++ b/cmake/external_libs/protobuf_shared.cmake
@@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
 set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
 ExternalProject_Add(protobuf_build
                     URL ${REQ_URL}
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -Dprotobuf_WITH_ZLIB=OFF
                     -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake
index 6f3e1f53..e4bbb9a0 100755
--- a/cmake/external_libs/protobuf_static.cmake
+++ b/cmake/external_libs/protobuf_static.cmake
@@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake
index 0d162c0d..58321f04 100755
--- a/cmake/external_libs/protoc.cmake
+++ b/cmake/external_libs/protoc.cmake
@@ -1,115 +1,116 @@
-if (HAVE_PROTOC)
-    return()
-endif()
-
-include(ExternalProject)
-include(GNUInstallDirs)
-#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
-
-if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
-    (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
-    set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
-    message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
-endif()
-
-if(GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
-else()
-    if (ENABLE_GITEE)
-        set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
-        set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
-    else()
-        set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
-        set(MD5 "3d9e32700639618a4d2d342c99d4507a")
-    endif ()
-endif()
-
-set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
-set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
-ExternalProject_Add(protoc_build
-                    URL ${REQ_URL}
-                    #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
-                    #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
-                    CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
-                    BUILD_COMMAND $(MAKE)
-                    INSTALL_COMMAND $(MAKE) install
-                    EXCLUDE_FROM_ALL TRUE
-)
-
-set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
-
-set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
-
-function(protobuf_generate comp c_var h_var)
-    if(NOT ARGN)
-        message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
-        return()
-    endif()
-    set(${c_var})
-    set(${h_var})
-
-    foreach(file ${ARGN})
-        get_filename_component(abs_file ${file} ABSOLUTE)
-        get_filename_component(file_name ${file} NAME_WE)
-        get_filename_component(file_dir ${abs_file} PATH)
-        get_filename_component(parent_subdir ${file_dir} NAME)
-
-        if("${parent_subdir}" STREQUAL "proto")
-            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
-        else()
-            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
-        endif()
-        list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
-        list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
-
-        add_custom_command(
-                OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
-                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
-                COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
-                DEPENDS protoc_build ${abs_file}
-                COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
-    endforeach()
-
-    set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
-    set(${c_var} ${${c_var}} PARENT_SCOPE)
-    set(${h_var} ${${h_var}} PARENT_SCOPE)
-
-endfunction()
-
-function(protobuf_generate_py comp py_var)
-    if(NOT ARGN)
-        message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
-        return()
-    endif()
-    set(${py_var})
-
-    foreach(file ${ARGN})
-        get_filename_component(abs_file ${file} ABSOLUTE)
-        get_filename_component(file_name ${file} NAME_WE)
-        get_filename_component(file_dir ${abs_file} PATH)
-        get_filename_component(parent_subdir ${file_dir} NAME)
-
-        if("${parent_subdir}" STREQUAL "proto")
-            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
-        else()
-            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
-        endif()
-        list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
-
-        add_custom_command(
-                OUTPUT "${proto_output_path}/${file_name}_pb2.py"
-                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
-                COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
-                DEPENDS protoc_build ${abs_file}
-                COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
-    endforeach()
-
-    set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
-    set(${py_var} ${${py_var}} PARENT_SCOPE)
-
-endfunction()
-
-#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
-set(HAVE_PROTOC TRUE)
+if (HAVE_PROTOC)
+    return()
+endif()
+
+include(ExternalProject)
+include(GNUInstallDirs)
+#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
+
+if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
+    (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
+    set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
+    message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
+endif()
+
+if(GE_PB_PKG)
+    set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
+else()
+    if (ENABLE_GITEE)
+        set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
+        set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
+    else()
+        set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
+        set(MD5 "3d9e32700639618a4d2d342c99d4507a")
+    endif ()
+endif()
+
+set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
+set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
+ExternalProject_Add(protoc_build
+                    URL ${REQ_URL}
+                    #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
+                    #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
+                    TLS_VERIFY OFF
+                    CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
+                    BUILD_COMMAND $(MAKE)
+                    INSTALL_COMMAND $(MAKE) install
+                    EXCLUDE_FROM_ALL TRUE
+)
+
+set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
+
+set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
+
+function(protobuf_generate comp c_var h_var)
+    if(NOT ARGN)
+        message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
+        return()
+    endif()
+    set(${c_var})
+    set(${h_var})
+
+    foreach(file ${ARGN})
+        get_filename_component(abs_file ${file} ABSOLUTE)
+        get_filename_component(file_name ${file} NAME_WE)
+        get_filename_component(file_dir ${abs_file} PATH)
+        get_filename_component(parent_subdir ${file_dir} NAME)
+
+        if("${parent_subdir}" STREQUAL "proto")
+            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
+        else()
+            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
+        endif()
+        list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
+        list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
+
+        add_custom_command(
+                OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
+                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+                COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
+                COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
+                DEPENDS protoc_build ${abs_file}
+                COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
+    endforeach()
+
+    set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
+    set(${c_var} ${${c_var}} PARENT_SCOPE)
+    set(${h_var} ${${h_var}} PARENT_SCOPE)
+
+endfunction()
+
+function(protobuf_generate_py comp py_var)
+    if(NOT ARGN)
+        message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
+        return()
+    endif()
+    set(${py_var})
+
+    foreach(file ${ARGN})
+        get_filename_component(abs_file ${file} ABSOLUTE)
+        get_filename_component(file_name ${file} NAME_WE)
+        get_filename_component(file_dir ${abs_file} PATH)
+        get_filename_component(parent_subdir ${file_dir} NAME)
+
+        if("${parent_subdir}" STREQUAL "proto")
+            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
+        else()
+            set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
+        endif()
+        list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
+
+        add_custom_command(
+                OUTPUT "${proto_output_path}/${file_name}_pb2.py"
+                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+                COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
+                COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
+                DEPENDS protoc_build ${abs_file}
+                COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
+    endforeach()
+
+    set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
+    set(${py_var} ${${py_var}} PARENT_SCOPE)
+
+endfunction()
+
+#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
+set(HAVE_PROTOC TRUE)
diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake
index 0bd62ab2..0f8b6d3a 100755
--- a/cmake/external_libs/securec.cmake
+++ b/cmake/external_libs/securec.cmake
@@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
     message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
 endif()
 
+if (GE_PB_PKG)
+    set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
+    set(MD5 "")
+else()
+    set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
+    set(MD5 "")
+endif ()
+
 ExternalProject_Add(c_sec_build
-                    URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
-                    #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
+                    URL ${REQ_URL}
+                    #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
                     #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
                     PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
+                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

From 71f0dd4cab47930304c87a7965a4e47efc70d76a Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Wed, 16 Dec 2020 21:14:29 +0800
Subject: [PATCH 20/23] update profiling training_Trace parser

---
 ge/common/profiling/profiling_manager.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 456cb0a4..214f58f4 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
   }
   try {
     Json prof_options = Json::parse(options);
+    if (options.find(kTrainingTrace) == std::string::npos) {
+      return ge::SUCCESS;
+    }
     const std::string training_trace = prof_options[kTrainingTrace];
     if (training_trace.empty()) {
       GELOGI("Training trace will not take effect.");

From 48ab1dbf12a267c57f1806a21b88b077ce40f0bb Mon Sep 17 00:00:00 2001
From: wxl <wanxuelei@huawei.com>
Date: Thu, 17 Dec 2020 11:08:31 +0800
Subject: [PATCH 21/23] update submodule

---
 metadef | 2 +-
 parser  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadef b/metadef
index 97f45957..129b50b4 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 97f4595760f034bd06fca6c8e9459039413fbe2f
+Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b
diff --git a/parser b/parser
index 6420c719..e9f7d019 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit 6420c71989f6f7b36154b226bd8aea7790266ad1
+Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89

From 984fd1bae6c2685e11f757a45665f205279e0fd8 Mon Sep 17 00:00:00 2001
From: baker <chenhua26@hisilicon.com>
Date: Thu, 17 Dec 2020 11:40:17 +0800
Subject: [PATCH 22/23] add onnx model parse api

---
 inc/framework/omg/parser/model_parser.h | 78 +++++++++++++++----------
 1 file changed, 46 insertions(+), 32 deletions(-)

diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h
index 20bfcef4..57cff9a7 100644
--- a/inc/framework/omg/parser/model_parser.h
+++ b/inc/framework/omg/parser/model_parser.h
@@ -36,7 +36,7 @@ using Status = domi::Status;
 
 namespace domi {
 using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
-    const google::protobuf::Message *root_proto, const std::string &graph)>;
+  const google::protobuf::Message *root_proto, const std::string &graph)>;
 class ModelParser {
  public:
   ModelParser() {}
@@ -44,19 +44,20 @@ class ModelParser {
   virtual ~ModelParser() {}
 
   /**
-  * @ingroup domi_omg
-  * @brief Analyze network model data
-  * @param [in] file  Network model file path
-  * @param [in|out]  graph Save the network information after analysis
-  * @return SUCCESS
-  * @return Others failed
-  */
+   * @ingroup domi_omg
+   * @brief Analyze network model data
+   * @param [in] file  Network model file path
+   * @param [in|out]  graph Save the network information after analysis
+   * @return SUCCESS
+   * @return Others failed
+   */
   virtual Status Parse(const char *file, ge::Graph &graph) = 0;
 
   /**
    * @ingroup domi_omg
    * @brief Parse relevant data from memory and save it to graph
    * @param [in] input Model file memory data
+   * @param [in] size Model file memory size
    * @param [in|out] graph A graph for saving the model information after analysis
    * @return SUCCESS
    * @return FAILED
@@ -64,36 +65,49 @@ class ModelParser {
    */
   virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;
 
+#ifndef ONLY_COMPILE_OPEN_SRC
+  /**
+   * @ingroup domi_omg
+   * @brief Parse relevant data from memory and save it to graph
+   * @param [in] data Model file memory data
+   * @param [in] size Model file memory size
+   * @param [in|out] graph A graph for saving the model information after analysis
+   * @return SUCCESS
+   * @return FAILED
+   * @author
+   */
+  virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
+#endif
+
   /**
-  * @ingroup domi_omg
-  * @brief Analyze network model data
-  * @param [in] proto  network model
-  * @param [in|out]  graph Save the network information after analysis
-  * @return SUCCESS
-  * @return Others failed
-  */
+   * @ingroup domi_omg
+   * @brief Analyze network model data
+   * @param [in] proto  network model
+   * @param [in|out]  graph Save the network information after analysis
+   * @return SUCCESS
+   * @return Others failed
+   */
   virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;
 
   /**
-  * @ingroup domi_omg
-  * @brief Analyze callback model data in subgraph
-  * @param [in] proto network model
-  * @param [in] callback callback of subgraph
-  * @param [in|out] graph Save the network information after analysis
-  * @return SUCCESS
-  * @return Others failed
-  */
-  virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto,
-                                        GetGraphCallback callback,
+   * @ingroup domi_omg
+   * @brief Analyze callback model data in subgraph
+   * @param [in] proto network model
+   * @param [in] callback callback of subgraph
+   * @param [in|out] graph Save the network information after analysis
+   * @return SUCCESS
+   * @return Others failed
+   */
+  virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
                                         ge::ComputeGraphPtr &graph) = 0;
   /**
-  * @ingroup domi_omg
-  * @brief Convert model files to JSON format
-  * @param [in] model_file  Model file path to be converted
-  * @param [out] json_file Converted JSON file path
-  * @return SUCCESS
-  * @return Others failed
-  */
+   * @ingroup domi_omg
+   * @brief Convert model files to JSON format
+   * @param [in] model_file  Model file path to be converted
+   * @param [out] json_file Converted JSON file path
+   * @return SUCCESS
+   * @return Others failed
+   */
   virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; }
 
   /*

From 70651ccf70234d12dae1c25482f7be55e14c367d Mon Sep 17 00:00:00 2001
From: l00444296 <lixiwen1@huawei.com>
Date: Thu, 17 Dec 2020 16:17:57 +0800
Subject: [PATCH 23/23] Feature: delete several para of aclgrphParse interface

---
 ge/ir_build/ge_ir_build.cc | 119 -------------------------------------
 1 file changed, 119 deletions(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 3a1a9fb9..0e1d9452 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -226,10 +226,7 @@ class Impl {
   };
   ~Impl() { (void)generator_.Finalize(); };
   graphStatus CheckOptions(const std::map<std::string, std::string> &options);
-  graphStatus CheckInputFormat(const string &input_format);
   graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
-  graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag);
-  graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format);
   graphStatus UpdateDataOpAttr(const Graph &graph);
   graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
   graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
@@ -323,106 +320,6 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
   return GRAPH_SUCCESS;
 }
 
-graphStatus Impl::CheckInputFormat(const string &input_format) {
-  if (!input_format.empty()) {
-    auto iter = ge::input_format_str_to_geformat.find(input_format);
-    if (iter == ge::input_format_str_to_geformat.end()) {
-      GELOGE(GRAPH_PARAM_INVALID, "Input format %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.",
-             input_format.c_str());
-      return GRAPH_PARAM_INVALID;
-    }
-  }
-  return GRAPH_SUCCESS;
-}
-
-graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_format) {
-  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
-  GE_CHECK_NOTNULL(compute_graph);
-  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
-    GE_CHECK_NOTNULL(input_node);
-    ge::OpDescPtr op = input_node->GetOpDesc();
-    GE_CHECK_NOTNULL(op);
-    if (op->GetType() == DATA) {
-      string data_op_name = op->GetName();
-      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
-      ge::GeTensorDesc tensor = op->GetInputDesc(0);
-      ge::GeShape data_shape = tensor.GetShape();
-      GELOGD("Data op get shape from InputDesc in ge ir graph.");
-
-      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
-      if (tmp_shape.empty()) {
-        GELOGD("Data op: %s has zero shape dims!", data_op_name.c_str());
-        continue;
-      }
-
-      bool is_dynamic_input = false;
-      for (auto tmp_dim : tmp_shape) {
-        if (tmp_dim < 0) {
-          is_dynamic_input = true;
-        }
-      }
-
-      if (is_dynamic_input) {
-        string tmp_data_format = ge::TypeUtils::FormatToSerialString(tensor.GetFormat());
-        if (!default_format.empty() && tmp_data_format!=default_format) {
-          GELOGE(GRAPH_PARAM_INVALID, "All data op with dynamic shape has no default format!");
-          return GRAPH_PARAM_INVALID;
-        } else if (default_format.empty()) {
-          default_format.assign(tmp_data_format);
-        }
-        GELOGD("Data op name: %s,  data format: %s.", data_op_name.c_str(), default_format.c_str());
-      }
-    }
-  }
-  GELOGI("Get default data op format: %s from ge ir graph.", default_format.c_str());
-  return GRAPH_SUCCESS;
-}
-
-graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) {
-  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
-  GE_CHECK_NOTNULL(compute_graph);
-  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
-    GE_CHECK_NOTNULL(input_node);
-    ge::OpDescPtr op = input_node->GetOpDesc();
-    GE_CHECK_NOTNULL(op);
-    if (op->GetType() == DATA) {
-      string data_op_name = op->GetName();
-      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
-      ge::GeTensorDesc tensor = op->GetInputDesc(0);
-      ge::GeShape data_shape = tensor.GetShape();
-      GELOGD("Data op get shape from InputDesc in ge ir graph.");
-
-      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
-      if (tmp_shape.empty()) {
-        GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str());
-        continue;
-      }
-
-      string tmp_shape_str;
-      bool is_dynamic_input = false;
-
-      tmp_shape_str += data_op_name + ":";
-      for (auto tmp_dim : tmp_shape) {
-        if (tmp_dim < 0) {
-          is_dynamic_input = true;
-        }
-        tmp_shape_str += to_string((long)tmp_dim) + ",";
-      }
-      tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);
-      tmp_shape_str += ";";
-
-      if (is_dynamic_input) {
-        dynamic_shape_flag = true;
-        default_shape += tmp_shape_str;
-        GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
-      }
-    }
-  }
-  default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1));
-  GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str());
-  return GRAPH_SUCCESS;
-}
-
 graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) {
   // 1. check options
   graphStatus ret = CheckOptions(options);
@@ -444,24 +341,8 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
   GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID);
   options_[ge::ir_option::LOG_LEVEL] = log;
 
-  string default_input_shape;
-  bool dynamic_shape_flag = false;
   string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"];
-  if (input_shape.empty()) {
-    GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, default_input_shape, dynamic_shape_flag) == ge::SUCCESS,
-                     return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!");
-    input_shape.assign(default_input_shape);
-  }
-
-  string default_input_format;
   string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"];
-  if (!input_format.empty()) {
-    GE_CHK_BOOL_RET_STATUS_NOLOG(CheckInputFormat(input_format) == GRAPH_SUCCESS, GRAPH_PARAM_INVALID);
-  } else if (dynamic_shape_flag) {
-    GE_CHK_BOOL_EXEC(GetDefaultInputFormat(graph, default_input_format) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID,
-                     "Get default data op format from graph failed!");
-    input_format.assign(default_input_format);
-  }
   string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"];
   string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end()
                                   ? ""