diff --git a/mindspore/lite/src/lite_model.cc b/mindspore/lite/src/lite_model.cc
index caf7038dbf..17c58cf4aa 100644
--- a/mindspore/lite/src/lite_model.cc
+++ b/mindspore/lite/src/lite_model.cc
@@ -126,7 +126,7 @@ void LiteModel::Destroy() {
 
 int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) {
   if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr ||
       sub_graph.nodeIndices() == nullptr || sub_graph.tensorIndices() == nullptr) {
-    MS_LOG(ERROR) << "sub_graph is invalid.";
+    MS_LOG(ERROR) << "sub_graph is invalid";
     return RET_ERROR;
   }
diff --git a/mindspore/lite/src/ops/squeeze.cc b/mindspore/lite/src/ops/squeeze.cc
index 93e4422d53..da3d816cfc 100644
--- a/mindspore/lite/src/ops/squeeze.cc
+++ b/mindspore/lite/src/ops/squeeze.cc
@@ -111,11 +111,10 @@ int Squeeze::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> out
   std::vector<int> out_shape;
 
   auto axis = GetAxis();
-  std::vector<int> axes_;
-  for (auto iter = axis.begin(); iter != axis.end(); iter++) {
-    axes_.push_back(*iter);
-  }
-  if (axes_.size() == 0) {
+  std::vector<int> axes;
+  std::transform(axis.begin(), axis.end(), std::back_inserter(axes),
+                 [in_shape](int a) { return a >= 0 ? a : a + in_shape.size(); });
+  if (axes.size() == 0) {
     for (size_t i = 0; i < in_shape.size(); i++) {
       if (in_shape.at(i) != 1) {
         out_shape.push_back(in_shape.at(i));
@@ -124,7 +123,7 @@ int Squeeze::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> out
   } else {
     size_t axisIdx = 0;
     for (size_t i = 0; i < in_shape.size(); i++) {
-      if (axisIdx < axes_.size() && axes_.at(axisIdx) == static_cast<int>(i)) {
+      if (axisIdx < axes.size() && axes.at(axisIdx) == static_cast<int>(i)) {
         MS_ASSERT(in_shape.at(i) == 1);
         axisIdx++;
         continue;
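
Note on the Squeeze change: the copy loop is replaced by a std::transform that also folds negative axes back into [0, rank). Below is a self-contained sketch of that pattern with a toy SqueezeShape helper (not the lite op itself); the sketch additionally sorts the normalized axes, which the index walk implicitly assumes.

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <vector>

std::vector<int> SqueezeShape(const std::vector<int> &in_shape, const std::vector<int> &axis) {
  // Normalize negative axes: -1 means the last dimension, and so on.
  std::vector<int> axes;
  std::transform(axis.begin(), axis.end(), std::back_inserter(axes),
                 [&in_shape](int a) { return a >= 0 ? a : a + static_cast<int>(in_shape.size()); });
  std::sort(axes.begin(), axes.end());
  std::vector<int> out_shape;
  if (axes.empty()) {
    // No axes given: drop every dimension equal to 1.
    std::copy_if(in_shape.begin(), in_shape.end(), std::back_inserter(out_shape),
                 [](int d) { return d != 1; });
    return out_shape;
  }
  size_t axis_idx = 0;
  for (size_t i = 0; i < in_shape.size(); ++i) {
    if (axis_idx < axes.size() && axes.at(axis_idx) == static_cast<int>(i)) {
      assert(in_shape.at(i) == 1);  // a squeezed axis must have extent 1
      ++axis_idx;
      continue;
    }
    out_shape.push_back(in_shape.at(i));
  }
  return out_shape;
}

int main() {
  // {1, 3, 1, 2} squeezed on axis -2 (i.e. axis 2) -> {1, 3, 2}
  for (int d : SqueezeShape({1, 3, 1, 2}, {-2})) std::cout << d << ' ';
  std::cout << '\n';
  return 0;
}
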
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
index 43373dcf7a..2b4737ba47 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
@@ -27,12 +27,26 @@ using mindspore::schema::PrimitiveType_Merge;
 
 namespace mindspore::kernel {
 int MergeCPUKernel::FreeInWorkTensor() const {
-  for (auto &in_tensor : this->in_tensors_) {
-    MS_ASSERT(in_tensor != nullptr);
-    if (in_tensor->root_tensor() == in_tensor) {
-      continue;
+  size_t stride = in_tensors_.size() / 2;
+  if (this->ready_part_ == LEFT_INPUT_PART) {
+    for (size_t i = 0; i < stride; ++i) {
+      auto in_tensor = in_tensors_[i];
+      MS_ASSERT(in_tensor != nullptr);
+      if (in_tensor->root_tensor() == in_tensor) {
+        continue;
+      }
+      in_tensor->DecRefCount();
+    }
+  }
+  if (this->ready_part_ == RIGHT_INPUT_PART) {
+    for (size_t i = stride; i < in_tensors_.size(); ++i) {
+      auto in_tensor = in_tensors_[i];
+      MS_ASSERT(in_tensor != nullptr);
+      if (in_tensor->root_tensor() == in_tensor) {
+        continue;
+      }
+      in_tensor->DecRefCount();
     }
-    in_tensor->DecRefCount();
   }
   return RET_OK;
 }
@@ -102,15 +116,15 @@ InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope
 
 int MergeCPUKernel::Run() {
   MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
-  auto ready_part = FindReadyPart(this->in_tensors_);
-  if (ready_part == LEFT_INPUT_PART) {
+  ready_part_ = FindReadyPart(this->in_tensors_);
+  if (ready_part_ == LEFT_INPUT_PART) {
    auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(), this->in_tensors_.begin(),
                        this->in_tensors_.end());
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "carry data error : " << ret;
      return ret;
    }
-  } else if (ready_part == RIGHT_INPUT_PART) {
+  } else if (ready_part_ == RIGHT_INPUT_PART) {
    auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(),
                        (this->in_tensors_.begin() + in_tensors_.size() / 2), this->in_tensors_.end());
    if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.h b/mindspore/lite/src/runtime/kernel/arm/base/merge.h
index 7268a1c2ba..096d14fa95 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/merge.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.h
@@ -41,7 +41,7 @@ class MergeCPUKernel : public CarryDataKernel {
   InputPart FindReadyPart(const std::vector<lite::Tensor *> &scope_tensors);
 
  private:
-  bool PartialInputReady(int num_begin, int num_end);
+  InputPart ready_part_ = UNKNOWN_INPUT_PART;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
index b1f63901c8..497759a434 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
@@ -42,6 +42,11 @@ int TransposeCPUKernel::ReSize() {
   if (in_tensors_.at(kInputIndex)->shape().size() != static_cast<size_t>(param->num_axes_) && in_tensors_.size() != 2) {
     return RET_OK;
   }
+  if (in_tensors_.size() == 2) {
+    auto input_perm = in_tensors_.at(1);
+    MS_ASSERT(input_perm != nullptr);
+    param->num_axes_ = input_perm->ElementsNum();
+  }
   auto &inTensor = in_tensors_.front();
   auto &outTensor = out_tensors_.front();
   auto in_shape = inTensor->shape();
@@ -94,14 +99,12 @@ int TransposeCPUKernel::Run() {
     MS_ASSERT(input_perm != nullptr);
     MS_ASSERT(input_perm->data_c() != nullptr);
     int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
-    auto perm = std::vector<int>{perm_data, perm_data + input_perm->ElementsNum()};
     for (int i = 0; i < input_perm->ElementsNum(); ++i) {
-      param->perm_[i] = perm[i];
+      param->perm_[i] = perm_data[i];
     }
     for (int i = input_perm->ElementsNum(); i <= 8; ++i) {
      param->perm_[i] = 0;
    }
-    param->num_axes_ = input_perm->ElementsNum();
   }
   if (in_tensor->shape().size() != static_cast<size_t>(param->num_axes_)) {
     memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float));
diff --git a/mindspore/lite/test/models_tf.cfg b/mindspore/lite/test/models_tf.cfg
index 93a35ce588..e62f926486 100644
--- a/mindspore/lite/test/models_tf.cfg
+++ b/mindspore/lite/test/models_tf.cfg
@@ -1,3 +1,4 @@
 decoder_step_201217.pb 5
 decoder_step_201217_modified.pb 5
 unet_model_reconstruct.pb 1;1,256,256,3
+encoder_201228.pb 3;1:1,22:1
diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh
index 3c84cc1f71..d953c3a4a8 100644
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@@ -336,9 +336,9 @@ function Run_x86() {
         else
             run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
-        # run benchmark test without clib data
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --inputShapes='${input_shapes}' --modelFile='${ms_models_path}'/'${model_name}'.ms' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --inputShapes=${input_shapes} --modelFile=${ms_models_path}/${model_name}.ms >> "${run_x86_log_file}"
+        # run benchmark test with input data
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${input_files}' --inputShapes='${input_shapes} >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=${input_files} --inputShapes=${input_shapes} >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -1229,10 +1229,10 @@ function Run_arm64() {
         else
             run_result='arm64: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
-        # run benchmark test without clib data
+        # run benchmark test with input data
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --inDataFile='${input_files}' --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --inDataFile='${input_files}' --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc
index c7ff63c855..82723d216b 100644
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -327,8 +327,9 @@ int Benchmark::CompareDataGetTotalBiasAndSize(const std::string &name, tensor::M
     MS_LOG(ERROR) << "mutableData is nullptr.";
     return RET_ERROR;
   }
-  switch (msCalibDataType) {
-    case TypeId::kNumberTypeFloat: {
+  switch (tensor->data_type()) {
+    case TypeId::kNumberTypeFloat:
+    case TypeId::kNumberTypeFloat32: {
       bias = CompareData<float>(name, tensor->shape(), mutableData);
       break;
     }
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/switch_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/switch_pass.cc
index b335c3f77a..444e898bd6 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/switch_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/switch_pass.cc
@@ -43,6 +43,35 @@ STATUS SwitchPass::Run(mindspore::schema::MetaGraphT *graph) {
       return ret;
     }
   }
+  // remove empty subgraphs
+  std::vector<std::unique_ptr<schema::SubGraphT>> new_sub_graphs;
+  std::map<size_t, size_t> sub_graph_index_map;
+  for (size_t i = 0; i < graph->subGraph.size(); ++i) {
+    auto &sub_graph = graph->subGraph.at(i);
+    if (!sub_graph->nodeIndices.empty()) {
+      new_sub_graphs.emplace_back(std::move(sub_graph));
+      sub_graph_index_map.emplace(std::make_pair(i, new_sub_graphs.size() - 1));
+    }
+  }
+  graph->subGraph.swap(new_sub_graphs);
+  for (size_t i = 0; i < graph->nodes.size(); ++i) {
+    auto &node = graph->nodes.at(i);
+    auto type = node->primitive->value.type;
+    if (type != schema::PrimitiveType_Partial) {
+      continue;
+    }
+    MS_ASSERT(node->primitive != nullptr);
+    MS_ASSERT(node->primitive->value.AsPartial() != nullptr);
+    auto partial_prim = node->primitive->value.AsPartial();
+    if (partial_prim->subGraphIndex == -1) {
+      continue;
+    }
+    if (sub_graph_index_map.find(partial_prim->subGraphIndex) == sub_graph_index_map.end()) {
+      MS_LOG(ERROR) << "subGraphIndex is illegal";
+      return RET_ERROR;
+    }
+    partial_prim->subGraphIndex = sub_graph_index_map[partial_prim->subGraphIndex];
+  }
   return RET_OK;
 }
@@ -283,7 +312,7 @@ STATUS SingleSwitchPass::InsertPartialAndMergeAfterSwitch() {
   auto origin_switch_outputs = switch_node_->outputIndex;
   switch_node_->outputIndex.clear();
   for (size_t i = 3; i < switch_node_->inputIndex.size(); i++) {
-    auto &switch_in_tensor = graph_->allTensors.at(i);
+    auto &switch_in_tensor = graph_->allTensors.at(switch_node_->inputIndex[i]);
     auto tensor = NewTensor(switch_in_tensor);
     graph_->allTensors.push_back(std::move(tensor));
     switch_node_->outputIndex.push_back(graph_->allTensors.size() - 1);
@@ -549,6 +578,9 @@ STATUS SingleSwitchPass::UpdateSubgraphOutput(const size_t &subgraph_index, sche
 
 STATUS SingleSwitchPass::ConcatCondSubgraphInputAndOutput() {
   if (first_subgraph_index_ == -1) {
+    MS_ASSERT(first_partial_node_->primitive != nullptr);
+    MS_ASSERT(first_partial_node_->primitive->value.AsPartial() != nullptr);
+    first_partial_node_->primitive->value.AsPartial()->subGraphIndex = -1;
     return RET_OK;
   }
   int ret = UpdateSubgraphInput(first_subgraph_index_, first_partial_node_, first_graph_nodes_);
@@ -567,6 +599,9 @@ STATUS SingleSwitchPass::ConcatCondSubgraphInputAndOutput() {
 
 STATUS SingleSwitchPass::ConcatBodySubgraphInputAndOutput() {
   if (second_subgraph_index_ == -1) {
+    MS_ASSERT(first_partial_node_->primitive != nullptr);
+    MS_ASSERT(first_partial_node_->primitive->value.AsPartial() != nullptr);
+    first_partial_node_->primitive->value.AsPartial()->subGraphIndex = -1;
     return RET_OK;
   }
   int ret = UpdateSubgraphInput(second_subgraph_index_, second_partial_node_, second_graph_nodes_);
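
Note on the switch_pass change: it compacts graph->subGraph by dropping empty subgraphs, then rewrites every Partial node's subGraphIndex through an old-index to new-index map. A minimal, generic sketch of that compact-and-remap pattern follows (toy std::string "subgraphs" and plain int indices, not the converter's schema types).

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> sub_graphs = {"main", "", "cond", "", "body"};  // "" stands for an empty subgraph
  std::vector<int> partial_indices = {0, 2, 4, -1};  // -1 == "no subgraph", left untouched

  // Keep only non-empty subgraphs and remember where each survivor moved.
  std::vector<std::string> kept;
  std::map<size_t, size_t> index_map;  // old index -> new index
  for (size_t i = 0; i < sub_graphs.size(); ++i) {
    if (!sub_graphs[i].empty()) {
      kept.emplace_back(std::move(sub_graphs[i]));
      index_map.emplace(i, kept.size() - 1);
    }
  }
  sub_graphs.swap(kept);

  // Patch every stored index; an index pointing at a removed subgraph is an error.
  for (int &idx : partial_indices) {
    if (idx == -1) continue;  // sentinel: nothing to remap
    auto it = index_map.find(static_cast<size_t>(idx));
    if (it == index_map.end()) {
      std::cerr << "index " << idx << " points at a removed subgraph\n";
      return 1;
    }
    idx = static_cast<int>(it->second);
  }
  for (int idx : partial_indices) std::cout << idx << ' ';  // prints: 0 1 2 -1
  std::cout << '\n';
  return 0;
}
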
diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
index bb73cd0332..8c86976420 100644
--- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
+++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
@@ -592,7 +592,17 @@ STATUS TFModelParser::ControlFlowNodePostProcess(const std::map
     auto inputs = control_flow_node->inputs();
     inputs.insert(inputs.begin() + 1, {first_value_node, second_value_node});
-    control_flow_node->set_inputs(inputs);
+    auto new_node = anf_root_graph_->NewCNode(inputs);  // must create new node, otherwise node_users won't update
+    if (new_node == nullptr) {
+      MS_LOG(ERROR) << "new node failed";
+      return RET_ERROR;
+    }
+    new_node->set_abstract(control_flow_node->abstract()->Clone());
+    new_node->set_fullname_with_scope(control_flow_node->fullname_with_scope());
+    if (!root_func_manager->Replace(control_flow_node, new_node)) {
+      MS_LOG(ERROR) << "replace new node failed";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
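
Note on the tf_model_parser change: a fresh CNode is built and swapped in through the function-graph manager instead of calling set_inputs on the existing node, so the manager's node-user index stays in sync (see the "node_users won't update" comment above). The toy illustration below shows why in-place mutation leaves such an index stale; Node and Manager here are hypothetical stand-ins, not the MindSpore API.

#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::vector<std::shared_ptr<Node>> inputs;
};
using NodePtr = std::shared_ptr<Node>;

class Manager {
 public:
  // Index the edges of a node: every input gains this node as a user.
  void Track(const NodePtr &node) {
    for (const auto &in : node->inputs) users_[in].insert(node);
  }
  // Register the new node's edges and point every user of old_node at new_node.
  bool Replace(const NodePtr &old_node, const NodePtr &new_node) {
    Track(new_node);
    for (auto &entry : users_) {
      if (entry.second.erase(old_node) > 0) entry.second.insert(new_node);
    }
    users_.erase(old_node);
    return true;
  }
  size_t NumUsers(const NodePtr &node) const {
    auto it = users_.find(node);
    return it == users_.end() ? 0 : it->second.size();
  }

 private:
  std::map<NodePtr, std::set<NodePtr>> users_;
};

int main() {
  auto value = std::make_shared<Node>(Node{"value", {}});
  auto old_node = std::make_shared<Node>(Node{"while", {}});
  Manager manager;
  manager.Track(old_node);

  // In-place edit: the manager never learns that old_node now uses `value`.
  old_node->inputs.push_back(value);
  std::cout << manager.NumUsers(value) << '\n';  // 0 -> stale user index

  // Rebuild-and-replace: the manager re-indexes the new edges.
  auto new_node = std::make_shared<Node>(Node{"while", {value}});
  manager.Replace(old_node, new_node);
  std::cout << manager.NumUsers(value) << '\n';  // 1
  return 0;
}
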