diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c
index 770815589d..fdbdc76a2e 100644
--- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c
+++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c
@@ -940,6 +940,14 @@ int ElementLogicalAndInt(const int *input0, const int *input1, int *output, cons
   return NNACL_OK;
 }
 
+int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size) {
+  // output[i] = input0[i] AND input1[i] for every i in [0, element_size); the result is always NNACL_OK.
+  for (int i = 0; i < element_size; ++i) {
+    output[i] = input0[i] && input1[i];
+  }
+  return NNACL_OK;
+}
+
 int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size) {
   ElementSub(input0, input1, output, element_size);
   return ElementMul(output, output, output, element_size);
diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h
index e2d1ac6a28..f699fe9582 100644
--- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h
+++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h
@@ -93,6 +93,7 @@ int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, f
 
 int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size);
 int ElementLogicalAndInt(const int *input0, const int *input1, int *output, const int element_size);
+int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size);
 int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                         int element_size, ArithmeticParameter *param);
 
diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc
index 98e0a4ce61..e36def21fd 100644
--- a/mindspore/lite/src/lite_kernel.cc
+++ b/mindspore/lite/src/lite_kernel.cc
@@ -43,9 +43,9 @@ void LiteKernel::FreeWorkspace() {
 }
 #endif
 bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
-  return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) {
-    if (IsContain(scope_tensors, kernel_in_tensor)) {
-      return (kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || kernel_in_tensor->ref_count() >= 1);
+  return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
+    if (IsContain(scope_tensors, in_tensor)) {
+      return in_tensor->IsReady();
     } else {
       return true;
     }
@@ -66,13 +66,9 @@ void LiteKernel::InitOutTensorInitRefCount() {
 
 int LiteKernel::DecOutTensorRefCount() {
   for (auto *tensor : this->out_tensors_) {
-    tensor->DecRefCount();
+    tensor->set_ref_count(tensor->ref_count() - 1);
     if (0 >= tensor->ref_count()) {
-      auto ret = tensor->FreeData();
-      if (0 != ret) {
-        MS_LOG(ERROR) << "Free tensor data failed";
-        return ret;
-      }
+      tensor->FreeData();
     }
   }
   return 0;
@@ -81,18 +77,10 @@ int LiteKernel::DecOutTensorRefCount() {
 int LiteKernel::FreeInWorkTensor() const {
   for (auto &in_tensor : this->in_tensors_) {
     MS_ASSERT(in_tensor != nullptr);
-    if (in_tensor->IsConst() || in_tensor->IsGraphInput()) {
+    if (in_tensor->root_tensor() == in_tensor) {
       continue;
     }
-    MS_ASSERT(in_tensor->ref_count() > 0);
-    in_tensor->set_ref_count(in_tensor->ref_count() - 1);
-    if (in_tensor->ref_count() <= 0) {
-      auto ret = in_tensor->FreeData();
-      if (0 != ret) {
-        MS_LOG(ERROR) << "Free tensor data failed";
-        return ret;
-      }
-    }
+    in_tensor->DecRefCount();
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index 8af23f0cc2..f8bdd298e8 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -157,7 +157,7 @@ class LiteKernel {
 
   const std::vector<LiteKernel *> &out_kernels() const { return this->out_kernels_; }
 
-  virtual bool IsReady(const std::vector<lite::Tensor *> &scope_tensors);
+  virtual bool IsReady(const std::vector<lite::Tensor *> &scope_tensors);  // all in-scope inputs must be ready
 
   virtual void InitOutTensorInitRefCount();
 
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index 7ddc0296e1..09fc2401d2 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -143,7 +143,7 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
       }
     }
   }
-  lite::Tensor *dst_tensor = nullptr;
+  lite::Tensor *dst_tensor = nullptr;  // never leave the pointer indeterminate, whatever path follows
   if (TypeId(src_tensor.dataType()) == kObjectTypeTensorType) {
     dst_tensor = new (std::nothrow) TensorList(shape, std::vector<int>(), src_category);
   } else {
diff --git a/mindspore/lite/src/ops/merge.cc b/mindspore/lite/src/ops/merge.cc
index 7dd0397234..a959f45d6f 100644
--- a/mindspore/lite/src/ops/merge.cc
+++ b/mindspore/lite/src/ops/merge.cc
@@ -18,6 +18,7 @@
 #ifndef PRIMITIVE_WRITEABLE
 #include "src/ops/ops_register.h"
 #endif
+#include "src/tensorlist.h"
 
 namespace mindspore {
 namespace lite {
@@ -72,8 +73,29 @@ int Merge::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outpu
     return RET_INFER_INVALID;
   }
   for (size_t i = 0; i < inputs_.size() / 2; i++) {
-    outputs_[i]->set_data_type(inputs_[i]->data_type());
-    outputs_[i]->set_shape(inputs_[i]->shape());
+    auto *input = inputs_[i];
+    auto *output = outputs_[i];
+    if (input == nullptr) {
+      MS_LOG(ERROR) << "input tensor is nullptr";
+      return RET_ERROR;
+    }
+    if (output == nullptr) {
+      MS_LOG(ERROR) << "output tensor is nullptr";
+      return RET_ERROR;
+    }
+    output->set_data_type(input->data_type());
+    output->set_shape(input->shape());
+    output->set_format(input->format());
+    auto data_type = input->data_type();
+    if (data_type != kObjectTypeTensorType) {
+      continue;
+    } else {
+      auto input_tensorlist = reinterpret_cast<TensorList *>(input);
+      auto output_tensorlist = reinterpret_cast<TensorList *>(output);
+      output_tensorlist->set_element_shape(input_tensorlist->element_shape());
+      output_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num());
+      output_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type());
+    }
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/ops/populate/split_populate.cc b/mindspore/lite/src/ops/populate/split_populate.cc
index 3726dd0451..9fcd931506 100644
--- a/mindspore/lite/src/ops/populate/split_populate.cc
+++ b/mindspore/lite/src/ops/populate/split_populate.cc
@@ -48,7 +48,9 @@ OpParameter *PopulateSplitParameter(const mindspore::lite::PrimitiveC *primitive
   memset(split_param->split_sizes_, 0, split_param->num_split_ * sizeof(int));
 
   auto split_sizes_vector_ = param->size_splits();
-  MS_ASSERT(split_sizes_vector_.size() == split_param->num_split_);
-  for (size_t i = 0; i < split_sizes_vector_.size(); i++) {
+  // With the size-equality assert removed nothing guarantees that split_sizes_vector_ fits, so clamp the
+  // copy to num_split_ entries, i.e. the part of split_sizes_ that the memset above covers.
+  const size_t split_num = static_cast<size_t>(split_param->num_split_);
+  for (size_t i = 0; i < split_sizes_vector_.size() && i < split_num; i++) {
     split_param->split_sizes_[i] = split_sizes_vector_[i];
   }
diff --git a/mindspore/lite/src/ops/switch.cc b/mindspore/lite/src/ops/switch.cc
index 2135f5fbb7..eacbd2cf7e 100644
--- a/mindspore/lite/src/ops/switch.cc
+++ b/mindspore/lite/src/ops/switch.cc
@@ -19,6 +19,7 @@
 #ifndef PRIMITIVE_WRITEABLE
 #include "src/ops/ops_register.h"
 #endif
+#include "src/tensorlist.h"
 
 namespace mindspore {
 namespace lite {
@@ -76,12 +77,37 @@ int Switch::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outp
     return RET_INFER_INVALID;
   }
   for (size_t i = 0; i < outputs_.size() / 2; i++) {
-    outputs_[i]->set_data_type(inputs_[i + 1]->data_type());
-    outputs_[i + outputs_.size() / 2]->set_data_type(inputs_[i + 1]->data_type());
-    outputs_[i]->set_shape(inputs_[i + 1]->shape());
-    outputs_[i + outputs_.size() / 2]->set_shape(inputs_[i + 1]->shape());
-    outputs_[i]->set_format(inputs_[i + 1]->format());
-    outputs_[i + outputs_.size() / 2]->set_format(inputs_[i + 1]->format());
+    auto *input = inputs_[i + 1];
+    auto *output_true = outputs_[i];
+    auto *output_false = outputs_[i + outputs_.size() / 2];
+    if (input == nullptr) {
+      MS_LOG(ERROR) << "input tensor is nullptr";
+      return RET_ERROR;
+    }
+    if (output_true == nullptr || output_false == nullptr) {
+      MS_LOG(ERROR) << "output tensor is nullptr";
+      return RET_ERROR;
+    }
+    output_true->set_data_type(input->data_type());
+    output_false->set_data_type(input->data_type());
+    output_true->set_shape(input->shape());
+    output_false->set_shape(input->shape());
+    output_true->set_format(input->format());
+    output_false->set_format(input->format());
+    auto data_type = input->data_type();
+    if (data_type != kObjectTypeTensorType) {
+      continue;
+    } else {
+      auto input_tensorlist = reinterpret_cast<TensorList *>(input);
+      auto output_true_tensorlist = reinterpret_cast<TensorList *>(output_true);
+      auto output_false_tensorlist = reinterpret_cast<TensorList *>(output_false);
+      output_true_tensorlist->set_element_shape(input_tensorlist->element_shape());
+      output_false_tensorlist->set_element_shape(input_tensorlist->element_shape());
+      output_true_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num());
+      output_false_tensorlist->set_max_elements_num(input_tensorlist->max_elements_num());
+      output_true_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type());
+      output_false_tensorlist->set_tensors_data_type(input_tensorlist->tensors_data_type());
+    }
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/ops/tensorlist_getitem.cc b/mindspore/lite/src/ops/tensorlist_getitem.cc
index 5e9f8627d2..fee1c6ae45 100644
--- a/mindspore/lite/src/ops/tensorlist_getitem.cc
+++ b/mindspore/lite/src/ops/tensorlist_getitem.cc
@@ -136,7 +136,7 @@ int TensorListGetItem::InferShape(std::vector<lite::Tensor *> inputs_, std::vect
     MS_LOG(ERROR) << "index_:" << index_ << "must in [0, " << input0->ElementsNum() - 1 << "]";
     return RET_ERROR;
   }
-  auto tensor_index = input0->GetTensorIndex(index_);
+  auto tensor_index = input0->GetTensor(index_);
   MS_ASSERT(tensor_index != nullptr);
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
@@ -159,7 +159,7 @@ int TensorListGetItem::InferShape(std::vector<lite::Tensor *> inputs_, std::vect
     }
     if (!IsFullyDefined(element_shape_)) {
       for (int i = 0; i < input0->ElementsNum(); ++i) {
-        auto input = input0->GetTensorIndex(i);
+        auto input = input0->GetTensor(i);
         MS_ASSERT(input != nullptr);
         if (input->data_type() != kTypeUnknown) {
           status = MergeShape(input->shape());
diff --git a/mindspore/lite/src/ops/tensorlist_setitem.cc b/mindspore/lite/src/ops/tensorlist_setitem.cc
index 34969c44ed..b753237762 100644
--- a/mindspore/lite/src/ops/tensorlist_setitem.cc
+++ b/mindspore/lite/src/ops/tensorlist_setitem.cc
@@ -140,7 +140,7 @@ int TensorListSetItem::InferShape(std::vector<lite::Tensor *> inputs_, std::vect
   } else {
     output0->set_shape(input0->shape());
     for (int i = 0; i < input0->ElementsNum(); ++i) {
-      auto src_ptr = input0->GetTensorIndex(i);
+      auto src_ptr = input0->GetTensor(i);
       if (src_ptr == nullptr) {
         MS_LOG(ERROR) << "input0->tensors_[" << i << "] is nullptr!";
         return RET_ERROR;
diff --git a/mindspore/lite/src/ops/tensorlist_stack.cc b/mindspore/lite/src/ops/tensorlist_stack.cc
index 05d46578b9..9e06b912fd 100644
--- a/mindspore/lite/src/ops/tensorlist_stack.cc
+++ b/mindspore/lite/src/ops/tensorlist_stack.cc
@@ -133,6 +133,7 @@ int TensorListStack::InferShape(std::vector<lite::Tensor *> inputs_, std::vector
     return RET_NULL_PTR;
   }
   auto ele_shape_ptr = reinterpret_cast<int *>(ele_shape->data_c());
+  output_shape_.clear();
   for (int i = 0; i < ele_shape->ElementsNum(); ++i) {
     output_shape_.push_back(ele_shape_ptr[i]);
   }
@@ -148,7 +149,7 @@ int TensorListStack::InferShape(std::vector<lite::Tensor *> inputs_, std::vector
   }
   if (!IsFullyDefined(input0->element_shape())) {
     for (int i = 0; i < input0->ElementsNum(); ++i) {
-      auto tensor_ele = input0->GetTensorIndex(i);
+      auto tensor_ele = input0->GetTensor(i);
       MS_ASSERT(tensor_ele != nullptr);
       if (tensor_ele->data_type() != kTypeUnknown) {
         status = MergeShape(tensor_ele->shape());
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 34effac772..5125e88754 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -62,11 +62,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
         memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size());
         in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1);
         if (in_tensors[index]->ref_count() <= 0) {
-          auto ret = in_tensors[index]->FreeData();
-          if (ret != RET_OK) {
-            MS_LOG(ERROR) << "Free tensor data failed";
-            return RET_ERROR;
-          }
+          in_tensors[index]->FreeData();
         }
         break;
       }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
new file mode 100644
index 0000000000..509404c68d
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
@@ -0,0 +1,127 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/base/carry_data.h"
+#include "include/errorcode.h"
+#include "src/tensorlist.h"
+
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+// Carries data pairwise from the source range into the destination range [dst_begin, dst_end).
+// src_limit only bounds the source walk: reaching it before dst_end is reached is an error.
+// A pair in which both tensors report kObjectTypeTensorType goes through MoveTensorLiteData, any
+// other pair through MoveTensorData. Returns RET_OK, or the first error encountered.
+int CarryDataKernel::MoveData(std::vector<lite::Tensor *>::iterator dst_begin,
+                              std::vector<lite::Tensor *>::iterator dst_end,
+                              std::vector<lite::Tensor *>::iterator src_begin,
+                              std::vector<lite::Tensor *>::iterator src_limit) {
+  for (auto dst_iter = dst_begin, src_iter = src_begin; dst_iter != dst_end; dst_iter++, src_iter++) {
+    if (src_iter == src_limit) {
+      MS_LOG(ERROR) << "out of range of input tensor";
+      return RET_ERROR;
+    }
+    auto *dst_tensor = *dst_iter;
+    auto *src_tensor = *src_iter;
+    if (dst_tensor == nullptr || src_tensor == nullptr) {
+      MS_LOG(ERROR) << "input tensor or output tensor is nullptr";
+      return RET_ERROR;
+    }
+    lite::STATUS ret;
+    if (src_tensor->data_type() == kObjectTypeTensorType && dst_tensor->data_type() == kObjectTypeTensorType) {
+      // TensorList is a Tensor subclass, so the downcast is spelled as static_cast, not reinterpret_cast.
+      ret = MoveTensorLiteData(static_cast<lite::TensorList *>(dst_tensor),
+                               static_cast<lite::TensorList *>(src_tensor));
+    } else {
+      ret = MoveTensorData(dst_tensor, src_tensor);
+    }
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Move data failed : " << ret;
+      return ret;
+    }
+  }
+  return RET_OK;
+}
+
+// Carries one plain tensor from src_tensor into dst_tensor.
+// Data type, format and shape of both tensors must agree, otherwise RET_ERROR is returned.
+//  - src has a root tensor: dst is pointed at that same root tensor.
+//  - src is const, a graph input or has ref_count() > 1: the bytes are copied and src keeps its buffer.
+//  - otherwise: dst frees its own data and takes over src's data pointer; src is left with nullptr.
+int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor) {
+  if (dst_tensor->data_type() != src_tensor->data_type() || dst_tensor->format() != src_tensor->format() ||
+      !(dst_tensor->shape() == src_tensor->shape() || (dst_tensor->shape().empty() && src_tensor->shape().empty()))) {
+    MS_LOG(ERROR) << "input tensor and output tensor is incompatible";
+    return RET_ERROR;
+  }
+  if (src_tensor->root_tensor() == nullptr) {
+    if (src_tensor->IsConst() || src_tensor->IsGraphInput() || src_tensor->ref_count() > 1) {
+      auto dst_data = dst_tensor->MutableData();
+      if (dst_data == nullptr) {
+        MS_LOG(ERROR) << "data of dst tensor is nullptr";
+        return RET_ERROR;
+      }
+      auto src_data = src_tensor->data_c();
+      // Checked at runtime: an assert alone may be compiled out, and memcpy from nullptr is undefined.
+      if (src_data == nullptr) {
+        MS_LOG(ERROR) << "data of src tensor is nullptr";
+        return RET_ERROR;
+      }
+      memcpy(dst_data, src_data, dst_tensor->Size());
+    } else {
+      dst_tensor->FreeData();
+      dst_tensor->set_data(src_tensor->data_c());
+      src_tensor->set_data(nullptr);
+    }
+  } else {
+    auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed";
+      return ret;
+    }
+  }
+  return RET_OK;
+}
+
+// Carries one TensorList from src_tensor into dst_tensor.
+// Data type, format, element shape and element data type must agree, otherwise RET_ERROR is returned.
+//  - src has no root tensor: dst is filled through CopyTensorList(*src_tensor, false), then src is emptied.
+//  - src has a root tensor: dst adopts src's shape and is pointed at the same root tensor.
+int CarryDataKernel::MoveTensorLiteData(lite::TensorList *dst_tensor, lite::TensorList *src_tensor) {
+  // shape may change, because tensors.size() can change in RunGraph
+  if (dst_tensor->data_type() != src_tensor->data_type() || dst_tensor->format() != src_tensor->format() ||
+      !(dst_tensor->element_shape() == src_tensor->element_shape() ||
+        (dst_tensor->element_shape().empty() && src_tensor->element_shape().empty())) ||
+      dst_tensor->tensors_data_type() != src_tensor->tensors_data_type()) {
+    MS_LOG(ERROR) << "input tensorlist and output tensorlist is incompatible";
+    return RET_ERROR;
+  }
+  if (src_tensor->root_tensor() == nullptr) {
+    // NOTE(review): any status reported by CopyTensorList is not inspected here - confirm it cannot fail.
+    dst_tensor->CopyTensorList(*src_tensor, false);
+    src_tensor->set_tensors({});
+  } else {
+    dst_tensor->set_shape(src_tensor->shape());
+    auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed";
+      return ret;
+    }
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
new file mode 100644
index 0000000000..ba960b772b
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/tensor.h"
+#include "src/tensorlist.h"
+
+namespace mindspore::kernel {
+class CarryDataKernel : public LiteKernel {  // LiteKernel base that adds tensor data-carrying helpers
+ public:
+  CarryDataKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                  const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}  // forwards every argument to LiteKernel
+  ~CarryDataKernel() override = default;
+
+ protected:  // MoveData walks the dst/src iterator ranges; the two static helpers take one tensor pair each
+  int MoveData(std::vector<lite::Tensor *>::iterator dst_begin, std::vector<lite::Tensor *>::iterator dst_end,
+               std::vector<lite::Tensor *>::iterator src_begin, std::vector<lite::Tensor *>::iterator src_limit);
+  static int MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor);  // plain Tensor pair
+  static int MoveTensorLiteData(lite::TensorList *dst_tensor, lite::TensorList *src_tensor);  // TensorList pair
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
index 85b1486fb1..43373dcf7a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc
@@ -18,6 +18,7 @@
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/tensorlist.h"
+#include "src/common/utils.h"
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@@ -28,104 +29,97 @@ namespace mindspore::kernel {
 int MergeCPUKernel::FreeInWorkTensor() const {
   for (auto &in_tensor : this->in_tensors_) {
     MS_ASSERT(in_tensor != nullptr);
-    if (in_tensor->IsConst() || in_tensor->IsGraphInput()) {
+    if (in_tensor->root_tensor() == in_tensor) {
       continue;
     }
-    if (in_tensor->ref_count() > 0) {
-      in_tensor->set_ref_count(in_tensor->ref_count() - 1);
-      if (in_tensor->ref_count() <= 0) {
-        auto ret = in_tensor->FreeData();
-        if (0 != ret) {
-          MS_LOG(ERROR) << "Free tensor data failed";
-          return ret;
-        }
-      }
-    }
+    in_tensor->DecRefCount();
   }
   return RET_OK;
 }
 
-// if one of input of merge is const-tensor, merge is always ready, this will cause error.
 bool MergeCPUKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
-  MS_ASSERT(in_tensors().size() == 2 * out_tensors().size());
-  return std::all_of(this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2,
-                     [&](lite::Tensor *kernel_in_tensor) {
-                       return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() ||
-                              kernel_in_tensor->ref_count() >= 1;
-                     }) ||
-         std::all_of(this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(),
-                     [&](lite::Tensor *kernel_in_tensor) {
-                       return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() ||
-                              kernel_in_tensor->ref_count() >= 1 ||
-                              (kernel_in_tensor->data_type() == kObjectTypeTensorType);
-                     });
+  auto ready_part = FindReadyPart(scope_tensors);
+  return ready_part == LEFT_INPUT_PART || ready_part == RIGHT_INPUT_PART;
 }
 
-int MergeCPUKernel::Init() { return RET_OK; }
+// When slot i of both input halves holds the very same tensor, registers that tensor as its own root
+// tensor. Returns RET_OK, or the error code of the first failing set_root_tensor call.
+int MergeCPUKernel::Init() {
+  MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
+  const size_t stride = in_tensors_.size() / 2;
+  for (size_t i = 0; i < stride; i++) {
+    auto *tensor = in_tensors_[i];
+    MS_ASSERT(tensor != nullptr);
+    MS_ASSERT(in_tensors_[i + stride] != nullptr);
+    if (tensor != in_tensors_[i + stride]) {
+      continue;
+    }
+    // Both slots alias one object, so one call suffices; a second call would only repeat this one.
+    auto ret = tensor->set_root_tensor(tensor);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Set root tensor for tensor(" << tensor->tensor_name() << ") failed";
+      return ret;
+    }
+  }
+  return RET_OK;
+}
 
 int MergeCPUKernel::ReSize() { return RET_OK; }
 
-bool MergeCPUKernel::PartialInputReady(int num_begin, int num_end) {
+InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope_tensors) {
   MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
-  bool result = (std::all_of(this->in_tensors().begin() + num_begin, this->in_tensors().begin() + num_end,
-                             [&](lite::Tensor *kernel_in_tensor) {
-                               return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1 ||
-                                      kernel_in_tensor->IsGraphInput() ||
-                                      kernel_in_tensor->data_type() == kObjectTypeTensorType;
-                             })) &&
-                std::all_of(this->in_tensors_.begin() + num_begin, this->in_tensors_.begin() + num_end,
-                            [&](lite::Tensor *in_tensor) {
-                              if (in_tensor->data_type() != kObjectTypeTensorType) {
-                                return in_tensor->data_c() != nullptr;
-                              } else {
-                                return true;
-                              }
-                            });
-  return result;
+  bool is_root_tensor_ready =
+    std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
+      // if not in scope_tensors, not care
+      if (!IsContain(scope_tensors, in_tensor)) {
+        return true;
+      }
+      // if not a root_tensor, not care
+      if (in_tensor->root_tensor() == nullptr || in_tensor->root_tensor() != in_tensor) {
+        return true;
+      }
+      return in_tensor->IsReady();
+    });
+  // check if all root tensor is ready
+  if (!is_root_tensor_ready) {
+    return UNKNOWN_INPUT_PART;
+  }
+  // check one part of in tensors of merge is ready
+  // if not in scope_tensors, not care
+  // if in scope_tensors, in_tensor need to be ready
+  if (std::all_of(
+        this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(),
+        [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
+    return RIGHT_INPUT_PART;
+  }
+  if (std::all_of(
+        this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2,
+        [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
+    return LEFT_INPUT_PART;
+  }
+  return UNKNOWN_INPUT_PART;
 }
 
 int MergeCPUKernel::Run() {
   MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
-  int in_tesnor_part_one = 0;
-  int in_tensor_part_two = in_tensors_.size() / 2;
-  int in_tensor_part_three = in_tensors_.size();
-  if (PartialInputReady(in_tesnor_part_one, in_tensor_part_two)) {
-    for (size_t i = 0; i < out_tensors().size(); i++) {
-      auto out_data = out_tensors_[i]->data_c();
-      auto in_data = in_tensors_[i]->data_c();
-      if (in_tensors_[i]->data_type() == kObjectTypeTensorType) {
-        auto in_tensor_list = reinterpret_cast<lite::TensorList *>(in_tensors_[i]);
-        auto out_tensor_list = reinterpret_cast<lite::TensorList *>(out_tensors_[i]);
-        if (std::any_of(in_tensor_list->tensors().begin(), in_tensor_list->tensors().end(),
-                        [&](lite::Tensor *tensor) { return tensor->data_c() == nullptr; })) {
-          continue;
-        }
-        *out_tensor_list = *in_tensor_list;
-        continue;
-      }
-      MS_ASSERT(in_data != nullptr);
-      MS_ASSERT(out_data != nullptr);
-      memcpy(out_data, in_data, in_tensors_[i]->Size());
+  auto ready_part = FindReadyPart(this->in_tensors_);
+  if (ready_part == LEFT_INPUT_PART) {
+    auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(), this->in_tensors_.begin(),
+                        this->in_tensors_.end());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "carry data error : " << ret;
+      return ret;
     }
-  }
-  if (PartialInputReady(in_tensor_part_two, in_tensor_part_three)) {
-    for (size_t i = 0; i < out_tensors().size(); i++) {
-      auto out_data = out_tensors_[i]->data_c();
-      auto in_data = in_tensors_[i + in_tensor_part_two]->data_c();
-      if (in_tensors_[i]->data_type() == kObjectTypeTensorType) {
-        auto in_tensor_list = reinterpret_cast<lite::TensorList *>(in_tensors_[i + in_tensor_part_two]);
-        auto out_tensor_list = reinterpret_cast<lite::TensorList *>(out_tensors_[i]);
-        if (std::any_of(in_tensor_list->tensors().begin(), in_tensor_list->tensors().end(),
-                        [&](lite::Tensor *tensor) { return tensor->data_c() == nullptr; })) {
-          continue;
-        }
-        *out_tensor_list = *in_tensor_list;
-        continue;
-      }
-      MS_ASSERT(in_data != nullptr);
-      MS_ASSERT(out_data != nullptr);
-      memcpy(out_data, in_data, in_tensors_[i]->Size());
+  } else if (ready_part == RIGHT_INPUT_PART) {
+    auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.end(),
+                        (this->in_tensors_.begin() + in_tensors_.size() / 2), this->in_tensors_.end());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "carry data error : " << ret;
+      return ret;
     }
+  } else {
+    MS_LOG(ERROR) << "none input part of merge is ready";
+    return RET_ERROR;
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.h b/mindspore/lite/src/runtime/kernel/arm/base/merge.h
index 481a3b9ad4..7268a1c2ba 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/merge.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.h
@@ -17,21 +17,27 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MERGE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/runtime/kernel/arm/base/carry_data.h"
+#include "src/tensor.h"
+#include "src/tensorlist.h"
 
 namespace mindspore::kernel {
-class MergeCPUKernel : public LiteKernel {
+// Names one half of a merge kernel's inputs: LEFT is the first half of in_tensors_, RIGHT the second half.
+enum InputPart { UNKNOWN_INPUT_PART, LEFT_INPUT_PART, RIGHT_INPUT_PART };
+
+class MergeCPUKernel : public CarryDataKernel {
  public:
   MergeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                  const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~MergeCPUKernel() override {}
+      : CarryDataKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~MergeCPUKernel() override = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int FreeInWorkTensor() const override;
   bool IsReady(const std::vector<lite::Tensor *> &scope_tensors) override;
 
  private:
-  bool PartialInputReady(int num_begin, int num_end);
+  // Returns the input half (right tried first) whose in-scope tensors are all ready, else UNKNOWN_INPUT_PART.
+  InputPart FindReadyPart(const std::vector<lite::Tensor *> &scope_tensors);
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.cc b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc
index b3ecb0bfd5..cbd681eb54 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/switch.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc
@@ -43,6 +43,16 @@ int SwitchCPUKernel::PostProcess() {
     auto out_tensor = out_tensors_.at(out_index++);
     out_tensor->ResetRefCount();
   }
+  if (!*active) {
+    for (auto &in_tensor : this->in_tensors_) {
+      MS_ASSERT(in_tensor != nullptr);
+      auto root_tensor = in_tensor->root_tensor();
+      if (root_tensor == nullptr) {
+        continue;
+      }
+      root_tensor->DecRefCount();
+    }
+  }
   return FreeInWorkTensor();
 }
 
@@ -64,29 +74,20 @@ int SwitchCPUKernel::Run() {
     MS_LOG(ERROR) << "data of bool tensor is nullptr";
     return lite::RET_NULL_PTR;
   }
-  size_t in_index = 1;
-  size_t out_index = (*active) ? 0 : (out_tensors_.size() / 2);
-  while (in_index < in_tensors_.size()) {
-    auto in_tensor = in_tensors_.at(in_index++);
-    auto out_tensor = out_tensors_.at(out_index++);
-    // copy for tensorlist
-    if (in_tensor->data_type() == kObjectTypeTensorType) {
-      auto in_tensor_list = reinterpret_cast<lite::TensorList *>(in_tensor);
-      auto out_tensor_list = reinterpret_cast<lite::TensorList *>(out_tensor);
-      *out_tensor_list = *in_tensor_list;
-      continue;
+  if (*active) {
+    auto ret = MoveData(this->out_tensors_.begin(), this->out_tensors_.begin() + out_tensors_.size() / 2,
+                        this->in_tensors_.begin() + 1, this->in_tensors_.end());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "carry data error : " << ret;
+      return ret;
     }
-    // copy for tensor
-    MS_ASSERT(in_tensor != nullptr);
-    MS_ASSERT(out_tensor != nullptr);
-    auto input = in_tensor->data_c();
-    auto output = out_tensor->data_c();
-    MS_ASSERT(in_tensor->Size() == out_tensor->Size());
-    if (input == nullptr || output == nullptr) {
-      MS_LOG(ERROR) << "input tensor or output tensor have not been malloced";
-      return lite::RET_NULL_PTR;
+  } else {
+    auto ret = MoveData(this->out_tensors_.begin() + out_tensors_.size() / 2, this->out_tensors_.end(),
+                        this->in_tensors_.begin() + 1, this->in_tensors_.end());
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "carry data error : " << ret;
+      return ret;
     }
-    memcpy(output, input, in_tensor->Size());
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.h b/mindspore/lite/src/runtime/kernel/arm/base/switch.h
index 7e7530a088..66187bd416 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/switch.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.h
@@ -17,30 +17,22 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SWITCH_H_
 
 #include <vector>
+#include "src/runtime/kernel/arm/base/carry_data.h"
 #include "src/lite_kernel.h"
+#include "src/tensorlist.h"
 
 namespace mindspore::kernel {
-
-typedef struct SwitchParameter {
-  OpParameter op_parameter_;
-} SwitchParameter;
-
-class SwitchCPUKernel : public LiteKernel {
+class SwitchCPUKernel : public CarryDataKernel {  // NOTE(review): MoveData() presumably comes from this base
  public:
   SwitchCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    switch_param_ = reinterpret_cast<SwitchParameter *>(op_parameter_);
-  }
+      : CarryDataKernel(parameter, inputs, outputs, ctx, primitive) {}  // forwards everything, no own state
   ~SwitchCPUKernel() override = default;
   int PostProcess() override;
   int Init() override;
   int ReSize() override;
   int Run() override;
-
- private:
-  SwitchParameter *switch_param_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
index 2a1a17b143..5ba89af94e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
@@ -157,6 +157,7 @@ void ArithmeticCPUKernel::InitRunFunction() {
     case PrimitiveType_LogicalAnd:
       arithmetic_run_ = ElementLogicalAnd;
       arithmetic_run_int_ = ElementLogicalAndInt;
+      arithmetic_run_bool_ = ElementLogicalAndBool;
       break;
     case PrimitiveType_LogicalOr:
       arithmetic_run_ = ElementLogicalOr;
@@ -295,6 +296,8 @@ void ArithmeticCPUKernel::InitParam() {
   arithmeticParameter_->ndim_ = arithmetic_lite_primitive->NDims();
   if (in_tensors_[0]->data_type() == kNumberTypeFloat32 || in_tensors_[0]->data_type() == kNumberTypeFloat16) {
     data_type_ = kDataTypeFloat;
+  } else if (in_tensors_[0]->data_type() == kNumberTypeBool) {
+    data_type_ = KDataTypeBool;
   } else {
     data_type_ = kDataTypeInt;
   }
@@ -419,6 +422,10 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
     error_code = arithmetic_run_(reinterpret_cast<float *>(input0_ptr_) + stride * task_id,
                                  reinterpret_cast<float *>(input1_ptr_) + stride * task_id,
                                  reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count);
+  } else if (data_type_ == KDataTypeBool) {
+    error_code = arithmetic_run_bool_(reinterpret_cast<bool *>(input0_ptr_) + stride * task_id,
+                                      reinterpret_cast<bool *>(input1_ptr_) + stride * task_id,
+                                      reinterpret_cast<bool *>(out_tensors_[0]->data_c()) + stride * task_id, count);
   } else {
     error_code = arithmetic_run_int_(reinterpret_cast<int *>(input0_ptr_) + stride * task_id,
                                      reinterpret_cast<int *>(input1_ptr_) + stride * task_id,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h
index 78bedefdf3..5c6ebfb8a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h
@@ -50,6 +50,7 @@ class ArithmeticCPUKernel : public LiteKernel {
   typedef int (*ArithmeticIntRun)(const int *input0, const int *input1, int *output, const int element_size);
   typedef int (*ArithmeticOptIntRun)(const int *input0, const int *input1, int *output, const int element_size,
                                      const ArithmeticParameter *param);
+  typedef int (*ArithmeticBoolRun)(const bool *input0, const bool *input1, bool *output, const int element_size);
 
  public:
   ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
@@ -91,6 +92,7 @@ class ArithmeticCPUKernel : public LiteKernel {
   ArithmeticOptRun arithmetic_opt_run_ = nullptr;
   ArithmeticIntRun arithmetic_run_int_ = nullptr;
   ArithmeticOptIntRun arithmetic_opt_run_int_ = nullptr;
+  ArithmeticBoolRun arithmetic_run_bool_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc
index 0c1f6ac174..18a65d648b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.cc
@@ -89,7 +89,7 @@ int TensorListFromTensorCPUKernel::Run() {
   auto in_ptr = reinterpret_cast<float *>(input0_->data_c());
   // copy data from input0(tensor) to output(tensorlist) vector<*tensor>
   for (int i = 0; i < dim0; ++i) {
-    auto out_ptr = output0->GetTensorIndex(i);
+    auto out_ptr = output0->GetTensor(i);
     MS_ASSERT(out_ptr != nullptr);
     if (out_ptr->ElementsNum() != devision_dim0) {
       MS_LOG(ERROR) << "tensors_[" << i << "].ElementsNum():" << out_ptr->ElementsNum()
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc
index 7018bbed11..9f7df69baf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.cc
@@ -49,7 +49,7 @@ int TensorListGetItemCPUKernel::Init() {
 
 int TensorListGetItemCPUKernel::Run() {
   auto input0 = reinterpret_cast<lite::TensorList *>(in_tensors_[0]);
-  auto src_ptr = input0->GetTensorIndex(index_);
+  auto src_ptr = input0->GetTensor(index_);
   MS_ASSERT(src_ptr != nullptr);
   if (src_ptr->data_type() != kTypeUnknown) {
     if (src_ptr->ElementsNum() != out_tensors_[0]->ElementsNum()) {
@@ -57,7 +57,7 @@ int TensorListGetItemCPUKernel::Run() {
                     << " must be equal to out_tensors_[0]->ElementsNum():" << out_tensors_[0]->ElementsNum();
       return RET_ERROR;
     }
-    auto status = out_tensors_[0]->CopyTensorData(*src_ptr);
+    auto status = lite::Tensor::CopyTensorData(*src_ptr, out_tensors_[0]);
     if (status == RET_ERROR) {
       MS_LOG(ERROR) << "copy tensor data failed!";
       return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc
index 9ac4a54840..5a74c14c73 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.cc
@@ -59,23 +59,41 @@ int TensorListSetItemCPUKernel::Run() {
   MS_ASSERT(output0_ != nullptr);
   // copy each tensor in tensors_
   for (int i = 0; i < output0_->ElementsNum(); ++i) {
-    auto dst = output0_->GetTensorIndex(i);
-    MS_ASSERT(dst != nullptr);
-    auto src = input0_->GetTensorIndex(i);
     if (i == index_) {
-      // copy input2_ data buff
-      src = input2_;
-    }
-    MS_ASSERT(src != nullptr);
-    if (src->data_type() != kTypeUnknown) {
-      if (src->Size() != dst->Size()) {
-        MS_LOG(ERROR) << "src->Size():" << src->Size() << " must be equal to dst->Size():" << dst->Size();
-        return RET_ERROR;
+      auto dst = output0_->GetTensor(i);
+      if (dst == nullptr) {
+        dst = lite::Tensor::CopyTensor(*input2_, true);
+        auto &tensors = output0_->tensors();
+        tensors.emplace_back(dst);
+      } else {
+        dst->set_data_type(input2_->data_type());
+        dst->set_shape(input2_->shape());
+        dst->set_format(input2_->format());
+        dst->set_category(input2_->category());
+        dst->set_root_tensor(input2_->root_tensor());
+        dst->set_tensor_name(input2_->tensor_name());
+        dst->set_quant_clusters(input2_->quant_clusters());
+        auto ret = lite::Tensor::CopyTensorData(*input2_, dst);
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!";
+          return RET_ERROR;
+        }
       }
-      auto ret = dst->CopyTensorData(*src);
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!";
-        return RET_ERROR;
+    } else {
+      auto src = input0_->GetTensor(i);
+      auto dst = output0_->GetTensor(i);
+      MS_ASSERT(src != nullptr);
+      MS_ASSERT(dst != nullptr);
+      if (src->data_type() != kTypeUnknown) {
+        if (src->Size() != dst->Size()) {
+          MS_LOG(ERROR) << "src->Size():" << src->Size() << " must be equal to dst->Size():" << dst->Size();
+          return RET_ERROR;
+        }
+        auto ret = lite::Tensor::CopyTensorData(*src, dst);
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!";
+          return RET_ERROR;
+        }
       }
     }
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc
index b95bb1e4bc..143e2a3801 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.cc
@@ -79,6 +79,7 @@ int TensorListStackCPUKernel::MergeElementShape() {
     return RET_ERROR;
   }
   auto ele_shape_data = reinterpret_cast<int *>(in_tensors_[1]->data_c());
+  output_shape_.clear();
   for (int i = 0; i < in_tensors_[1]->ElementsNum(); ++i) {
     output_shape_.push_back(ele_shape_data[i]);
   }
@@ -94,7 +95,7 @@ int TensorListStackCPUKernel::MergeElementShape() {
   }
   if (!IsFullyDefined(input0_->element_shape())) {
     for (int i = 0; i < input0_->ElementsNum(); ++i) {  // get tensorlist every tensor
-      auto tensor_ele = input0_->GetTensorIndex(i);
+      auto tensor_ele = input0_->GetTensor(i);
       MS_ASSERT(tensor_ele != nullptr);
       if (tensor_ele->data_type() != kTypeUnknown) {
         status = MergeSubShape(tensor_ele->shape());
@@ -150,7 +151,7 @@ int TensorListStackCPUKernel::Run() {
   }
   auto out_ptr = reinterpret_cast<float *>(output0_->MutableData());
   for (int i = 0; i < num_element_; ++i) {
-    auto in_ptr = input0_->GetTensorIndex(i);
+    auto in_ptr = input0_->GetTensor(i);
     MS_ASSERT(in_ptr != nullptr);
     if (in_ptr->data_type() != kTypeUnknown) {
       int in_size = in_ptr->ElementsNum();
diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc
index ff7369d3d0..6cd3d54476 100644
--- a/mindspore/lite/src/sub_graph_kernel.cc
+++ b/mindspore/lite/src/sub_graph_kernel.cc
@@ -115,11 +115,7 @@ int SubGraphKernel::ReSize(bool is_interrupt) {
     std::vector<lite::Tensor *> inputs = kernel->in_tensors();
     std::vector<lite::Tensor *> outputs = kernel->out_tensors();
     for (auto &output : outputs) {
-      auto ret = output->FreeData();
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "FreeData failed";
-        return RET_ERROR;
-      }
+      output->FreeData();
     }
     primitive->set_infer_flag(!is_interrupt);
     auto ret = primitive->InferShape(inputs, outputs);
diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h
index f91a95c173..990da11375 100644
--- a/mindspore/lite/src/sub_graph_kernel.h
+++ b/mindspore/lite/src/sub_graph_kernel.h
@@ -52,13 +52,13 @@ struct DataStore {
 
 class SubGraphKernel : public LiteKernel {
  public:
-  explicit SubGraphKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                          const std::vector<LiteKernel *> &in_kernels, const std::vector<LiteKernel *> &out_kernels,
-                          std::vector<LiteKernel *> nodes, const lite::InnerContext *ctx)
+  SubGraphKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+                 std::vector<LiteKernel *> in_kernels, std::vector<LiteKernel *> out_kernels,
+                 std::vector<LiteKernel *> nodes, const lite::InnerContext *ctx)  // kernel lists taken by value
       : LiteKernel(nullptr, inputs, outputs, ctx, nullptr),
         nodes_(std::move(nodes)),
-        in_nodes_(in_kernels),
-        out_nodes_(out_kernels) {
+        in_nodes_(std::move(in_kernels)),  // moved into the member instead of copied
+        out_nodes_(std::move(out_kernels)) {
     subgraph_type_ = kCpuFP32SubGraph;
   }
 
@@ -109,20 +109,20 @@ class SubGraphKernel : public LiteKernel {
   std::vector<LiteKernel *> nodes() { return this->nodes_; }
 
  protected:
-  std::vector<LiteKernel *> nodes_;
+  std::vector<LiteKernel *> nodes_{};
   // entry nodes in nodes
-  std::vector<LiteKernel *> in_nodes_;
+  std::vector<LiteKernel *> in_nodes_{};
   // exit nodes in nodes
-  std::vector<LiteKernel *> out_nodes_;
+  std::vector<LiteKernel *> out_nodes_{};
   mindspore::lite::Executor *executor_ = nullptr;
 };
 
 class CpuSubGraph : public SubGraphKernel {
  public:
-  explicit CpuSubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                       const std::vector<LiteKernel *> &in_kernels, const std::vector<LiteKernel *> &out_kernels,
-                       const std::vector<LiteKernel *> &nodes, const lite::InnerContext *ctx)
-      : SubGraphKernel(inputs, outputs, in_kernels, out_kernels, nodes, ctx) {
+  CpuSubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+              std::vector<LiteKernel *> in_kernels, std::vector<LiteKernel *> out_kernels,
+              std::vector<LiteKernel *> nodes, const lite::InnerContext *ctx)  // by-value lists, moved to the base
+      : SubGraphKernel(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) {
     subgraph_type_ = kCpuFP32SubGraph;
   }
 
@@ -139,10 +139,10 @@ class CpuSubGraph : public SubGraphKernel {
 
 class CpuFp32SubGraph : public CpuSubGraph {
  public:
-  explicit CpuFp32SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                           const std::vector<LiteKernel *> &in_kernels, const std::vector<LiteKernel *> &out_kernels,
-                           const std::vector<LiteKernel *> &nodes, const lite::InnerContext *ctx)
-      : CpuSubGraph(inputs, outputs, in_kernels, out_kernels, nodes, ctx) {
+  CpuFp32SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+                  std::vector<LiteKernel *> in_kernels, std::vector<LiteKernel *> out_kernels,
+                  std::vector<LiteKernel *> nodes, const lite::InnerContext *ctx)  // by-value lists, moved onward
+      : CpuSubGraph(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) {
     subgraph_type_ = kCpuFP32SubGraph;
     this->name_ = "CpuFP32SubGraph";
   }
@@ -159,10 +159,10 @@ class CpuFp32SubGraph : public CpuSubGraph {
 
 class CpuFp16SubGraph : public CpuSubGraph {
  public:
-  explicit CpuFp16SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                           const std::vector<LiteKernel *> &in_kernels, const std::vector<LiteKernel *> &out_kernels,
-                           const std::vector<LiteKernel *> &nodes, const lite::InnerContext *ctx)
-      : CpuSubGraph(inputs, outputs, in_kernels, out_kernels, nodes, ctx) {
+  CpuFp16SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+                  std::vector<LiteKernel *> in_kernels, std::vector<LiteKernel *> out_kernels,
+                  std::vector<LiteKernel *> nodes, const lite::InnerContext *ctx)  // by-value lists, moved onward
+      : CpuSubGraph(inputs, outputs, std::move(in_kernels), std::move(out_kernels), std::move(nodes), ctx) {
     subgraph_type_ = kCpuFP16SubGraph;
     this->name_ = "CpuFP16SubGraph";
   }
@@ -180,7 +180,7 @@ class CpuFp16SubGraph : public CpuSubGraph {
   void FreeOriginInputData();
 
  private:
-  std::vector<DataStore *> origin_input_data_;
+  std::vector<DataStore *> origin_input_data_{};
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc
index 6b919093b6..da022e599e 100644
--- a/mindspore/lite/src/tensor.cc
+++ b/mindspore/lite/src/tensor.cc
@@ -29,48 +29,53 @@ namespace lite {
 Tensor::Tensor(const TypeId data_type, std::vector<int> shape, const schema::Format &format, Category category)
     : data_type_(data_type), shape_(std::move(shape)), format_(format), category_(category) {}
 
-Tensor::Tensor(const Tensor &tensor) {
-  auto ret = CopyTensor(tensor, true);
-  if (0 != ret) {
-    MS_LOG(ERROR) << "CopyTensorData error";
+int Tensor::CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor) {  // copies src's bytes into dst
+  if (dst_tensor == nullptr) {
+    MS_LOG(ERROR) << "dst_tensor is nullptr";
+    return RET_PARAM_INVALID;
   }
-}
-
-int Tensor::CopyTensorData(const Tensor &src_tensor) {
   if (src_tensor.data_ == nullptr) {
     MS_LOG(ERROR) << "data of src tensor is nullptr";
     return RET_PARAM_INVALID;
   }
-  size_t data_size = this->Size();
-  MS_ASSERT(data_size == src_tensor.Size());
-  if (this->data_ == nullptr) {
+  size_t data_size = dst_tensor->Size();
+  if (data_size != src_tensor.Size()) {  // a size mismatch is now a runtime error, no longer only an MS_ASSERT
+    MS_LOG(ERROR) << "Size of dst tensor is not compatible with src tensor";
+    return RET_ERROR;
+  }
+  if (dst_tensor->data_ == nullptr) {  // dst has no buffer yet: allocate one with malloc
     if (data_size > kMaxMallocSize) {
       MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes";
       return RET_ERROR;
     }
-    this->data_ = malloc(data_size);
-    if (this->data_ == nullptr) {
+    dst_tensor->data_ = malloc(data_size);
+    if (dst_tensor->data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc memory failed";
       return RET_ERROR;
     }
   }
-  memcpy(this->data_, src_tensor.data_, data_size);
+  memcpy(dst_tensor->data_, src_tensor.data_, data_size);  // reads src_tensor.data_ directly, not data_c()
   return RET_OK;
 }
 
-int Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) {
-  this->data_type_ = src_tensor.data_type_;
-  this->shape_ = src_tensor.shape_;
-  this->category_ = src_tensor.category_;
-  this->format_ = src_tensor.format_;
+Tensor *Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) {  // returns a new tensor, nullptr on failure
+  auto *result = new (std::nothrow) Tensor;
+  if (result == nullptr) {
+    MS_LOG(ERROR) << "New tensor failed";
+    return nullptr;
+  }
+  result->data_type_ = src_tensor.data_type_;  // only data type, shape, category and format are cloned
+  result->shape_ = src_tensor.shape_;
+  result->category_ = src_tensor.category_;
+  result->format_ = src_tensor.format_;
   if (copy_data) {
-    auto ret = CopyTensorData(src_tensor);
-    if (0 != ret) {
+    if (CopyTensorData(src_tensor, result) != RET_OK) {
+      delete result;  // release the half-built tensor instead of leaking it
       MS_LOG(ERROR) << "CopyTensorData error";
-      return RET_ERROR;
+      return nullptr;
     }
   }
-  return RET_OK;
+  return result;
 }
 
 Tensor::~Tensor() {
@@ -84,18 +89,6 @@ Tensor::~Tensor() {
   }
 }
 
-Tensor &Tensor::operator=(const Tensor &tensor) {
-  if (&tensor == this) {
-    return *this;
-  }
-  auto ret = CopyTensor(tensor, true);
-  if (0 != ret) {
-    MS_LOG(ERROR) << "CopyTensorData error";
-    MS_ASSERT(false);
-  }
-  return *this;
-}
-
 bool Tensor::operator==(const Tensor &tensor) {
   return data_ == tensor.data_ && shape_ == tensor.shape_ && data_type_ == tensor.data_type_;
 }
@@ -283,6 +276,25 @@ std::string Tensor::ToString() const {
   return oss.str();
 }
 
+int Tensor::set_root_tensor(Tensor *tensor) {
+  this->root_tensor_ = tensor;  // stored unconditionally, even when tensor is null
+  if (this->root_tensor_ == this) {  // self-rooted: nothing to inherit
+    return RET_OK;
+  }
+  if (this->root_tensor_ == nullptr) {  // NOTE(review): the null root stays stored although an error is returned
+    MS_LOG(ERROR) << "root tensor is nullptr";
+    return RET_NULL_PTR;
+  }
+  this->shape_ = this->root_tensor_->shape_;  // mirror the root's metadata; data_ itself is not touched
+  this->format_ = this->root_tensor_->format_;
+  this->data_type_ = this->root_tensor_->data_type_;
+  this->allocator_ = this->root_tensor_->allocator_;
+  this->category_ = this->root_tensor_->category_;
+  this->quant_params_ = this->root_tensor_->quant_params_;
+  this->quant_clusters_ = this->root_tensor_->quant_clusters_;
+  return RET_OK;
+}
+
 int Tensor::MallocData(const mindspore::lite::Allocator *allocator) {
   if (nullptr != this->data_) {
     return RET_OK;
@@ -303,9 +315,9 @@ int Tensor::MallocData(const mindspore::lite::Allocator *allocator) {
   return RET_OK;
 }
 
-int Tensor::FreeData() {
+void Tensor::FreeData() {
   if (nullptr == this->data_) {
-    return RET_OK;
+    return;
   }
   if (nullptr == allocator_) {
     free(this->data_);
@@ -314,10 +326,19 @@ int Tensor::FreeData() {
     allocator_->Free(this->data_);
     this->data_ = nullptr;
   }
-  return RET_OK;
 }
 
 void *Tensor::MutableData() {
+  if (this->root_tensor_ != nullptr) {
+    if (this->root_tensor_ != this && this->root_tensor_->data_ == nullptr) {
+      MS_LOG(ERROR) << "root tensor has not been malloced";
+      return nullptr;
+    } else if (this->root_tensor_ != this && this->root_tensor_->data_ != nullptr) {
+      return this->root_tensor_->data_;
+    } else {
+      // malloc self
+    }
+  }
   if (this->data_ == nullptr) {
     auto ret = this->MallocData();
     if (ret != 0) {
@@ -328,6 +349,17 @@ void *Tensor::MutableData() {
   return this->data_;
 }
 
+void Tensor::DecRefCount() {
+  if (this->IsConst() || this->IsGraphInput()) {  // these tensors are never counted down or freed here
+    return;
+  }
+  this->ref_count_--;
+  if (this->ref_count_ <= 0) {  // no remaining reference: release the buffer
+    FreeData();
+    this->ref_count_ = 0;  // clamp so the count never stays negative
+  }
+}
+
 void Tensor::AddQuantParam(const QuantArg &quant_arg) { this->quant_params_.push_back(quant_arg); }
 
 std::vector<QuantArg> Tensor::quant_params() const { return this->quant_params_; }
diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h
index b67962fc43..69e44494ad 100644
--- a/mindspore/lite/src/tensor.h
+++ b/mindspore/lite/src/tensor.h
@@ -53,15 +53,19 @@ class Tensor : public mindspore::tensor::MSTensor {
   Tensor(TypeId data_type, std::vector<int> shape, const schema::Format &format = schema::Format::Format_NHWC,
          Category category = VAR);
 
-  Tensor(const Tensor &tensor);
+  Tensor(const Tensor &tensor) = delete;
 
-  ~Tensor() override;
+  Tensor(Tensor &&other) = delete;
+
+  Tensor &operator=(const Tensor &tensor) = delete;
 
-  int CopyTensorData(const Tensor &srcTensor);
+  Tensor &operator=(Tensor &&src) = delete;
 
-  int CopyTensor(const Tensor &srcTensor, bool copyData = false);
+  ~Tensor() override;
 
-  Tensor &operator=(const Tensor &tensor);
+  static int CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor);
+
+  static Tensor *CopyTensor(const Tensor &src_tensor, bool copy_data = false);
 
   virtual bool operator==(const Tensor &tensor);
 
@@ -99,11 +103,16 @@ class Tensor : public mindspore::tensor::MSTensor {
 
   virtual int MallocData(const mindspore::lite::Allocator *allocator = nullptr);
 
-  virtual int FreeData();
+  virtual void FreeData();
 
   void *MutableData() override;
 
-  virtual void *data_c() const { return data_; }
+  virtual void *data_c() const {
+    if (this->root_tensor_ != nullptr) {  // a rooted tensor exposes its root's buffer, never its own data_
+      return this->root_tensor_->data_;
+    }
+    return data_;
+  }
 
   virtual void set_data(void *data) { this->data_ = data; }
 
@@ -125,7 +134,7 @@ class Tensor : public mindspore::tensor::MSTensor {
 
   void ResetRefCount() { this->ref_count_ = this->init_ref_count_; }
 
-  void DecRefCount() { this->ref_count_--; }
+  void DecRefCount();
 
   std::string ToString() const;
 
@@ -151,6 +160,14 @@ class Tensor : public mindspore::tensor::MSTensor {
     }
   }
 
+  virtual int set_root_tensor(Tensor *tensor);
+
+  Tensor *root_tensor() const { return this->root_tensor_; }
+
+  bool IsReady() const {  // true if const, or a graph input that has data, or ref_count_ >= 1
+    return this->IsConst() || (this->IsGraphInput() && this->data_ != nullptr) || this->ref_count_ >= 1;
+  }
+
  private:
   template <typename T>
   std::string DataToString(void *data, size_t data_number) const {
@@ -168,7 +185,6 @@ class Tensor : public mindspore::tensor::MSTensor {
  protected:
   std::string tensor_name_;
   void *data_ = nullptr;
-  void *device_data_ = nullptr;
   TypeId data_type_;
   std::vector<int> shape_;
   schema::Format format_;
@@ -178,6 +194,7 @@ class Tensor : public mindspore::tensor::MSTensor {
   std::vector<QuantArg> quant_params_;
   std::vector<float> quant_clusters_;
   mindspore::lite::Allocator *allocator_ = nullptr;
+  Tensor *root_tensor_ = nullptr;
 };
 
 inline size_t DataTypeSize(const TypeId type) {
diff --git a/mindspore/lite/src/tensorlist.cc b/mindspore/lite/src/tensorlist.cc
index 85dc1dbdc5..1b57a60112 100644
--- a/mindspore/lite/src/tensorlist.cc
+++ b/mindspore/lite/src/tensorlist.cc
@@ -14,38 +14,26 @@
  * limitations under the License.
  */
 
+#include "src/tensorlist.h"
+#include <utility>
 #include "include/ms_tensor.h"
-
 #include "src/common/log_adapter.h"
 #include "schema/model_generated.h"
 #include "src/tensor.h"
-#include "src/tensorlist.h"
 
-namespace mindspore {
-namespace lite {
+namespace mindspore::lite {
 
 TensorList::TensorList(std::vector<int> shape, std::vector<int> element_shape, Category category)
-    : Tensor(kObjectTypeTensorType, shape, schema::Format::Format_NHWC, category), element_shape_(element_shape) {}
+    : Tensor(kObjectTypeTensorType, std::move(shape), schema::Format::Format_NHWC, category),
+      element_shape_(std::move(element_shape)) {}  // by-value arguments are moved, not copied again
 
 TensorList::~TensorList() {
   if (!this->tensors_.empty()) {
-    this->FreeData();
+    this->TensorList::FreeData();  // explicitly qualified call: no virtual dispatch from the destructor
     this->FreeTensorListData();
   }
 }
 
-TensorList &TensorList::operator=(const TensorList &src) {
-  if (&src == this) {
-    return *this;
-  }
-  auto ret = CopyTensorList(src, true);
-  if (ret == RET_ERROR) {
-    MS_LOG(ERROR) << "CopyTensorList error!";
-    MS_ASSERT(false);
-  }
-  return *this;
-}
-
 int TensorList::CopyTensorList(const TensorList &src, bool copy_data) {
   this->data_type_ = src.data_type_;
   this->tensors_data_type_ = src.tensors_data_type_;
@@ -59,6 +47,10 @@ int TensorList::CopyTensorList(const TensorList &src, bool copy_data) {
       return RET_ERROR;
     }
   } else {
+    for (auto tensor : this->tensors()) {
+      delete tensor;
+    }
+    this->tensors_.clear();
     // each tensor in tensors_ will share the same memory space.
     this->tensors_ = src.tensors_;
   }
@@ -69,17 +61,20 @@ int TensorList::CopyTensorData(const TensorList &src) {
   if (src.tensors_.empty()) {
     return RET_OK;
   }
+  for (auto tensor : this->tensors()) {
+    delete tensor;
+  }
+  this->tensors_.clear();
   for (int i = 0; i < this->ElementsNum(); ++i) {
     if (src.tensors_[i] == nullptr) {
       MS_LOG(ERROR) << "src tensors_[" << i << "] is nullptr!";
       return RET_ERROR;
     }
-    auto dst_tensor = new (std::nothrow) Tensor;
+    auto dst_tensor = Tensor::CopyTensor(*src.tensors_[i]);
     if (dst_tensor == nullptr) {
       MS_LOG(ERROR) << "CopyTensorData: new tensor[" << i << "] is failed!";
       return RET_ERROR;
     }
-    *reinterpret_cast<Tensor *>(dst_tensor) = *src.tensors_[i];
     this->tensors_.push_back(dst_tensor);
   }
   return RET_OK;
@@ -143,17 +138,11 @@ int TensorList::MallocData(const mindspore::lite::Allocator *allocator) {
   return RET_OK;
 }
 
-int TensorList::FreeData() {
+void TensorList::FreeData() {
   // free data buf of each tensor in tensors_
-  if (this->tensors_.empty()) {
-    return RET_OK;
+  for (auto tensor : tensors_) {
+    if (tensor != nullptr) tensor->FreeData();  // slots may be null, e.g. after a failed SetTensor
   }
-  for (int i = 0; i < this->ElementsNum(); ++i) {
-    if (this->tensors_[i] != nullptr) {
-      this->tensors_[i]->FreeData();
-    }
-  }
-  return RET_OK;
 }
 
 int TensorList::FreeTensorListData() {
@@ -171,7 +160,7 @@ int TensorList::FreeTensorListData() {
   return RET_OK;
 }
 
-int TensorList::SetTensorIndex(int index, Tensor *src_tensor) {
+int TensorList::SetTensor(int index, Tensor *src_tensor) {
   // your can use this fun to modify tensor[index] value
   if (src_tensor->data_type() != this->tensors_data_type_) {
     MS_LOG(ERROR) << "src_tensor->data_type()：" << src_tensor->data_type()
@@ -183,15 +172,13 @@ int TensorList::SetTensorIndex(int index, Tensor *src_tensor) {
     return RET_ERROR;
   }
   auto dst_tensor = this->tensors_[index];
-  if (dst_tensor != nullptr) {  // free original tensor data
-    delete dst_tensor;
-  }
-  this->tensors_[index] = new (std::nothrow) Tensor;
+  // free original tensor data
+  delete dst_tensor;
+  this->tensors_[index] = Tensor::CopyTensor(*src_tensor);
   if (this->tensors_[index] == nullptr) {
-    MS_LOG(ERROR) << "SetTensorIndex: new tensor is failed!";
+    MS_LOG(ERROR) << "SetTensor: new tensor is failed!";
     return RET_ERROR;
   }
-  *this->tensors_[index] = *src_tensor;
   return RET_OK;
 }
 
@@ -211,9 +198,40 @@ int TensorList::CheckTensorListParam() {
   return RET_OK;
 }
 
-Tensor *TensorList::GetTensorIndex(int index) {
+int TensorList::set_root_tensor(Tensor *tensor) {
+  auto ret = Tensor::set_root_tensor(tensor);  // base stores the root and rejects a null one
+  if (ret != RET_OK) {
+    return ret;
+  }
+  if (this->data_type_ != kObjectTypeTensorType) {  // not a tensorlist type: skip the list-specific fields
+    return RET_OK;
+  }
+  auto root_tensorlist = reinterpret_cast<TensorList *>(this->root_tensor_);  // unchecked downcast
+  if (root_tensorlist == nullptr) {  // NOTE(review): looks unreachable, a null root already returned above
+    MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist";
+    return RET_INFER_INVALID;
+  }
+  this->element_shape_ = root_tensorlist->element_shape_;  // inherit list-level metadata from the root
+  this->max_elements_num_ = root_tensorlist->max_elements_num_;
+  this->tensors_data_type_ = root_tensorlist->tensors_data_type_;
+  return RET_OK;
+}
+
+Tensor *TensorList::GetTensor(int index) {
   // return tensor[index] ptr. With this function, you can modify tensors_[index] at will.
-  if (index < 0 || index >= static_cast<int>(tensors_.size())) {
+  if (this->root_tensor_ != nullptr) {
+    if (this->data_type_ != kObjectTypeTensorType) {
+      MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist";
+      return nullptr;
+    }
+    auto root_tensorlist = reinterpret_cast<TensorList *>(this->root_tensor_);
+    if (index < 0 || index >= static_cast<int>(root_tensorlist->tensors_.size())) {
+      MS_LOG(ERROR) << "index:" << index << " must in [0, " << this->ElementsNum() - 1 << "]!";
+      return nullptr;
+    }
+    return root_tensorlist->tensors_[index];
+  }
+  if (index < 0 || index >= static_cast<int>(this->tensors_.size())) {
     MS_LOG(ERROR) << "index:" << index << " must in [0, " << this->ElementsNum() - 1 << "]!";
     return nullptr;
   }
@@ -264,5 +282,4 @@ STATUS TensorList::Decode(const int *data) {
 
 bool TensorList::IsConst() const { return this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR; }
 
-}  // namespace lite
-}  // namespace mindspore
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h
index 1291c3c80d..e8529ffafe 100644
--- a/mindspore/lite/src/tensorlist.h
+++ b/mindspore/lite/src/tensorlist.h
@@ -25,8 +25,7 @@
 #include "schema/model_generated.h"
 #include "src/tensor.h"
 
-namespace mindspore {
-namespace lite {
+namespace mindspore::lite {
 /**
  * Tensorlist is a container of vector, in which each element is a tensor object.
  * Member objects:
@@ -64,17 +63,9 @@ class TensorList : public Tensor {
 
   ~TensorList() override;
 
-  // **Note**: This is a shallow copy, src and dst tensorlist share one memory space of each tensor in tensors_
-  // If your want to not share one memory space please use "operator="
-  TensorList(const TensorList &other)
-      : Tensor(other.data_type_, other.shape()),
-        tensors_(other.tensors_),
-        tensors_data_type_(other.tensors_data_type_),
-        element_shape_(other.element_shape_),
-        max_elements_num_(other.max_elements_num_) {}
+  TensorList(const TensorList &other) = delete;
 
-  // tensorlist deep copy memory
-  TensorList &operator=(const TensorList &tl);
+  TensorList &operator=(const TensorList &tl) = delete;
 
   void set_element_shape(const std::vector<int> &shape) { element_shape_ = shape; }
 
@@ -90,15 +81,15 @@ class TensorList : public Tensor {
 
   int FreeTensorListData();
 
-  int FreeData() override;
+  void FreeData() override;
 
   int CopyTensorList(const TensorList &src, bool copy_data);
 
   int CopyTensorData(const TensorList &src);
 
-  int SetTensorIndex(int index, Tensor *);
+  int SetTensor(int index, Tensor *src_tensor);
 
-  Tensor *GetTensorIndex(int index);
+  Tensor *GetTensor(int index);
 
   void set_tensors_data_type(TypeId type) { tensors_data_type_ = type; }
 
@@ -106,6 +97,8 @@ class TensorList : public Tensor {
 
   std::vector<Tensor *> &tensors() { return tensors_; }
 
+  void set_tensors(const std::vector<Tensor *> &tensors) { this->tensors_ = tensors; }  // copies pointers only
+
   int CheckTensorListParam();
 
   bool IsCompatibleShape(const std::vector<int> &shape);
@@ -116,18 +109,19 @@ class TensorList : public Tensor {
 
   bool IsConst() const override;
 
+  int set_root_tensor(Tensor *tensor) override;
+
  protected:
   // The following functions must be masked.
-  void set_data(void *data) override { return; }
+  void set_data(void *data) override {}
   void *data_c() const override { return nullptr; }
   void *MutableData() override { return nullptr; }
   size_t Size() const override { return 0; }
-  std::vector<Tensor *> tensors_;
-  TypeId tensors_data_type_;
-  std::vector<int> element_shape_;
+  std::vector<Tensor *> tensors_{};
+  TypeId tensors_data_type_ = kTypeUnknown;
+  std::vector<int> element_shape_{};
   int max_elements_num_ = -1;
 };
-}  // namespace lite
-}  // namespace mindspore
+}  // namespace mindspore::lite
 
 #endif  // MINDSPORE_LITE_SRC_TENSORLIST_H_
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
index 57c7ece607..484175ac6f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
@@ -43,23 +43,23 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
 
   // simulating benchmark:  session_->CompileGraph() -> ConvertTensors()
   MS_LOG(DEBUG) << "create Tensors & init weight data";
-  std::vector<Tensor> tensors;
+  std::vector<std::shared_ptr<Tensor>> tensors;
   // firstly, create all Tensors
   tensors.reserve(input_infos.size());  // vector's capacity() is 0, so call reserve() avoiding vector re-malloc
   for (auto input_info : input_infos) {
     auto &shape = std::get<0>(input_info);
     auto category = std::get<2>(input_info);
     auto data_type = std::get<3>(input_info);
-    tensors.emplace_back(data_type, shape, Format_NHWC, category);
+    tensors.emplace_back(std::make_shared<Tensor>(data_type, shape, Format_NHWC, category));
   }
   // secondly, init weight Tensor's data
   std::vector<Tensor *> kernel_inputs;
   std::vector<Tensor *> subgraph_inputs;
   std::map<Tensor *, float *> subgraph_inputs_data;
   for (int i = 0; i < tensors.size(); ++i) {
-    auto *tensor = &tensors[i];
+    auto tensor = tensors[i];
     auto *input_data = std::get<1>(input_infos[i]);
-    kernel_inputs.push_back(tensor);
+    kernel_inputs.push_back(tensor.get());
     if (tensor->category() != VAR) {  // tensor is weight
       // simulating src/lite_session.cc:WeightTensorNeedCopy()
       if (packed_op.count(primitive_type)) {
@@ -69,8 +69,8 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
       }
     } else {
       EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32);
-      subgraph_inputs.push_back(tensor);
-      subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
+      subgraph_inputs.push_back(tensor.get());
+      subgraph_inputs_data[tensor.get()] = reinterpret_cast<float *>(input_data);
     }
   }
 
@@ -115,7 +115,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
   // simulating benchmark:  model->Free(), clear weight data in input_infos
   std::vector<std::unique_ptr<uint8_t[]>> saved_weights;
   for (int i = 0; i < tensors.size(); ++i) {
-    auto *tensor = &tensors[i];
+    auto &tensor = tensors[i];
     if (tensor->category() != VAR) {
       saved_weights.emplace_back(new uint8_t[tensor->Size()]);
       auto *weight_data = std::get<1>(input_infos[i]);
@@ -143,12 +143,12 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
 
   MS_LOG(DEBUG) << "release resources";
   for (auto &tensor : tensors) {
-    if (tensor.category() != VAR && packed_op.count(primitive_type)) {
-      tensor.set_data(nullptr);
+    if (tensor->category() != VAR && packed_op.count(primitive_type)) {
+      tensor->set_data(nullptr);
     }
   }
   for (int i = 0, j = 0; i < tensors.size(); ++i) {  // resume weight data to input_infos
-    auto *tensor = &tensors[i];
+    auto &tensor = tensors[i];
     if (tensor->category() != VAR) {
       auto *weight_data = std::get<1>(input_infos[i]);
       memcpy(weight_data, saved_weights[j++].get(), tensor->Size());