diff --git a/mindspore/lite/include/errorcode.h b/mindspore/lite/include/errorcode.h
index cf61470844..ff74b02dac 100644
--- a/mindspore/lite/include/errorcode.h
+++ b/mindspore/lite/include/errorcode.h
@@ -28,13 +28,14 @@ using STATUS = int;
 constexpr int RET_OK = 0; /**< No error occurs. */
 
 /* Common error code, range: [-1, -100）*/
-constexpr int RET_ERROR = -1;         /**< Common error code. */
-constexpr int RET_NULL_PTR = -2;      /**< NULL pointer returned.*/
-constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/
-constexpr int RET_NO_CHANGE = -4;     /**< No change. */
-constexpr int RET_SUCCESS_EXIT = -5;  /**< No error but exit. */
-constexpr int RET_MEMORY_FAILED = -6; /**< Fail to create memory. */
-constexpr int RET_NOT_SUPPORT = -7;   /**< Fail to support. */
+constexpr int RET_ERROR = -1;             /**< Common error code. */
+constexpr int RET_NULL_PTR = -2;          /**< NULL pointer returned.*/
+constexpr int RET_PARAM_INVALID = -3;     /**< Invalid parameter.*/
+constexpr int RET_NO_CHANGE = -4;         /**< No change. */
+constexpr int RET_SUCCESS_EXIT = -5;      /**< No error but exit. */
+constexpr int RET_MEMORY_FAILED = -6;     /**< Fail to create memory. */
+constexpr int RET_NOT_SUPPORT = -7;       /**< Fail to support. */
+constexpr int RET_THREAD_POOL_ERROR = -8; /**< Error occur in thread pool. */
 
 /* Executor error code, range: [-100,-200) */
 constexpr int RET_OUT_OF_TENSOR_RANGE = -100; /**< Failed to check range. */
diff --git a/mindspore/lite/src/errorcode.cc b/mindspore/lite/src/errorcode.cc
index 1c555fccb2..3fc8cb4f68 100644
--- a/mindspore/lite/src/errorcode.cc
+++ b/mindspore/lite/src/errorcode.cc
@@ -29,6 +29,7 @@ std::string GetErrorInfo(STATUS status) {
                                          {RET_SUCCESS_EXIT, "No error but exit."},
                                          {RET_MEMORY_FAILED, "Fail to create memory."},
                                          {RET_NOT_SUPPORT, "Fail to support."},
+                                         {RET_THREAD_POOL_ERROR, "Thread pool error."},
                                          {RET_OUT_OF_TENSOR_RANGE, "Failed to check range."},
                                          {RET_INPUT_TENSOR_ERROR, "Failed to check input tensor."},
                                          {RET_REENTRANT_ERROR, "Exist executor running."},
diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc
index 6d7f5aad04..6227f71ce4 100644
--- a/mindspore/lite/src/lite_kernel.cc
+++ b/mindspore/lite/src/lite_kernel.cc
@@ -17,13 +17,14 @@
 #include "src/lite_kernel.h"
 #include <algorithm>
 #include <queue>
+#include <set>
 #include "src/tensor.h"
 #include "src/common/utils.h"
 
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
-
+#ifdef SUPPORT_TRAIN
 void *LiteKernel::workspace_ = nullptr;
 
 void LiteKernel::AllocWorkspace(size_t size) {
@@ -40,7 +41,7 @@ void LiteKernel::FreeWorkspace() {
   free(workspace_);
   workspace_ = nullptr;
 }
-
+#endif
 bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
   return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) {
     if (IsContain(scope_tensors, kernel_in_tensor)) {
diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index d1466c4d27..4e4a3a7d75 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -169,15 +169,18 @@ class LiteKernel {
   void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; }
 
   const mindspore::lite::PrimitiveC *GetPrimitive() const { return primitive_; }
+
+  SubGraphType subgraph_type() const { return this->subgraph_type_; }
+
+  virtual std::string ToString() const;
+
+#ifdef SUPPORT_TRAIN
   void set_workspace_size(size_t value) { workspace_size_ = value; }
   size_t workspace_size() { return workspace_size_; }
   static void AllocWorkspace(size_t size);
   static void FreeWorkspace();
   void *workspace() { return workspace_; }
-
-  SubGraphType subgraph_type() const { return this->subgraph_type_; }
-
-  virtual std::string ToString() const;
+#endif
 
  protected:
   bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->infer_flag()); }
@@ -195,9 +198,11 @@ class LiteKernel {
   bool train_mode_ = false;
   bool trainable_ = false;  // paramaters of this Kernel are trained in Train Session
   bool is_model_output_ = false;
+  SubGraphType subgraph_type_ = kNotSubGraph;
+#ifdef SUPPORT_TRAIN
   size_t workspace_size_ = 0;
   static void *workspace_;
-  SubGraphType subgraph_type_ = kNotSubGraph;
+#endif
 };
 
 typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
diff --git a/mindspore/lite/src/runtime/thread_pool.c b/mindspore/lite/src/runtime/thread_pool.c
index ae20ee7f81..cfe3ea2220 100644
--- a/mindspore/lite/src/runtime/thread_pool.c
+++ b/mindspore/lite/src/runtime/thread_pool.c
@@ -40,7 +40,7 @@
 #endif
 
 #define RET_TP_OK (0)
-#define RET_TP_ERROR (1)
+#define RET_TP_ERROR (-8)
 #define RET_TP_SYSTEM_ERROR (-1)
 
 #define MAX_THREAD_NUM (8)
@@ -49,6 +49,8 @@
 typedef struct {
   int (*func)(void *arg, int);
   void *content;
+  int *return_code;
+  int task_num;
 } Task;
 
 typedef struct Thread {
@@ -669,8 +671,11 @@ int DistributeTask(struct ThreadPool *thread_pool, Task *task, int task_num) {
     return RET_TP_ERROR;
   }
   bool k_success_flag = false;
-  int size = thread_pool->thread_num < task_num ? thread_pool->thread_num : task_num;
-  for (int i = 0; i < size - 1; ++i) {
+  if (thread_pool->thread_num < task_num) {
+    LOG_ERROR("task_num: %d should not be larger than thread num: %d", task_num, thread_pool->thread_num);
+    return RET_TP_ERROR;
+  }
+  for (int i = 0; i < task_num - 1; ++i) {
     do {
       k_success_flag = true;
       if (!PushTaskToQueue(thread_pool, i, task)) {
@@ -683,9 +688,18 @@ int DistributeTask(struct ThreadPool *thread_pool, Task *task, int task_num) {
     LOG_ERROR("task->func is nullptr");
     return RET_TP_ERROR;
   }
-  task->func(task->content, size - 1);
+  if (task->task_num <= task_num - 1) {
+    LOG_ERROR("task_num out of range in master thread");
+    return RET_TP_ERROR;
+  }
+  task->return_code[task_num - 1] = task->func(task->content, task_num - 1);
   // wait
   WaitAllThread(thread_pool);
+  for (size_t i = 0; i < task->task_num; i++) {
+    if (task->return_code[i] != 0) {
+      return task->return_code[i];
+    }
+  }
   return RET_TP_OK;
 }
 
@@ -697,14 +711,26 @@ int AddTask(struct ThreadPool *thread_pool, int func(void *, int), void *content
   // if single thread, run master thread
   if (thread_pool->thread_num <= 1 || task_num <= 1) {
     for (int i = 0; i < task_num; ++i) {
-      func(content, i);
+      int ret = func(content, i);
+      if (ret != 0) {
+        return ret;
+      }
     }
     return RET_TP_OK;
   }
   Task task;
   task.func = func;
   task.content = content;
-  return DistributeTask(thread_pool, &task, task_num);
+  task.return_code = (int *)malloc(sizeof(int) * task_num);
+  task.task_num = task_num;
+  if (task.return_code == NULL) {
+    LOG_ERROR("malloc return code return nullptr");
+    return RET_TP_ERROR;
+  }
+  memset(task.return_code, 0, sizeof(int) * task_num);
+  int ret = DistributeTask(thread_pool, &task, task_num);
+  free(task.return_code);
+  return ret;
 }
 
 int ParallelLaunch(struct ThreadPool *thread_pool, int (*func)(void *, int), void *content, int task_num) {
@@ -730,7 +756,11 @@ void ThreadRun(Thread *thread) {
           LOG_ERROR("task->func is nullptr");
           return;
         }
-        task->func(task->content, thread_id);
+        if (task->task_num <= thread_id) {
+          LOG_ERROR("task_num out of range in worker thread");
+          return;
+        }
+        task->return_code[thread_id] = task->func(task->content, thread_id);
         atomic_fetch_sub_explicit(&thread->task_size, 1, memory_order_release);
         spin_count = 0;
         sem_trywait(&thread->sem);
diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc
index 6b8b215050..23d798ed86 100644
--- a/mindspore/lite/src/tensor.cc
+++ b/mindspore/lite/src/tensor.cc
@@ -209,10 +209,15 @@ int32_t Tensor::Width() const {
 }
 
 size_t Tensor::Size() const {
-  size_t size = DataTypeSize(this->data_type_);
-  size *= (format_ == schema::Format::Format_NC4HW4 || format_ == schema::Format::Format_NHWC4) ? ElementsC4Num()
-                                                                                                : ElementsNum();
-  return size;
+  size_t element_size = DataTypeSize(this->data_type_);
+  auto element_num = (format_ == schema::Format::Format_NC4HW4 || format_ == schema::Format::Format_NHWC4)
+                       ? ElementsC4Num()
+                       : ElementsNum();
+  if (element_num < 0) {
+    MS_LOG(ERROR) << "Element number of tensor should large than 0 : " << element_num;
+    return 0;
+  }
+  return element_size * element_num;
 }
 
 int Tensor::ElementsNum() const {
diff --git a/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg b/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg
index 4aacb8ecf4..05da988dd3 100644
--- a/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg
+++ b/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg
@@ -1,7 +1,7 @@
 lite-model_arbitrary-image-stylization-inceptionv3_dr_transfer_1.tflite
 lite-model_arbitrary-image-stylization-inceptionv3_int8_transfer_1.tflite
 lite-model_arbitrary-image-stylization-inceptionv3_fp16_transfer_1.tflite;2
-lite-model_arbitrary-image-stylization-inceptionv3-dynamic-shapes_dr_transfer_1.tflite
+# lite-model_arbitrary-image-stylization-inceptionv3-dynamic-shapes_dr_transfer_1.tflite # has nan input for rsqrt
 lite-model_cartoongan_dr_1.tflite
 mindspore_efficientnet_b0.mindir
 mindspore_efficientnet_b4minus.mindir
@@ -17,7 +17,7 @@ lite-model_east-text-detector_dr_1.tflite
 magenta_arbitrary-image-stylization-v1-256_fp16_transfer_1.tflite;2
 magenta_arbitrary-image-stylization-v1-256_int8_transfer_1.tflite
 magenta_arbitrary-image-stylization-v1-256_int8_prediction_1.tflite
-albert_lite_base_squadv1_1.tflite;3
+# albert_lite_base_squadv1_1.tflite;3 # input need in range [0,2)
 efficientnet_lite0_int8_2.tflite
 efficientnet_lite1_int8_2.tflite
 efficientnet_lite2_int8_2.tflite
@@ -26,7 +26,7 @@ efficientnet_lite4_int8_2.tflite
 mtk_transformer_encoder.tflite
 mtk_transformer_decoder_joint.tflite
 ml_ei_facedetection.onnx
-mobilebert_1_default_1.tflite;3
+# mobilebert_1_default_1.tflite;3  # input need in range [0,2)
 quant_aware_bank_card_detection_inception.onnx
 quant_aware_bank_card_recognition_fcny.onnx
 quant_aware_identify_card_detect.onnx
diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh
index b2d6e79f44..3020e0dbc7 100644
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@@ -784,6 +784,9 @@ function Run_x86_sse() {
     # Run converted models which has several inputs or does not need to be cared about the accuracy:
     while read line; do
         model_name=${line%%;*}
+        if [[ $model_name == \#* ]]; then
+          continue
+        fi
         model_name_len=${#model_name}
         input_params=${line:model_name_len+1}
         input_num=${input_params%%;*}
@@ -1163,7 +1166,7 @@ function Run_arm64() {
         else
             run_result='arm64_gpu_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
-	#sleep 1
+    #sleep 1
     done < ${models_gpu_fp16_config}
 
     # Run GPU weightquant converted models:
@@ -1182,7 +1185,7 @@ function Run_arm64() {
         else
             run_result='arm64_gpu_weightquant: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
-	#sleep 1
+    #sleep 1
     done < ${models_gpu_weightquant_config}
 
     # Run mindir converted models:
@@ -1206,8 +1209,8 @@ function Run_arm64() {
         # run benchmark test without clib data
         echo ${model_name} >> "${run_arm64_log_file}"
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -1266,6 +1269,9 @@ function Run_arm64() {
     # Run converted models which has several inputs or does not need to be cared about the accuracy:
     while read line; do
         model_name=${line%%;*}
+        if [[ $model_name == \#* ]]; then
+          continue
+        fi
         model_name_len=${#model_name}
         input_params=${line:model_name_len+1}
         input_num=${input_params%%;*}
@@ -1379,15 +1385,15 @@ echo ${basepath}
 while getopts "r:m:d:" opt; do
     case ${opt} in
         r)
-	    release_path=${OPTARG}
+            release_path=${OPTARG}
             echo "release_path is ${OPTARG}"
             ;;
         m)
-	    models_path=${OPTARG}
+            models_path=${OPTARG}
             echo "models_path is ${OPTARG}"
             ;;
         d)
-	    device_id=${OPTARG}
+            device_id=${OPTARG}
             echo "device_id is ${OPTARG}"
             ;;
         ?)
diff --git a/mindspore/lite/test/st/sub_graph_test.cc b/mindspore/lite/test/st/sub_graph_test.cc
index fbe29e4f7b..e05946a2bc 100644
--- a/mindspore/lite/test/st/sub_graph_test.cc
+++ b/mindspore/lite/test/st/sub_graph_test.cc
@@ -32,160 +32,286 @@
 namespace mindspore {
 class SubGraphTest : public mindspore::CommonTest {
  public:
-  SubGraphTest() {}
+  SubGraphTest() = default;
 };
 
 TEST_F(SubGraphTest, RecursiveSubGraphTest) {
-  // add0 partial1 2 3 tensor0 1 2
-  auto add_0 = std::make_unique<schema::CNodeT>();
-  add_0->inputIndex = {0, 1};
-  add_0->outputIndex = {2};
-  add_0->primitive = std::make_unique<schema::PrimitiveT>();
-  add_0->primitive->value.type = schema::PrimitiveType_Add;
-  auto add_0_prim = new schema::AddT;
-  add_0_prim->activationType = schema::ActivationType_NO_ACTIVATION;
-  add_0->primitive->value.value = add_0_prim;
-  add_0->name = "Add0";
-  auto partial_1 = std::make_unique<schema::CNodeT>();
-  partial_1->inputIndex = {2};
-  partial_1->outputIndex = {7};
-  partial_1->primitive = std::make_unique<schema::PrimitiveT>();
-  partial_1->primitive->value.type = schema::PrimitiveType_Partial;
-  auto partial_1_prim = new schema::PartialT;
-  partial_1_prim->subGraphIndex = 1;
-  partial_1->primitive->value.value = partial_1_prim;
-  partial_1->name = "Partial1";
-  auto partial_2 = std::make_unique<schema::CNodeT>();
-  partial_2->inputIndex = {2};
-  partial_2->outputIndex = {7};
-  partial_2->primitive = std::make_unique<schema::PrimitiveT>();
-  partial_2->primitive->value.type = schema::PrimitiveType_Partial;
-  auto partial_2_prim = new schema::PartialT;
-  partial_2_prim->subGraphIndex = 2;
-  partial_2->primitive->value.value = partial_2_prim;
-  partial_2->name = "Partial2";
-  auto partial_3 = std::make_unique<schema::CNodeT>();
-  partial_3->inputIndex = {4, 6};
-  partial_3->outputIndex = {7};
-  partial_3->primitive = std::make_unique<schema::PrimitiveT>();
-  partial_3->primitive->value.type = schema::PrimitiveType_Partial;
-  auto partial_3_prim = new schema::PartialT;
-  partial_3_prim->subGraphIndex = 3;
-  partial_3->primitive->value.value = partial_3_prim;
-  partial_3->name = "Partial3";
-  auto tensor_0 = std::make_unique<schema::TensorT>();
-  tensor_0->nodeType = schema::NodeType::NodeType_Parameter;
-  tensor_0->format = schema::Format_NHWC;
-  tensor_0->dataType = TypeId::kNumberTypeFloat32;
-  tensor_0->dims = {1, 2};
-  auto tensor_1 = std::make_unique<schema::TensorT>();
-  tensor_1->nodeType = schema::NodeType::NodeType_ValueNode;
-  tensor_1->format = schema::Format_NHWC;
-  tensor_1->dataType = TypeId::kNumberTypeFloat32;
-  tensor_1->dims = {1, 2};
-  auto tensor_2 = std::make_unique<schema::TensorT>();
-  tensor_2->nodeType = schema::NodeType::NodeType_Parameter;
-  tensor_2->format = schema::Format_NHWC;
-  tensor_2->dataType = TypeId::kNumberTypeFloat32;
-  auto sub_graph_0 = std::make_unique<schema::SubGraphT>();
-  sub_graph_0->name = "main_graph";
-  sub_graph_0->inputIndices = {0};
-  sub_graph_0->outputIndices = {7};
-  sub_graph_0->nodeIndices = {0, 1, 2};
-  sub_graph_0->tensorIndices = {0, 1, 2, 7};
-  // add1 tensor3 4
-  auto add_1 = std::make_unique<schema::CNodeT>();
-  add_1->inputIndex = {2, 3};
-  add_1->outputIndex = {4};
-  add_1->primitive = std::make_unique<schema::PrimitiveT>();
-  add_1->primitive->value.type = schema::PrimitiveType_Add;
-  auto add_1_prim = new schema::AddT;
-  add_1_prim->activationType = schema::ActivationType_NO_ACTIVATION;
-  add_1->primitive->value.value = add_1_prim;
-  add_1->name = "Add1";
-  auto tensor_3 = std::make_unique<schema::TensorT>();
-  tensor_3->nodeType = schema::NodeType::NodeType_ValueNode;
-  tensor_3->format = schema::Format_NHWC;
-  tensor_3->dataType = TypeId::kNumberTypeFloat32;
-  tensor_3->dims = {1, 2};
-  auto tensor_4 = std::make_unique<schema::TensorT>();
-  tensor_4->nodeType = schema::NodeType::NodeType_Parameter;
-  tensor_4->format = schema::Format_NHWC;
-  tensor_4->dataType = TypeId::kNumberTypeFloat32;
-  auto sub_graph_1 = std::make_unique<schema::SubGraphT>();
-  sub_graph_1->name = "sub_graph_1";
-  sub_graph_1->inputIndices = {2};
-  sub_graph_1->outputIndices = {7};
-  sub_graph_1->nodeIndices = {4, 3};
-  sub_graph_1->tensorIndices = {2, 3, 4, 7};
-  // add2 tensor5 6
-  auto add_2 = std::make_unique<schema::CNodeT>();
-  add_2->inputIndex = {2, 5};
-  add_2->outputIndex = {6};
-  add_2->primitive = std::make_unique<schema::PrimitiveT>();
-  add_2->primitive->value.type = schema::PrimitiveType_Add;
-  auto add_2_prim = new schema::AddT;
-  add_2_prim->activationType = schema::ActivationType_NO_ACTIVATION;
-  add_2->primitive->value.value = add_2_prim;
-  add_2->name = "Add2";
-  auto tensor_5 = std::make_unique<schema::TensorT>();
-  tensor_5->nodeType = schema::NodeType::NodeType_ValueNode;
-  tensor_5->format = schema::Format_NHWC;
-  tensor_5->dataType = TypeId::kNumberTypeFloat32;
-  tensor_5->dims = {1, 2};
-  auto tensor_6 = std::make_unique<schema::TensorT>();
-  tensor_6->nodeType = schema::NodeType::NodeType_Parameter;
-  tensor_6->format = schema::Format_NHWC;
-  tensor_6->dataType = TypeId::kNumberTypeFloat32;
-  auto sub_graph_2 = std::make_unique<schema::SubGraphT>();
-  sub_graph_2->name = "sub_graph_2";
-  sub_graph_2->inputIndices = {2};
-  sub_graph_2->outputIndices = {7};
-  sub_graph_2->nodeIndices = {5, 3};
-  sub_graph_2->tensorIndices = {2, 5, 6, 7};
-  // add3 tensor7
-  auto add_3 = std::make_unique<schema::CNodeT>();
-  add_3->inputIndex = {4, 6};
-  add_3->outputIndex = {7};
-  add_3->primitive = std::make_unique<schema::PrimitiveT>();
-  add_3->primitive->value.type = schema::PrimitiveType_Add;
-  auto add_3_prim = new schema::AddT;
-  add_3_prim->activationType = schema::ActivationType_NO_ACTIVATION;
-  add_3->primitive->value.value = add_3_prim;
-  add_3->name = "Add3";
-  auto tensor_7 = std::make_unique<schema::TensorT>();
-  tensor_7->nodeType = schema::NodeType::NodeType_Parameter;
-  tensor_7->format = schema::Format_NHWC;
-  tensor_7->dataType = TypeId::kNumberTypeFloat32;
-  auto sub_graph_3 = std::make_unique<schema::SubGraphT>();
-  sub_graph_3->name = "sub_graph_3";
-  sub_graph_3->inputIndices = {4, 6};
-  sub_graph_3->outputIndices = {7};
-  sub_graph_3->nodeIndices = {6};
-  sub_graph_3->tensorIndices = {4, 6, 7};
-
-  // make graph
   auto meta_graph = std::make_shared<schema::MetaGraphT>();
+  meta_graph->allTensors.resize(16);
+  {    // subgraph-0
+    {  // add-0
+      auto add_0 = std::make_unique<schema::CNodeT>();
+      add_0->inputIndex = {0, 1};
+      add_0->outputIndex = {2};
+      add_0->primitive = std::make_unique<schema::PrimitiveT>();
+      add_0->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_0_prim = new schema::AddT;
+      add_0_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_0->primitive->value.value = add_0_prim;
+      add_0->name = "Add0";
+      auto tensor_0 = std::make_unique<schema::TensorT>();
+      tensor_0->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_0->format = schema::Format_NHWC;
+      tensor_0->dataType = TypeId::kNumberTypeFloat32;
+      tensor_0->dims = {1};
+      auto tensor_1 = std::make_unique<schema::TensorT>();
+      tensor_1->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_1->format = schema::Format_NHWC;
+      tensor_1->dataType = TypeId::kNumberTypeFloat32;
+      tensor_1->dims = {1};
+      tensor_1->data.resize(sizeof(float));
+      auto data1 = reinterpret_cast<float *>(tensor_1->data.data());
+      ASSERT_NE(data1, nullptr);
+      data1[0] = 1;
+      auto tensor_2 = std::make_unique<schema::TensorT>();
+      tensor_2->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_2->format = schema::Format_NHWC;
+      tensor_2->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(add_0));
+      meta_graph->allTensors[0] = std::move(tensor_0);
+      meta_graph->allTensors[1] = std::move(tensor_1);
+      meta_graph->allTensors[2] = std::move(tensor_2);
+    }
+    {  // add-1
+      auto add_1 = std::make_unique<schema::CNodeT>();
+      add_1->inputIndex = {2, 3};
+      add_1->outputIndex = {4};
+      add_1->primitive = std::make_unique<schema::PrimitiveT>();
+      add_1->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_1_prim = new schema::AddT;
+      add_1_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_1->primitive->value.value = add_1_prim;
+      add_1->name = "Add1";
+      auto tensor_3 = std::make_unique<schema::TensorT>();
+      tensor_3->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_3->format = schema::Format_NHWC;
+      tensor_3->dataType = TypeId::kNumberTypeFloat32;
+      tensor_3->dims = {1};
+      tensor_3->data.resize(sizeof(float));
+      auto data3 = reinterpret_cast<float *>(tensor_3->data.data());
+      ASSERT_NE(data3, nullptr);
+      data3[0] = 1;
+      auto tensor_4 = std::make_unique<schema::TensorT>();
+      tensor_4->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_4->format = schema::Format_NHWC;
+      tensor_4->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(add_1));
+      meta_graph->allTensors[3] = std::move(tensor_3);
+      meta_graph->allTensors[4] = std::move(tensor_4);
+    }
+    {  // partial cond
+      auto partial_cond = std::make_unique<schema::CNodeT>();
+      partial_cond->inputIndex = {4};
+      partial_cond->outputIndex = {9};
+      partial_cond->primitive = std::make_unique<schema::PrimitiveT>();
+      partial_cond->primitive->value.type = schema::PrimitiveType_Partial;
+      auto partial_cond_prim = new schema::PartialT;
+      partial_cond_prim->subGraphIndex = 1;
+      partial_cond->primitive->value.value = partial_cond_prim;
+      partial_cond->name = "partial_cond";
+      meta_graph->nodes.emplace_back(std::move(partial_cond));
+    }
+    {  // add-5
+      auto add_5 = std::make_unique<schema::CNodeT>();
+      add_5->inputIndex = {9, 13};
+      add_5->outputIndex = {14};
+      add_5->primitive = std::make_unique<schema::PrimitiveT>();
+      add_5->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_5_prim = new schema::AddT;
+      add_5_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_5->primitive->value.value = add_5_prim;
+      add_5->name = "Add5";
+      auto tensor_13 = std::make_unique<schema::TensorT>();
+      tensor_13->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_13->format = schema::Format_NHWC;
+      tensor_13->dataType = TypeId::kNumberTypeFloat32;
+      tensor_13->dims = {1};
+      tensor_13->data.resize(sizeof(float));
+      auto data13 = reinterpret_cast<float *>(tensor_13->data.data());
+      ASSERT_NE(data13, nullptr);
+      data13[0] = 1;
+      auto tensor_14 = std::make_unique<schema::TensorT>();
+      tensor_14->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_14->format = schema::Format_NHWC;
+      tensor_14->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(add_5));
+      meta_graph->allTensors[13] = std::move(tensor_13);
+      meta_graph->allTensors[14] = std::move(tensor_14);
+    }
+    auto sub_graph_0 = std::make_unique<schema::SubGraphT>();
+    sub_graph_0->name = "main_graph";
+    sub_graph_0->inputIndices = {0};
+    sub_graph_0->outputIndices = {14};
+    sub_graph_0->nodeIndices = {0, 1, 2, 3};
+    sub_graph_0->tensorIndices = {0, 1, 2, 3, 4, 9, 13, 14};
+    meta_graph->subGraph.emplace_back(std::move(sub_graph_0));
+  }
+  {    // subgraph-1
+    {  // add-2
+      auto add_2 = std::make_unique<schema::CNodeT>();
+      add_2->inputIndex = {4, 5};
+      add_2->outputIndex = {6};
+      add_2->primitive = std::make_unique<schema::PrimitiveT>();
+      add_2->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_2_prim = new schema::AddT;
+      add_2_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_2->primitive->value.value = add_2_prim;
+      add_2->name = "Add2";
+      auto tensor_5 = std::make_unique<schema::TensorT>();
+      tensor_5->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_5->format = schema::Format_NHWC;
+      tensor_5->dataType = TypeId::kNumberTypeFloat32;
+      tensor_5->dims = {1};
+      tensor_5->data.resize(sizeof(float));
+      auto data5 = reinterpret_cast<float *>(tensor_5->data.data());
+      ASSERT_NE(data5, nullptr);
+      data5[0] = 1;
+      auto tensor_6 = std::make_unique<schema::TensorT>();
+      tensor_6->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_6->format = schema::Format_NHWC;
+      tensor_6->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(add_2));
+      meta_graph->allTensors[5] = std::move(tensor_5);
+      meta_graph->allTensors[6] = std::move(tensor_6);
+    }
+    {  // less
+      auto less = std::make_unique<schema::CNodeT>();
+      less->inputIndex = {6, 15};
+      less->outputIndex = {7};
+      less->primitive = std::make_unique<schema::PrimitiveT>();
+      less->primitive->value.type = schema::PrimitiveType_Less;
+      auto less_prim = new schema::LessT;
+      less->primitive->value.value = less_prim;
+      less->name = "less";
+      auto tensor_15 = std::make_unique<schema::TensorT>();
+      tensor_15->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_15->format = schema::Format_NHWC;
+      tensor_15->dataType = TypeId::kNumberTypeFloat32;
+      tensor_15->dims = {1};
+      tensor_15->data.resize(sizeof(float));
+      auto data15 = reinterpret_cast<float *>(tensor_15->data.data());
+      ASSERT_NE(data15, nullptr);
+      data15[0] = 1;
+      auto tensor_7 = std::make_unique<schema::TensorT>();
+      tensor_7->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_7->format = schema::Format_NHWC;
+      tensor_7->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(less));
+      meta_graph->allTensors[7] = std::move(tensor_7);
+      meta_graph->allTensors[15] = std::move(tensor_15);
+    }
+    {  // switch
+      auto switchop = std::make_unique<schema::CNodeT>();
+      switchop->inputIndex = {7, 4};
+      switchop->outputIndex = {8, 9};
+      switchop->primitive = std::make_unique<schema::PrimitiveT>();
+      switchop->primitive->value.type = schema::PrimitiveType_Switch;
+      auto switch_prim = new schema::SwitchT;
+      switchop->primitive->value.value = switch_prim;
+      switchop->name = "switch";
+      auto tensor_8 = std::make_unique<schema::TensorT>();
+      tensor_8->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_8->format = schema::Format_NHWC;
+      tensor_8->dataType = TypeId::kNumberTypeFloat32;
+      auto tensor_9 = std::make_unique<schema::TensorT>();
+      tensor_9->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_9->format = schema::Format_NHWC;
+      tensor_9->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(switchop));
+      meta_graph->allTensors[8] = std::move(tensor_8);
+      meta_graph->allTensors[9] = std::move(tensor_9);
+    }
+    {  // partial body
+      auto partial_body = std::make_unique<schema::CNodeT>();
+      partial_body->inputIndex = {8};
+      partial_body->outputIndex = {4};
+      partial_body->primitive = std::make_unique<schema::PrimitiveT>();
+      partial_body->primitive->value.type = schema::PrimitiveType_Partial;
+      auto partial_body_prim = new schema::PartialT;
+      partial_body_prim->subGraphIndex = 2;
+      partial_body->primitive->value.value = partial_body_prim;
+      partial_body->name = "partial_body";
+      meta_graph->nodes.emplace_back(std::move(partial_body));
+    }
+    auto sub_graph_1 = std::make_unique<schema::SubGraphT>();
+    sub_graph_1->name = "while_cond";
+    sub_graph_1->inputIndices = {4};
+    sub_graph_1->outputIndices = {9};
+    sub_graph_1->nodeIndices = {4, 5, 6, 7};
+    sub_graph_1->tensorIndices = {4, 5, 6, 7, 8, 9, 15};
+    meta_graph->subGraph.emplace_back(std::move(sub_graph_1));
+  }
+  {    // subgraph-2
+    {  // add-3
+      auto add_3 = std::make_unique<schema::CNodeT>();
+      add_3->inputIndex = {8, 10};
+      add_3->outputIndex = {11};
+      add_3->primitive = std::make_unique<schema::PrimitiveT>();
+      add_3->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_3_prim = new schema::AddT;
+      add_3_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_3->primitive->value.value = add_3_prim;
+      add_3->name = "Add3";
+      auto tensor_10 = std::make_unique<schema::TensorT>();
+      tensor_10->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_10->format = schema::Format_NHWC;
+      tensor_10->dataType = TypeId::kNumberTypeFloat32;
+      tensor_10->dims = {1};
+      tensor_10->data.resize(sizeof(float));
+      auto data10 = reinterpret_cast<float *>(tensor_10->data.data());
+      ASSERT_NE(data10, nullptr);
+      data10[0] = 1;
+      auto tensor_11 = std::make_unique<schema::TensorT>();
+      tensor_11->nodeType = schema::NodeType::NodeType_Parameter;
+      tensor_11->format = schema::Format_NHWC;
+      tensor_11->dataType = TypeId::kNumberTypeFloat32;
+      meta_graph->nodes.emplace_back(std::move(add_3));
+      meta_graph->allTensors[10] = std::move(tensor_10);
+      meta_graph->allTensors[11] = std::move(tensor_11);
+    }
+    {  // add-4
+      auto add_4 = std::make_unique<schema::CNodeT>();
+      add_4->inputIndex = {11, 12};
+      add_4->outputIndex = {4};
+      add_4->primitive = std::make_unique<schema::PrimitiveT>();
+      add_4->primitive->value.type = schema::PrimitiveType_Add;
+      auto add_4_prim = new schema::AddT;
+      add_4_prim->activationType = schema::ActivationType_NO_ACTIVATION;
+      add_4->primitive->value.value = add_4_prim;
+      add_4->name = "Add4";
+      auto tensor_12 = std::make_unique<schema::TensorT>();
+      tensor_12->nodeType = schema::NodeType::NodeType_ValueNode;
+      tensor_12->format = schema::Format_NHWC;
+      tensor_12->dataType = TypeId::kNumberTypeFloat32;
+      tensor_12->dims = {1};
+      tensor_12->data.resize(sizeof(float));
+      auto data12 = reinterpret_cast<float *>(tensor_12->data.data());
+      ASSERT_NE(data12, nullptr);
+      data12[0] = 1;
+      meta_graph->nodes.emplace_back(std::move(add_4));
+      meta_graph->allTensors[12] = std::move(tensor_12);
+    }
+    {  // partial cond
+      auto partial_cond = std::make_unique<schema::CNodeT>();
+      partial_cond->inputIndex = {4};
+      partial_cond->outputIndex = {9};
+      partial_cond->primitive = std::make_unique<schema::PrimitiveT>();
+      partial_cond->primitive->value.type = schema::PrimitiveType_Partial;
+      auto partial_cond_prim = new schema::PartialT;
+      partial_cond_prim->subGraphIndex = 1;
+      partial_cond->primitive->value.value = partial_cond_prim;
+      partial_cond->name = "partial_cond1";
+      meta_graph->nodes.emplace_back(std::move(partial_cond));
+    }
+    auto sub_graph_2 = std::make_unique<schema::SubGraphT>();
+    sub_graph_2->name = "while_body";
+    sub_graph_2->inputIndices = {8};
+    sub_graph_2->outputIndices = {9};
+    sub_graph_2->nodeIndices = {8, 9, 10};
+    sub_graph_2->tensorIndices = {8, 10, 11, 12, 4, 9};
+    meta_graph->subGraph.emplace_back(std::move(sub_graph_2));
+  }
   meta_graph->name = "graph";
-  meta_graph->nodes.emplace_back(std::move(add_0));
-  meta_graph->nodes.emplace_back(std::move(partial_1));
-  meta_graph->nodes.emplace_back(std::move(partial_2));
-  meta_graph->nodes.emplace_back(std::move(partial_3));
-  meta_graph->nodes.emplace_back(std::move(add_1));
-  meta_graph->nodes.emplace_back(std::move(add_2));
-  meta_graph->nodes.emplace_back(std::move(add_3));
-  meta_graph->allTensors.emplace_back(std::move(tensor_0));
-  meta_graph->allTensors.emplace_back(std::move(tensor_1));
-  meta_graph->allTensors.emplace_back(std::move(tensor_2));
-  meta_graph->allTensors.emplace_back(std::move(tensor_3));
-  meta_graph->allTensors.emplace_back(std::move(tensor_4));
-  meta_graph->allTensors.emplace_back(std::move(tensor_5));
-  meta_graph->allTensors.emplace_back(std::move(tensor_6));
-  meta_graph->allTensors.emplace_back(std::move(tensor_7));
-  meta_graph->subGraph.emplace_back(std::move(sub_graph_0));
-  meta_graph->subGraph.emplace_back(std::move(sub_graph_1));
-  meta_graph->subGraph.emplace_back(std::move(sub_graph_2));
-  meta_graph->subGraph.emplace_back(std::move(sub_graph_3));
   meta_graph->version = lite::Version();
   //  -----------------------------------------------------------------------
   lite::Storage::Save(*meta_graph,
diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc
index d382e850b4..2eaf256ac8 100644
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -33,11 +33,37 @@ static const char *DELIM_COLON = ":";
 static const char *DELIM_COMMA = ",";
 static const char *DELIM_SLASH = "/";
 
-int Benchmark::GenerateRandomData(size_t size, void *data) {
+int Benchmark::GenerateRandomData(size_t size, void *data, TypeId data_type) {
   MS_ASSERT(data != nullptr);
-  char *casted_data = static_cast<char *>(data);
-  for (size_t i = 0; i < size; i++) {
-    casted_data[i] = static_cast<char>(i);
+  switch (data_type) {
+    case kNumberTypeFloat32:
+    case kNumberTypeFloat:
+      FillInputData<float>(size, data, std::uniform_real_distribution<float>(-0.5f, 0.5f));
+      break;
+    case kNumberTypeFloat64:
+      FillInputData<double>(size, data, std::uniform_real_distribution<double>(-0.5, 0.5));
+      break;
+    case kNumberTypeInt64:
+      FillInputData<int64_t>(size, data, std::uniform_int_distribution<int64_t>(0, 99));
+      break;
+    case kNumberTypeInt:
+    case kNumberTypeInt32:
+      FillInputData<int32_t>(size, data, std::uniform_int_distribution<int32_t>(0, 99));
+      break;
+    case kNumberTypeInt16:
+      FillInputData<int16_t>(size, data, std::uniform_int_distribution<int16_t>(0, 99));
+      break;
+    case kNumberTypeInt8:
+      FillInputData<int8_t>(size, data, std::uniform_int_distribution<int8_t>(-127, 127));
+      break;
+    case kNumberTypeUInt8:
+      FillInputData<uint8_t>(size, data, std::uniform_int_distribution<uint8_t>(0, 254));
+      break;
+    default:
+      char *casted_data = static_cast<char *>(data);
+      for (size_t i = 0; i < size; i++) {
+        casted_data[i] = static_cast<char>(i);
+      }
   }
   return RET_OK;
 }
@@ -54,7 +80,7 @@ int Benchmark::GenerateInputData() {
     if (tensor->data_type() == kObjectTypeString) {
       status = StringsToMSTensor({"you're the best."}, tensor);
     } else {
-      status = GenerateRandomData(tensor->Size(), input_data);
+      status = GenerateRandomData(tensor->Size(), input_data, tensor->data_type());
     }
     if (status != RET_OK) {
       std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h
index 744653a08a..cf8d97a954 100644
--- a/mindspore/lite/tools/benchmark/benchmark.h
+++ b/mindspore/lite/tools/benchmark/benchmark.h
@@ -19,6 +19,7 @@
 
 #include <getopt.h>
 #include <signal.h>
+#include <random>
 #include <unordered_map>
 #include <fstream>
 #include <iostream>
@@ -124,7 +125,7 @@ class MS_API Benchmark {
   // call GenerateRandomData to fill inputTensors
   int GenerateInputData();
 
-  int GenerateRandomData(size_t size, void *data);
+  int GenerateRandomData(size_t size, void *data, TypeId data_type);
 
   int ReadInputFile();
 
@@ -224,6 +225,14 @@ class MS_API Benchmark {
     }
   }
 
+  template <typename T, typename Distribution>
+  void FillInputData(int size, void *data, Distribution distribution) {
+    MS_ASSERT(data != nullptr);
+    int elements_num = size / sizeof(T);
+    (void)std::generate_n(static_cast<T *>(data), elements_num,
+                          [&]() { return static_cast<T>(distribution(random_engine_)); });
+  }
+
   int MarkPerformance();
 
   int MarkAccuracy();
@@ -249,6 +258,7 @@ class MS_API Benchmark {
 
   KernelCallBack before_call_back_;
   KernelCallBack after_call_back_;
+  std::mt19937 random_engine_;
 };
 
 int MS_API RunBenchmark(int argc, const char **argv);