diff --git a/mindspore/lite/internal/src/kernel/fp32/matmul.cc b/mindspore/lite/internal/src/kernel/fp32/matmul.cc
index 2e5c1e1795..f40995623e 100644
--- a/mindspore/lite/internal/src/kernel/fp32/matmul.cc
+++ b/mindspore/lite/internal/src/kernel/fp32/matmul.cc
@@ -148,6 +148,10 @@ int DoMatMul(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tenso
   params->col_8_ = UP_ROUND(params->col_, 8);
 
   MatMulCPUKernelData *kernel_data = (MatMulCPUKernelData *)malloc(sizeof(MatMulCPUKernelData));
+  if (kernel_data == NULL) {
+    LITE_ERROR_LOG("Malloc MatMulCPUKernelData failed");
+    return RET_MEMORY_FAILED;
+  }
   kernel_data->a_c12_ptr_ =
     reinterpret_cast<float *>(allocator->Malloc(params->batch * params->row_12_ * params->deep_ * sizeof(float)));
   if (kernel_data->a_c12_ptr_ == NULL) {
diff --git a/mindspore/lite/java/build_aar.sh b/mindspore/lite/java/build_aar.sh
index b060ebf731..2cc69ba3c4 100644
--- a/mindspore/lite/java/build_aar.sh
+++ b/mindspore/lite/java/build_aar.sh
@@ -33,7 +33,7 @@ build_mslite_arm64() {
 }
 
 build_mslite_arm32() {
-    # build mindspore-lite arm64
+    # build mindspore-lite arm32
     cd ${TOP_PATH}
     bash build.sh -I arm32
     COMPILE_RET=$?
diff --git a/mindspore/lite/java/java/app/src/main/native/runtime/ms_tensor.cpp b/mindspore/lite/java/java/app/src/main/native/runtime/ms_tensor.cpp
index fb431ceb81..36ad3425b7 100644
--- a/mindspore/lite/java/java/app/src/main/native/runtime/ms_tensor.cpp
+++ b/mindspore/lite/java/java/app/src/main/native/runtime/ms_tensor.cpp
@@ -170,6 +170,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_com_mindspore_lite_MSTensor_setData(J
   auto *data_arr = env->GetByteArrayElements(data, &is_copy);
   auto *local_data = ms_tensor_ptr->MutableData();
   memcpy(local_data, data_arr, data_len);
+  env->ReleaseByteArrayElements(data, data_arr, JNI_ABORT);
   return static_cast<jboolean>(true);
 }
 
@@ -200,6 +201,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_com_mindspore_lite_MSTensor_setByteBu
   auto *data_arr = env->GetByteArrayElements(data, &is_copy);
   auto *local_data = ms_tensor_ptr->MutableData();
   memcpy(local_data, data_arr, data_len);
+  env->ReleaseByteArrayElements(data, data_arr, JNI_ABORT);
   return static_cast<jboolean>(true);
 }
 
diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt
index e511cbec73..9819c8f075 100644
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -17,6 +17,7 @@ set(LITE_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/workspace_pool.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/kernel_registry.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/populate_parameter.cc
diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc
new file mode 100644
index 0000000000..109aa34999
--- /dev/null
+++ b/mindspore/lite/src/inner_context.cc
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/inner_context.h"
+#include "include/errorcode.h"
+#include "utils/log_adapter.h"
+
+namespace mindspore::lite {
+int InnerContext::Init() {
+  if (this->thread_pool_ == nullptr) {
+    this->thread_pool_ = CreateLiteThreadPool(this->thread_num_, this->cpu_bind_mode_);
+    if (this->thread_pool_ == nullptr) {
+      MS_LOG(ERROR) << "Create ThreadPool failed";
+      return RET_NULL_PTR;
+    }
+  }
+  if (this->allocator == nullptr) {
+    this->allocator = Allocator::Create();
+    if (this->allocator == nullptr) {
+      MS_LOG(ERROR) << "Create Allocator failed";
+      return RET_NULL_PTR;
+    }
+  }
+  return RET_OK;
+}
+
+InnerContext::~InnerContext() {
+  if (this->thread_pool_ != NULL) {
+    DestroyThreadPool(this->thread_pool_);
+    free(this->thread_pool_);
+    this->thread_pool_ = NULL;
+  }
+}
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h
new file mode 100644
index 0000000000..1d6f0d3be8
--- /dev/null
+++ b/mindspore/lite/src/inner_context.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_INNER_CONTEXT_H
+#define MINDSPORE_LITE_SRC_INNER_CONTEXT_H
+
+#include "include/context.h"
+#include "src/runtime/runtime_api.h"
+#include "src/runtime/allocator.h"
+
+namespace mindspore::lite {
+struct InnerContext : public Context {
+ public:
+  struct ThreadPool *thread_pool_ = nullptr;
+
+ public:
+  int Init();
+
+  virtual ~InnerContext();
+};
+}  // namespace mindspore::lite
+
+#endif  // MINDSPORE_LITE_SRC_INNER_CONTEXT_H
diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc
index 994867b076..41908c47a6 100644
--- a/mindspore/lite/src/kernel_registry.cc
+++ b/mindspore/lite/src/kernel_registry.cc
@@ -98,7 +98,7 @@ const kernel::KernelCreator *KernelRegistry::GetCreatorArrays() { return creator
 
 kernel::LiteKernel *KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors,
                                               const std::vector<Tensor *> &out_tensors, const PrimitiveC *primitive,
-                                              const Context *ctx, const kernel::KernelKey &key) {
+                                              const InnerContext *ctx, const kernel::KernelKey &key) {
   MS_ASSERT(nullptr != primitive);
   MS_ASSERT(nullptr != ctx);
   auto parameter = kernel::PopulateParameter(primitive);
diff --git a/mindspore/lite/src/kernel_registry.h b/mindspore/lite/src/kernel_registry.h
index c0511441ba..552f6bec46 100644
--- a/mindspore/lite/src/kernel_registry.h
+++ b/mindspore/lite/src/kernel_registry.h
@@ -45,7 +45,7 @@ class KernelRegistry {
                kernel::KernelCreator creator);
   bool Merge(const std::unordered_map &newCreators);
   kernel::LiteKernel *GetKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-                                const PrimitiveC *primitive, const Context *ctx, const kernel::KernelKey &key);
+                                const PrimitiveC *primitive, const InnerContext *ctx, const kernel::KernelKey &key);
 
  protected:
   static const int device_type_length_{kKernelArch_MAX - kKernelArch_MIN + 1};
diff --git a/mindspore/lite/src/lite_kernel.h
b/mindspore/lite/src/lite_kernel.h index 1ff4314ea8..2e5445f8b7 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -24,7 +24,7 @@ #endif #include "src/ops/primitive_c.h" #include "nnacl/op_base.h" -#include "include/context.h" +#include "src/inner_context.h" #include "src/tensor.h" #include "include/errorcode.h" @@ -56,7 +56,7 @@ class LiteKernel { LiteKernel() = default; // parameter should be deleted or freed by caller, and should be deleted or freed after LiteKernel is deleted LiteKernel(OpParameter *parameter, const std::vector &in_tensors, - const std::vector &out_tensors, const lite::Context *ctx, + const std::vector &out_tensors, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : op_parameter_(parameter), in_tensors_(in_tensors), @@ -156,7 +156,7 @@ class LiteKernel { std::vector in_tensors_; std::vector out_tensors_; const mindspore::lite::PrimitiveC *primitive_ = nullptr; - const lite::Context *context_ = nullptr; + const lite::InnerContext *context_ = nullptr; std::vector in_kernels_; std::vector out_kernels_; bool train_mode_ = false; @@ -168,7 +168,7 @@ class SubGraphKernel : public LiteKernel { explicit SubGraphKernel(const std::vector &inputs, const std::vector &outputs, const std::vector &in_kernels, const std::vector &out_kernels, - const std::vector &nodes, const lite::Context *ctx, + const std::vector &nodes, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(nullptr, inputs, outputs, ctx, primitive), nodes_(nodes) { in_kernels_ = in_kernels; @@ -186,7 +186,7 @@ class SubGraphKernel : public LiteKernel { typedef LiteKernel *(*KernelCreator)(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive); class LiteKernelUtil { diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index e8279cc505..0af5b7d992 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -17,10 +17,10 @@ #include "src/lite_session.h" #include #include +#include "src/runtime/runtime_api.h" #include "include/errorcode.h" #include "utils/log_adapter.h" #include "src/scheduler.h" -#include "src/runtime/runtime_api.h" #include "src/runtime/allocator.h" #include "src/executor.h" #include "src/common/utils.h" @@ -51,6 +51,8 @@ static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor }); } +LiteSession::LiteSession() { this->is_running_.store(false); } + int LiteSession::ConvertTensors(const lite::Model *model) { MS_ASSERT(model != nullptr); copyed_tensor_idxes_.clear(); @@ -247,15 +249,22 @@ void LiteSession::InitGraphInOutTensors(const lite::Model *model) { } int LiteSession::CompileGraph(Model *model) { + bool expected = false; + if (!is_running_.compare_exchange_strong(expected, true)) { + MS_LOG(ERROR) << "Not support multi-threading"; + return RET_ERROR; + } // model.MetaGraph ==> kernels if (model == nullptr) { MS_LOG(ERROR) << "The input model is nullptr."; + is_running_.store(false); return RET_PARAM_INVALID; } auto ret = ConvertTensors(model); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvertTensors failed: " << ret; + is_running_.store(false); return ret; } @@ -266,47 +275,71 @@ int LiteSession::CompileGraph(Model *model) { ret = scheduler.Schedule(model, &tensors_, &kernels_); if (ret != RET_OK) { MS_LOG(ERROR) << "Schedule kernels failed: 
" << ret; + is_running_.store(false); return ret; } - executor->Prepare(this->kernels_); + ret = executor->Prepare(this->kernels_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Prepare kernels failed: " << ret; + is_running_.store(false); + return ret; + } #ifndef SUPPORT_TRAIN model->Free(); #endif + is_running_.store(false); return RET_OK; } std::vector LiteSession::GetInputs() const { return this->input_vec_; } int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { + bool expected = false; + if (!is_running_.compare_exchange_strong(expected, true)) { + MS_LOG(ERROR) << "Not support multi-threading"; + return RET_ERROR; + } + STATUS ret = RET_ERROR; MS_ASSERT(this->context_); if (before == nullptr && after == nullptr) { - return executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get()); + ret = executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get()); } else { - return executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get(), before, after); + ret = executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get(), before, after); } + is_running_.store(false); + return ret; } int LiteSession::Init(Context *context) { + bool expected = false; + if (!is_running_.compare_exchange_strong(expected, true)) { + MS_LOG(ERROR) << "Not support multi-threading"; + return RET_ERROR; + } + MS_ASSERT(nullptr != context); - this->context_ = new (std::nothrow) Context(); + this->context_ = new (std::nothrow) InnerContext(); if (this->context_ == nullptr) { - MS_LOG(ERROR) << "new context failed"; + MS_LOG(ERROR) << "New Context failed"; + is_running_.store(false); return RET_MEMORY_FAILED; } - // context->thread_num_, context->allocator, context->device_ctx - this->context_->thread_num_ = context->thread_num_; this->context_->allocator = context->allocator; + this->context_->thread_num_ = context->thread_num_; + this->context_->cpu_bind_mode_ = context->cpu_bind_mode_; this->context_->device_type_ = context->device_type_; this->context_->float16_priority = context->float16_priority; - this->context_->cpu_bind_mode_ = context->cpu_bind_mode_; - if (context_->allocator == nullptr) { - context_->allocator = Allocator::Create(); + auto ret = this->context_->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init Context failed"; + is_running_.store(false); + return ret; } - ConfigThreadPool(THREAD_POOL_DEFAULT, context->thread_num_, context->cpu_bind_mode_); - auto ret = KernelRegistry::GetInstance()->Init(); + ret = KernelRegistry::GetInstance()->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "KernelRegistry Init Failed."; + is_running_.store(false); return ret; } #if SUPPORT_GPU @@ -319,19 +352,27 @@ int LiteSession::Init(Context *context) { #endif executor = new Executor(); if (nullptr == executor) { - MS_LOG(ERROR) << "new Executor failed"; + MS_LOG(ERROR) << "New Executor failed"; + is_running_.store(false); return RET_ERROR; } + is_running_.store(false); return RET_OK; } void LiteSession::BindThread(bool if_bind) { if (this->context_->cpu_bind_mode_ != NO_BIND) { - BindThreads(THREAD_POOL_DEFAULT, if_bind, this->context_->cpu_bind_mode_); + MS_ASSERT(this->context_->thread_pool_ != NULL); + BindThreads(this->context_->thread_pool_, if_bind, this->context_->cpu_bind_mode_); } } LiteSession::~LiteSession() { + bool expected = false; + if (!is_running_.compare_exchange_strong(expected, true)) { + MS_LOG(ERROR) << "Not 
support multi-threading"; + return; + } for (size_t i = 0; i < tensors_.size(); i++) { auto *tensor = tensors_.at(i); MS_ASSERT(tensor != nullptr); @@ -358,6 +399,7 @@ LiteSession::~LiteSession() { delete this->context_; delete this->executor; this->executor = nullptr; + is_running_.store(false); } std::vector LiteSession::GetInputsByName(const std::string &name) const { @@ -426,6 +468,11 @@ void LiteSession::ResetInputsShape(const std::vector> &dims) { int LiteSession::Resize(const std::vector &inputs, const std::vector> &dims) { + bool expected = false; + if (!is_running_.compare_exchange_strong(expected, true)) { + MS_LOG(ERROR) << "Not support multi-threading"; + return RET_ERROR; + } std::vector> old_dims; for (size_t i = 0; i < inputs_.size(); ++i) { old_dims.push_back(inputs_[i]->shape()); @@ -433,6 +480,7 @@ int LiteSession::Resize(const std::vector &inputs auto ret = ResizeInputs(inputs, dims); if (ret != RET_OK) { ResetInputsShape(old_dims); + is_running_.store(false); return ret; } @@ -444,8 +492,10 @@ int LiteSession::Resize(const std::vector &inputs if (resize_ret != RET_OK) { MS_LOG(ERROR) << "restore kernel size fail!ret: " << resize_ret; } + is_running_.store(false); return ret; } + is_running_.store(false); return RET_OK; } } // namespace lite diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h index bd4a3d047b..7046086623 100644 --- a/mindspore/lite/src/lite_session.h +++ b/mindspore/lite/src/lite_session.h @@ -21,11 +21,12 @@ #include #include #include +#include #include "src/lite_kernel.h" #include "include/ms_tensor.h" #include "include/lite_session.h" #include "include/model.h" -#include "include/context.h" +#include "src/inner_context.h" #include "schema/model_generated.h" #include "src/executor.h" #include "src/tensor.h" @@ -34,7 +35,7 @@ namespace mindspore { namespace lite { class LiteSession : public session::LiteSession { public: - LiteSession() = default; + LiteSession(); ~LiteSession() override; @@ -81,14 +82,13 @@ class LiteSession : public session::LiteSession { void InitGraphOutputTensorMap(const lite::Model *model); - int ResizeInputs(const std::vector &inputs, - const std::vector> &dims); + int ResizeInputs(const std::vector &inputs, const std::vector> &dims); private: void ResetInputsShape(const std::vector> &dims); protected: - Context *context_ = nullptr; + InnerContext *context_ = nullptr; std::vector kernels_; std::vector tensors_; std::vector copyed_tensor_idxes_; @@ -107,6 +107,7 @@ class LiteSession : public session::LiteSession { // graph output tensor name -- output tensor std::unordered_map output_tensor_map_; Executor *executor = nullptr; + std::atomic is_running_ = false; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/model.cc b/mindspore/lite/src/model.cc index 3282764182..0c8f39aaa1 100644 --- a/mindspore/lite/src/model.cc +++ b/mindspore/lite/src/model.cc @@ -18,6 +18,7 @@ #include "utils/log_adapter.h" #include "include/errorcode.h" #include "src/common/graph_util.h" +#include "include/version.h" namespace mindspore::lite { namespace { @@ -100,6 +101,11 @@ Model *Model::Import(const char *model_buf, size_t size) { if (meta_graph->version() != nullptr) { model->version_ = meta_graph->version()->c_str(); } + + if (model->version_ != Version()) { + MS_LOG(WARNING) << "model version is " << model->version_ << ", inference version is " << Version() << " not equal"; + } + auto in_count = meta_graph->inputIndex()->size(); for (uint32_t i = 0; i < in_count; ++i) { 
model->input_indices_.push_back(size_t(meta_graph->inputIndex()->GetAs(i))); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc index e9c727902f..1f90cc1f00 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc @@ -93,7 +93,7 @@ int ArgMinMaxBaseCPUKernel::Run() { kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { MS_LOG(ERROR) << "Input op_parameter is nullptr!"; @@ -117,7 +117,7 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { MS_LOG(ERROR) << "Input op_parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h index 636c66c2ae..29f774166a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class ArgMinMaxBaseCPUKernel : public LiteKernel { public: ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc index e7fdfee2f8..979c7dc3a3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc @@ -55,7 +55,7 @@ int BatchToSpaceBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { @@ -80,7 +80,7 @@ kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h b/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h index 2d454df1f6..89860337ba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class BatchToSpaceBaseCPUKernel : public LiteKernel { public: BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector 
&outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc index e6404b400c..b8535063a7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc @@ -38,7 +38,7 @@ int ConcatBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -62,7 +62,7 @@ kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -86,7 +86,7 @@ kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h index f13aa2fc35..0f5b6340b6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h @@ -22,13 +22,13 @@ #include "nnacl/concat_parameter.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ConcatBaseCPUKernel : public LiteKernel { public: ConcatBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { concat_param_ = reinterpret_cast(op_parameter_); @@ -44,7 +44,7 @@ class ConcatBaseCPUKernel : public LiteKernel { protected: int axis_; - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; ConcatParameter *concat_param_ = nullptr; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h index d824990f09..1d58abc978 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h @@ -29,7 +29,7 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; using mindspore::schema::PadMode; using mindspore::schema::QuantType; @@ -37,7 +37,7 @@ namespace mindspore::kernel { class ConvolutionBaseCPUKernel : public LiteKernel { 
public: ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { op_parameter_->thread_num_ = ctx->thread_num_; @@ -64,7 +64,7 @@ class ConvolutionBaseCPUKernel : public LiteKernel { int tile_num_; void *bias_data_ = nullptr; void *nhwc4_input_ = nullptr; - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; ConvParameter *conv_param_; ConvQuantArg *conv_quant_arg_; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc index 283d483022..d0a2019a01 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc @@ -32,7 +32,7 @@ int CropBaseCPUKernel::Init() { return RET_OK; } kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -56,7 +56,7 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector & kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -80,7 +80,7 @@ kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h index a123a80e9d..ae28780343 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h @@ -21,13 +21,13 @@ #include "src/lite_kernel.h" #include "nnacl/crop_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class CropBaseCPUKernel : public LiteKernel { public: CropBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~CropBaseCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc index a803bb8141..78805a96fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc @@ -63,7 +63,7 @@ int DepthToSpaceBaseCPUKernel::ReSize() { kernel::LiteKernel 
*CpuDepthToSpaceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); @@ -89,7 +89,7 @@ kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h index e90532ac49..622e159da2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class DepthToSpaceBaseCPUKernel : public LiteKernel { public: DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc index 7ff75ba0b4..01b17785f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc @@ -34,7 +34,7 @@ int FullconnectionBaseCPUKernel::Init() { kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); @@ -55,7 +55,7 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h index 924915b391..f76263d8b2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "nnacl/matmul_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class FullconnectionBaseCPUKernel : public LiteKernel { public: FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { fc_param_ = reinterpret_cast(op_parameter_); @@ -42,7 +42,7 @@ class FullconnectionBaseCPUKernel : public 
LiteKernel { protected: MatMulParameter *fc_param_; int thread_stride_; - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.cc index 5dcb65ce4b..84477245f4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.cc @@ -31,7 +31,7 @@ int LeakyReluBaseCPUKernel::Init() { return RET_OK; } kernel::LiteKernel *CpuLeakyReluInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.h b/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.h index e61a5633ba..f109c4f38f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/leaky_relu_base.h @@ -22,13 +22,13 @@ #include "nnacl/leaky_relu_parameter.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class LeakyReluBaseCPUKernel : public LiteKernel { public: LeakyReluBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc index ab1a499649..78f2594cc8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc @@ -28,7 +28,7 @@ using mindspore::schema::PrimitiveType_MatMul; namespace mindspore::kernel { kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Concat); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h index 3cec23c462..e987c2fdbc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "nnacl/matmul_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class MatmulBaseCPUKernel : public LiteKernel { public: MatmulBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { params_ = reinterpret_cast(op_parameter_); @@ -42,7 +42,7 @@ class MatmulBaseCPUKernel : public LiteKernel { protected: 
MatMulParameter *params_; int thread_stride_; - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/pad.cc b/mindspore/lite/src/runtime/kernel/arm/base/pad.cc index ea9e075549..aeffa3126b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/pad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/pad.cc @@ -30,7 +30,7 @@ namespace mindspore::kernel { kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Pad); @@ -51,7 +51,7 @@ kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector &i kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Pad); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc index ee93ea5a99..af494201fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc @@ -92,7 +92,7 @@ int PoolingBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -116,7 +116,7 @@ kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h index f02fb668f4..a837e452f0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h @@ -22,14 +22,14 @@ #include "nnacl/fp32/pooling.h" #include "include/errorcode.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; namespace mindspore::kernel { class PoolingBaseCPUKernel : public LiteKernel { public: PoolingBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { pooling_param_ = reinterpret_cast(op_parameter_); @@ -43,7 +43,7 @@ class PoolingBaseCPUKernel : public LiteKernel { void FreeQuantParam(); protected: - const Context *ctx_; + const InnerContext *ctx_; int 
thread_count_; PoolingParameter *pooling_param_; QuantArg **pooling_quant_arg_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/power_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/power_base.cc index 8e1d6700ad..e1949b8b53 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/power_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/power_base.cc @@ -33,7 +33,7 @@ int PowerBaseCPUKernel::ReSize() { return RET_OK; } kernel::LiteKernel *CpuPowerInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -57,7 +57,7 @@ kernel::LiteKernel *CpuPowerInt8KernelCreator(const std::vector kernel::LiteKernel *CpuPowerFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Power); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/power_base.h b/mindspore/lite/src/runtime/kernel/arm/base/power_base.h index eb57deaebf..8691790fba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/power_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/power_base.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class PowerBaseCPUKernel : public LiteKernel { public: PowerBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc index 874bc69c8b..f4052e9440 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc @@ -168,7 +168,7 @@ int PriorBoxCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail! 
Ret error code[" << prepare_ret << "]"; return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, RunPriorBox, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]"; return RET_ERROR; @@ -178,7 +178,7 @@ int PriorBoxCPUKernel::Run() { kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { MS_LOG(ERROR) << "Input op_parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h index 2781374ea0..d392a05143 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h @@ -22,13 +22,13 @@ #include "nnacl/reshape_parameter.h" #include "nnacl/prior_box.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class PriorBoxCPUKernel : public LiteKernel { public: PriorBoxCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { prior_box_param_ = reinterpret_cast(op_parameter_); @@ -41,7 +41,7 @@ class PriorBoxCPUKernel : public LiteKernel { int PriorBoxImpl(int task_id); protected: - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; private: diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc index 6c913aba38..1c5bdd5b22 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc @@ -83,8 +83,8 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { MS_LOG(ERROR) << "QuantDTypeCast need quantization parameters which is not found."; return RET_ERROR; } - auto quant_arg = !in_tensors_.front()->GetQuantParams().empty() ? in_tensors_.front()->GetQuantParams().front() : - out_tensors_.front()->GetQuantParams().front(); + auto quant_arg = !in_tensors_.front()->GetQuantParams().empty() ? 
in_tensors_.front()->GetQuantParams().front() + : out_tensors_.front()->GetQuantParams().front(); int ret; if (inverse_) { ret = DoDequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, @@ -124,7 +124,7 @@ int QuantDTypeCastCPUKernel::Run() { int8_ptr_ = reinterpret_cast(out_tensors_[0]->MutableData()); } - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, QuantDTypeCastRun, this, thread_n_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; @@ -135,7 +135,7 @@ int QuantDTypeCastCPUKernel::Run() { kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h index 703c9ee774..e6ca8580b4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class QuantDTypeCastCPUKernel : public LiteKernel { public: QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} ~QuantDTypeCastCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc index 3008492cdf..ecf3bf6b0d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc @@ -171,7 +171,7 @@ int ReduceBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); @@ -200,7 +200,7 @@ kernel::LiteKernel *CpuReduceFp32KernelCreator(const std::vector kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Mean); @@ -229,7 +229,7 @@ kernel::LiteKernel *CpuMeanFp32KernelCreator(const std::vector & kernel::LiteKernel *CpuReduceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h 
b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h index f233385dd6..115eee4973 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ReduceBaseCPUKernel : public LiteKernel { public: ReduceBaseCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive) {} virtual ~ReduceBaseCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc index 637fc38100..01448f9331 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc @@ -32,7 +32,7 @@ int ReshapeBaseCPUKernel::Init() { return RET_OK; } kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -56,7 +56,7 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -80,7 +80,7 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h index 7ff2835521..02c92c67b5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h @@ -21,13 +21,13 @@ #include "src/lite_kernel.h" #include "nnacl/reshape_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ReshapeBaseCPUKernel : public LiteKernel { public: ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx) { reshape_param_ = reinterpret_cast(op_parameter_); @@ -39,7 +39,7 @@ class ReshapeBaseCPUKernel : public LiteKernel { int Run() override { return 0; } protected: - const Context *ctx_; + const InnerContext *ctx_; ReshapeParameter *reshape_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.cc index 1aec98e9d8..e4f8972c62 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.cc @@ -107,7 +107,7 @@ int ResizeBaseCPUKernel::Init() { kernel::LiteKernel *CpuResizeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -132,7 +132,7 @@ kernel::LiteKernel *CpuResizeFp32KernelCreator(const std::vector kernel::LiteKernel *CpuResizeInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h index 59b0afc24d..3540855ce8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/resize_base.h @@ -27,9 +27,9 @@ namespace mindspore::kernel { class ResizeBaseCPUKernel : public LiteKernel { public: ResizeBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {} + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} virtual ~ResizeBaseCPUKernel() = default; @@ -37,7 +37,6 @@ class ResizeBaseCPUKernel : public LiteKernel { int ReSize() override { return 0; }; protected: - const lite::Context *context_; int method_; int64_t new_height_; int64_t new_width_; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc index 77a77978e6..cbfc4bfa27 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc @@ -63,7 +63,7 @@ int SliceBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuSliceInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -87,7 +87,7 @@ kernel::LiteKernel *CpuSliceInt8KernelCreator(const std::vector kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h index 51d585d723..be1410f6a9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SliceBaseCPUKernel : public LiteKernel { public: 
SliceBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc index b06e26c73a..465f1f7b65 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc @@ -58,7 +58,7 @@ int SoftmaxBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -82,7 +82,7 @@ kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h index 0c7bbda0a2..be5a638825 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SoftmaxBaseCPUKernel : public LiteKernel { public: SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { softmax_param_ = reinterpret_cast(op_parameter_); @@ -37,7 +37,7 @@ class SoftmaxBaseCPUKernel : public LiteKernel { int Run() override { return 0; } protected: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; int thread_count_; SoftmaxParameter *softmax_param_; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc index 7d4612dae4..9c9d71c39c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc @@ -66,7 +66,7 @@ int SplitBaseCPUKernel::ReSize() { kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -90,7 +90,7 @@ kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC 
*primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; @@ -114,7 +114,7 @@ kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/split_base.h b/mindspore/lite/src/runtime/kernel/arm/base/split_base.h index 45bcf96afc..6f94d4a425 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/split_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/split_base.h @@ -21,13 +21,13 @@ #include "src/lite_kernel.h" #include "nnacl/split_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class SplitBaseCPUKernel : public LiteKernel { public: SplitBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { param = reinterpret_cast(op_parameter_); @@ -39,7 +39,7 @@ class SplitBaseCPUKernel : public LiteKernel { int Run() override { return 0; } protected: - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; int thread_n_stride_; int thread_n_num_; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc index 513dc390e4..c29efde98e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc @@ -31,7 +31,7 @@ int SqueezeBaseCPUKernel::Init() { return RET_OK; } kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h index 6df6dc36f1..3a7b2a7d4b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h @@ -22,13 +22,13 @@ #include "nnacl/squeeze_parameter.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class SqueezeBaseCPUKernel : public LiteKernel { public: SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} @@ -42,7 +42,7 @@ class SqueezeBaseCPUKernel : public LiteKernel { protected: int *axis_; - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; }; } // namespace mindspore::kernel diff --git 
a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc index 586a4baa81..c96dbc9447 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc @@ -118,7 +118,7 @@ int StridedSliceCPUKernel::Run() { kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h index 78accab45e..c9d5a8e60b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class StridedSliceCPUKernel : public LiteKernel { public: StridedSliceCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~StridedSliceCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc index 0786c17bda..d7ed15e366 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc @@ -115,7 +115,7 @@ int ActivationFp16CPUKernel::Run() { return ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; FreeTmpBuffer(); @@ -132,7 +132,7 @@ int ActivationFp16CPUKernel::Run() { kernel::LiteKernel *CpuActivationFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Activation); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.h index 2374126959..902091d7be 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ActivationFp16CPUKernel : public LiteKernel { public: ActivationFp16CPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { type_ = (reinterpret_cast(param))->type_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc 
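For orientation, the recurring ParallelLaunch edit in these hunks swaps the process-wide THREAD_POOL_DEFAULT handle for the thread pool owned by the kernel's InnerContext (reached through the context_ member), so each session dispatches work onto its own pool. A condensed sketch of the migrated call site, following the ActivationFp16 hunk above with the temporary-buffer handling omitted; the ParallelLaunch prototype is assumed from src/runtime/runtime_api.h:

int ActivationFp16CPUKernel::Run() {
  // Previously: ParallelLaunch(THREAD_POOL_DEFAULT, ActivationRun, this, thread_count_);
  // Now the pool comes from the InnerContext the kernel was created with.
  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
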
index cfc696d7ba..e015be627d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -202,7 +202,7 @@ int ArithmeticFP16CPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsRunFp16, this, context_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]"; } @@ -231,7 +231,7 @@ void ArithmeticFP16CPUKernel::FreeTmpBuffer() { kernel::LiteKernel *CpuArithmeticFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "input parameter is null!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h index 2eca1833a2..b1dfcc2236 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h @@ -36,7 +36,7 @@ typedef struct { class ArithmeticFP16CPUKernel : public LiteKernel { public: ArithmeticFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc index 472b021481..b0b59ac223 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc @@ -62,7 +62,7 @@ int BatchnormFp16CPUKernel::Run() { return RET_ERROR; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } @@ -92,7 +92,7 @@ void BatchnormFp16CPUKernel::FreeInputAndOutput() { kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) BatchnormFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h index c4d586f3a0..14ea1a2c20 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class BatchnormFp16CPUKernel : public BatchnormCPUKernel { public: BatchnormFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : 
BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} virtual ~BatchnormFp16CPUKernel() {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc index 43112f8eaa..153506a21d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc @@ -91,12 +91,12 @@ int CastFp16CPUKernel::Run() { if (data_num_ == 0) { return RET_OK; } - return ParallelLaunch(THREAD_POOL_DEFAULT, CastRun, this, op_parameter_->thread_num_); + return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_); } kernel::LiteKernel *CpuCastFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h index 3249aedebc..72f9dbade8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class CastFp16CPUKernel : public LiteKernel { public: CastFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc index cd984734f8..5763e569f8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc @@ -18,7 +18,7 @@ #include "nnacl/fp16/cast_fp16.h" namespace mindspore::kernel { -float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::Context *ctx) { +float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::InnerContext *ctx) { float16_t *fp16_data = nullptr; auto data_type = input->data_type(); if (data_type == kNumberTypeFloat32) { @@ -32,7 +32,7 @@ float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::Context *ctx) return fp16_data; } -float16_t *MallocOutputFp16(lite::Tensor *output, const lite::Context *ctx) { +float16_t *MallocOutputFp16(lite::Tensor *output, const lite::InnerContext *ctx) { float16_t *fp16_data = nullptr; auto data_type = output->data_type(); if (data_type == kNumberTypeFloat32) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.h index 79309ebc6b..88111fdca1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.h @@ -20,9 +20,9 @@ #include "src/lite_kernel.h" namespace mindspore::kernel { -float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::Context *ctx); +float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::InnerContext *ctx); -float16_t *MallocOutputFp16(lite::Tensor *output, const lite::Context *ctx); +float16_t *MallocOutputFp16(lite::Tensor *output, const lite::InnerContext *ctx); bool IsExistFp16Tensor(const std::vector &inputs, const std::vector &outputs); } 
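The other recurring edit is a pure type migration: kernel constructors and CpuXxxKernelCreator functions now take const lite::InnerContext * instead of const lite::Context *, and kernels that cached their own context pointer (such as ResizeBaseCPUKernel above) drop the duplicate member in favour of the base-class one. A minimal sketch of the migrated shape, with FooFp16CPUKernel and CpuFooFp16KernelCreator as placeholder names and the tensor-vector template arguments written out in full:

#include "src/lite_kernel.h"

namespace mindspore::kernel {
class FooFp16CPUKernel : public LiteKernel {
 public:
  FooFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
  ~FooFp16CPUKernel() override = default;

 private:
  int thread_count_;
};

kernel::LiteKernel *CpuFooFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs,
                                            OpParameter *opParameter,
                                            const lite::InnerContext *ctx,  // was: const lite::Context *ctx
                                            const kernel::KernelKey &desc,
                                            const mindspore::lite::PrimitiveC *primitive);
}  // namespace mindspore::kernel
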
// namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc index fea1bff604..b7009b378c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc @@ -142,7 +142,7 @@ int ConcatFp16CPUKernel::Run() { kernel::LiteKernel *CpuConcatFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "Input parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h index 9b47ffc184..1603f217a7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/concat_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ConcatFp16CPUKernel : public ConcatBaseCPUKernel { public: ConcatFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index c5784fa2a0..9cbcdbe6b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -216,7 +216,7 @@ int Convolution1x1FP16CPUKernel::Run() { execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, execute_output_ + batch_index * matmul_param_->row_ * matmul_param_->col_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Fp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv1x1 fp16 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index 551dd9a609..a7f9b4c627 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -29,7 +29,7 @@ namespace mindspore::kernel { class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~Convolution1x1FP16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc index 33bb6cb80d..a63c6a59c4 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc @@ -237,7 +237,7 @@ int Convolution3x3FP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; PackNHWCToNHWC8Fp16(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Fp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, Convolution3x3Fp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 fp16 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h index 25021e2d10..fbd8748a42 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class Convolution3x3FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~Convolution3x3FP16CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h index 5d4c82bc18..6934b3a679 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionBaseFP16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 4581daee20..a91c045115 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -125,7 +125,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; return RET_ERROR; @@ -138,7 +138,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h index 32687e7a53..a028707707 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h @@ -35,7 +35,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseFp16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 8d3a3a2fa5..3bede0b309 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -171,7 +171,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { packed_output_ = execute_output_; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwSWFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h index 9b94ad8742..15e1169ed5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h @@ -36,7 +36,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseSWFp16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index 5870b75933..85c59e3d15 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -200,7 +200,7 @@ int ConvolutionFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; convert_func_(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; FreeTmpBuffer(); @@ -214,7 +214,7 @@ int ConvolutionFP16CPUKernel::Run() { kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, 
OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h index 7c9a35ede7..ec55e20152 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionFP16CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc index 1237d90b34..bdbcc3a182 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc @@ -214,7 +214,7 @@ int ConvolutionSWFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; convert_func_(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionSWFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionSWFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h index c35d46eb31..a0b3680f20 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: ConvolutionSWFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionSWFP16CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index 14d8e73a00..681dba03f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -393,7 +393,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; PackNHWCToNHWC8Fp16(execute_input_, nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionWinogradFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv winograd error 
error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h index f025450d60..5d74715dd5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h @@ -30,7 +30,7 @@ namespace mindspore::kernel { class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive, int out_unit) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(out_unit) {} ~ConvolutionWinogradFP16CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 5b3f5f2936..f087932c66 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -178,7 +178,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { if (!need_align_) { packed_output_ = execute_output_; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; return RET_ERROR; @@ -197,7 +197,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index aa8392c8cb..f301fd0a40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -37,7 +37,7 @@ namespace mindspore::kernel { class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~DeconvolutionDepthwiseFp16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 2c2a4c55e0..98a40f3bf7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -188,7 +188,7 @@ int DeConvolutionFp16CPUKernel::Run() { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { 
RowMajor2Col16MajorFp16(execute_input_, pack_input_, input_plane_, conv_param_->input_channel_); - error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvFp16Run, this, thread_count_); + error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; return RET_ERROR; @@ -204,7 +204,7 @@ int DeConvolutionFp16CPUKernel::Run() { kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h index 7ac0cd5adb..0dbab90335 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h @@ -33,7 +33,7 @@ namespace mindspore::kernel { class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { public: DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) { matmul_param_ = new (std::nothrow) MatMulParameter(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc index 689c46b8e0..65089c6b38 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc @@ -154,7 +154,7 @@ int FullconnectionFP16CPUKernel::Run() { } else { InitMatrixA(reinterpret_cast(in_tensors_[0]->MutableData()), a_pack_ptr_); } - ParallelLaunch(THREAD_POOL_DEFAULT, FcFP16Run, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, FcFP16Run, this, thread_count_); if (out_tensor->data_type() == kNumberTypeFloat32) { auto size = out_tensor->ElementsNum(); auto out_tensor_data = reinterpret_cast(out_tensor->MutableData()); @@ -165,7 +165,7 @@ int FullconnectionFP16CPUKernel::Run() { kernel::LiteKernel *CpuFullConnectionFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) FullconnectionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.h index a14017348e..408ffe70ad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.h @@ -29,7 +29,7 @@ namespace mindspore::kernel { class FullconnectionFP16CPUKernel : public FullconnectionBaseCPUKernel { public: explicit FullconnectionFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector 
&outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~FullconnectionFP16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc index 6ff5a25d48..7cad6bd4f7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc @@ -79,7 +79,7 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { FusedBatchnormFp16CPUKernel *kernel = diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h index 27e14927fc..79b6a044b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class FusedBatchnormFp16CPUKernel : public FusedBatchnormCPUKernel { public: FusedBatchnormFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : FusedBatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} virtual ~FusedBatchnormFp16CPUKernel() {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc index c8cf94d8be..3ba6395566 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc @@ -216,7 +216,7 @@ int MatmulFP16CPUKernel::Run() { current_a_ = a_pack_ptr_ + i * params_->row_16_ * params_->deep_; current_b_ = b_pack_ptr_ + i * params_->deep_ * params_->col_8_; current_c_ = c_ptr + i * params_->row_ * params_->col_; - ParallelLaunch(THREAD_POOL_DEFAULT, MatmulFP16Run, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, MatmulFP16Run, this, thread_count_); } if (out_tensor->data_type() == kNumberTypeFloat32) { auto size = out_tensor->ElementsNum(); @@ -228,7 +228,7 @@ int MatmulFP16CPUKernel::Run() { kernel::LiteKernel *CpuMatmulFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) MatmulFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.h index a13dc93ae4..7e6a1a9a5a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.h @@ -29,7 +29,7 @@ namespace mindspore::kernel { class MatmulFP16CPUKernel : public MatmulBaseCPUKernel { public: explicit MatmulFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context 
*ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~MatmulFP16CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc index 2b8d1572fc..d1316348f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc @@ -55,7 +55,7 @@ int PadFp16CPUKernel::Run() { } memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t)); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, PadImpl, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } @@ -78,9 +78,8 @@ void PadFp16CPUKernel::FreeInputAndOutput() { } kernel::LiteKernel *CpuPadFp16KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, - const kernel::KernelKey &desc, + const std::vector &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) PadFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h index caedf186b9..5f96fb4b6e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class PadFp16CPUKernel : public PadCPUKernel { public: PadFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PadCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc index 7880722d0b..b4a1fd55f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc @@ -88,7 +88,7 @@ int PoolingFp16CPUKernel::Run() { MS_ASSERT(out_data_type_ == kNumberTypeFloat32 || out_data_type_ == kNumberTypeFloat16); fp16_output_ = MallocOutputFp16(out_tensor, context_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; return RET_ERROR; @@ -108,7 +108,7 @@ int PoolingFp16CPUKernel::Run() { kernel::LiteKernel *CpuPoolingFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.h index d0cad4d1cb..9bab2bb7a1 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class PoolingFp16CPUKernel : public PoolingBaseCPUKernel { public: PoolingFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PoolingBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~PoolingFp16CPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc index 9cd329c012..8420eb7c03 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc @@ -102,7 +102,7 @@ int ReduceFp16CPUKernel::Run() { outer_size_ = outer_sizes_[i]; inner_size_ = inner_sizes_[i]; axis_size_ = axis_sizes_[i]; - auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceImpl, this, context_->thread_num_); + auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_); if (error_code != RET_OK) { FreeTmpBuffer(); MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; @@ -166,7 +166,7 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() { kernel::LiteKernel *CpuReduceFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Reduce); @@ -195,7 +195,7 @@ kernel::LiteKernel *CpuReduceFp16KernelCreator(const std::vector kernel::LiteKernel *CpuMeanFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Mean); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.h index ceb0b228ec..78074f7e66 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.h @@ -31,7 +31,7 @@ class ReduceFp16CPUKernel : public ReduceBaseCPUKernel { public: ReduceFp16CPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} ~ReduceFp16CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc index cd3288fdee..5309201bd9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc @@ -76,7 +76,7 @@ int ReshapeFp16CPUKernel::Run() { kernel::LiteKernel *CpuReshapeFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, 
const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h index 50f50da85a..badddff56e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h @@ -23,13 +23,13 @@ #include "include/context.h" #include "src/runtime/kernel/arm/fp32/reshape.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ReshapeFp16CPUKernel : public ReshapeCPUKernel { public: ReshapeFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReshapeCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ReshapeFp16CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc index 2bf28e91b3..48715273e6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc @@ -126,7 +126,7 @@ int SoftmaxFp16CPUKernel::Run() { kernel::LiteKernel *CpuSoftmaxFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.h index 2bd2c03fc2..230c2e38bd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class SoftmaxFp16CPUKernel : public SoftmaxBaseCPUKernel { public: SoftmaxFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr) {} ~SoftmaxFp16CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc index 53235b1465..b04f973954 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc @@ -97,7 +97,7 @@ int SplitFp16CPUKernel::Run() { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->MutableData()); } } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitRun, this, thread_n_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "split error error_code[" << ret << "]"; return RET_ERROR; @@ -117,7 +117,7 @@ int SplitFp16CPUKernel::Run() { kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { 
MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.h index f1be314b88..1bf3f4a57c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class SplitFp16CPUKernel : public SplitBaseCPUKernel { public: SplitFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SplitBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SplitFp16CPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc index c3c2f9a60b..5655ff4d39 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc @@ -162,7 +162,7 @@ int TransposeFp16CPUKernel::Run() { in_shape_ = const_cast(in_tensor->shape().data()); out_shape_ = const_cast(out_tensor->shape().data()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, TransposeRun, this, thread_h_num_); + ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; FreeFp16Buffer(); @@ -180,7 +180,7 @@ int TransposeFp16CPUKernel::Run() { kernel::LiteKernel *CpuTransposeFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Transpose); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.h index 36f4c58e9d..29c959d4a6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class TransposeFp16CPUKernel : public LiteKernel { public: explicit TransposeFp16CPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} ~TransposeFp16CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc index 98885f5ebe..883ce9bea4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc @@ -83,7 +83,7 @@ int ActivationCPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; return RET_ERROR; @@ -93,7 +93,7 @@ int ActivationCPUKernel::Run() { kernel::LiteKernel 
*CpuActivationFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Activation); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.h b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.h index fb8692d212..8846334a78 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ActivationCPUKernel : public LiteKernel { public: ActivationCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { type_ = (reinterpret_cast(param))->type_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc index 25d0d8bf4f..d5cc423c41 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc @@ -74,7 +74,7 @@ int AddNCPUKernel::Run() { in1_addr_ = input0_data; in2_addr_ = input1_data; out_addr_ = output_data; - ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddNLaunch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "addn launch fail!ret: " << ret; return RET_ERROR; @@ -82,7 +82,7 @@ int AddNCPUKernel::Run() { for (size_t i = 2; i < in_tensors_.size(); ++i) { in1_addr_ = reinterpret_cast(in_tensors_[i]->MutableData()); in2_addr_ = output_data; - ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddNLaunch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i; return RET_ERROR; @@ -93,7 +93,7 @@ int AddNCPUKernel::Run() { kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { MS_LOG(ERROR) << "Input op_parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.h b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.h index 091529553e..60a2b303e9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class AddNCPUKernel : public LiteKernel { public: AddNCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~AddNCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h index c91ede1168..da7e71919f 100644 --- 
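The callbacks handed to ParallelLaunch throughout this patch (ActivationRun, AddNLaunch, BatchNormRun, and so on) keep the same worker convention as before; only the pool argument changes. A minimal sketch of that convention, with FooCPUKernel as a placeholder kernel exposing a per-slice DoExecute(int task_id); the cdata/task_id shape matches the BatchNormRun context line in the batchnorm.cc hunk below:

int FooRun(void *cdata, int task_id) {
  auto kernel = reinterpret_cast<FooCPUKernel *>(cdata);
  // DoExecute carries the per-slice work, as in FusedBatchnormFp16CPUKernel::DoExecute above.
  auto ret = kernel->DoExecute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "FooRun error task_id[" << task_id << "] error_code[" << ret << "]";
    return ret;
  }
  return RET_OK;
}
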
a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel { public: ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc index 727a063c5c..95d87e747d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc @@ -209,7 +209,7 @@ int ArithmeticCPUKernel::Run() { ComputeStrides(arithmeticParameter_->out_shape_, arithmeticParameter_->out_strides_, arithmeticParameter_->ndim_); } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Arithmetic function error error_code[" << error_code << "]"; @@ -220,7 +220,7 @@ int ArithmeticCPUKernel::Run() { kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter != nullptr); auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx, primitive); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h index f979653fb6..276f456232 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h @@ -48,7 +48,7 @@ class ArithmeticCPUKernel : public LiteKernel { public: ArithmeticCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { arithmeticParameter_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc index 026c4047ca..98bf1fbd29 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc @@ -79,7 +79,7 @@ int ArithmeticSelfCPUKernel::Run() { auto out_tensor = out_tensors_.at(0); in_ptr_ = reinterpret_cast(input_tensor->MutableData()); out_ptr_ = reinterpret_cast(out_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticSelfRuns, this, thread_sz_count_); + ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRuns, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; return ret; @@ -89,7 +89,7 @@ int ArithmeticSelfCPUKernel::Run() { kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + 
OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h index 254c529f58..50216d5e84 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h @@ -24,19 +24,19 @@ #include "schema/model_generated.h" #include "include/context.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; using mindspore::schema::PrimitiveType_Abs; using mindspore::schema::PrimitiveType_Ceil; using mindspore::schema::PrimitiveType_Cos; using mindspore::schema::PrimitiveType_Floor; using mindspore::schema::PrimitiveType_Log; using mindspore::schema::PrimitiveType_LogicalNot; +using mindspore::schema::PrimitiveType_Neg; using mindspore::schema::PrimitiveType_Round; using mindspore::schema::PrimitiveType_Rsqrt; using mindspore::schema::PrimitiveType_Sin; using mindspore::schema::PrimitiveType_Sqrt; using mindspore::schema::PrimitiveType_Square; -using mindspore::schema::PrimitiveType_Neg; namespace mindspore::kernel { class ArithmeticSelfCPUKernel : public LiteKernel { @@ -44,7 +44,7 @@ class ArithmeticSelfCPUKernel : public LiteKernel { public: explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { switch (parameter->type_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h index 4853599c01..615c7a3c67 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h @@ -22,7 +22,7 @@ namespace mindspore::kernel { class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel { public: BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc index 0c375c0c47..d17bac7c40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc @@ -75,7 +75,7 @@ int BatchnormCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail! 
Ret error code: " << ret; return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } @@ -99,7 +99,7 @@ int BatchNormRun(void *cdata, int task_id) { kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h index 13a9672f1e..5eb97da9b4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h @@ -24,13 +24,13 @@ #include "nnacl/batchnorm_parameter.h" #include "src/runtime/runtime_api.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class BatchnormCPUKernel : public LiteKernel { public: BatchnormCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} virtual ~BatchnormCPUKernel() { FreeMeanAndVariance(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc index bd5603bb9a..d6140becba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc @@ -76,7 +76,7 @@ int BiasCPUKernel::Init() { kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.h b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.h index 6da898229b..800af6ffc9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class BiasCPUKernel : public LiteKernel { public: BiasCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { bias_param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc index 32762561ec..e55a05e335 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc @@ -62,7 +62,7 @@ int BroadcastToCPUKernel::Run() { kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter 
*op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h index 28a9cd0199..3a7cf853a7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class BroadcastToCPUKernel : public LiteKernel { public: BroadcastToCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~BroadcastToCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc index 7a780a44fb..41bd500fbf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc @@ -111,12 +111,12 @@ int CastCPUKernel::Run() { if (data_num_ == 0) { return RET_OK; } - return ParallelLaunch(THREAD_POOL_DEFAULT, CastRun, this, op_parameter_->thread_num_); + return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_); } kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.h b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.h index ed327df714..c320d3ddc1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class CastCPUKernel : public LiteKernel { public: CastCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc index 2f0f4eddb1..9d828819a8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc @@ -80,7 +80,7 @@ int ConcatCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConcatsRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatsRun, this, thread_count_); return error_code; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat.h b/mindspore/lite/src/runtime/kernel/arm/fp32/concat.h index 66e249088a..00299ebb1d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/concat_base.h" -using 
mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ConcatCPUKernel : public ConcatBaseCPUKernel { public: ConcatCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc index 6d5d5f9a73..0fd35de6b3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc @@ -62,7 +62,7 @@ int ConstantOfShapeCPUKernel::Run() { param_->unit_ = UP_DIV(param_->element_sz_, thread_num); param_->op_parameter_.thread_num_ = thread_num; out_ptr_ = reinterpret_cast(out_tensors_.front()->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConstantOfShapeRun, this, thread_num); + auto ret = ParallelLaunch(this->context_->thread_pool_, ConstantOfShapeRun, this, thread_num); if (ret != RET_OK) { MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]"; return ret; @@ -72,7 +72,7 @@ int ConstantOfShapeCPUKernel::Run() { kernel::LiteKernel *CpuConstantOfShapeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.h b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.h index 51e932afcd..dc44872180 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.h @@ -21,13 +21,13 @@ #include "include/context.h" #include "nnacl/fp32/constant_of_shape.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ConstantOfShapeCPUKernel : public LiteKernel { public: ConstantOfShapeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc index 9a2ede760d..60b3ad10b9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc @@ -192,7 +192,7 @@ int ConvolutionCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionImpl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; FreeTmpBuffer(); @@ -217,7 +217,7 @@ bool CheckIfUseSlideWindow(ConvParameter *conv_param) { kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector &inputs, const 
std::vector &outputs, OpParameter *op_parameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(op_parameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h index 7d7ee30ecc..7237cb6ce3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionCPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc index 64e56f5f3c..fc9209ca43 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc @@ -189,7 +189,7 @@ int Convolution1x1CPUKernel::Run() { Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Run, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv1x1 strassen error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h index dc04394628..9b32f6b9b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h @@ -34,7 +34,7 @@ namespace mindspore::kernel { class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution1x1CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) { matmul_param_ = new (std::nothrow) MatMulParameter(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc index ce8c9392d9..3ad3cfa9a3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc @@ -250,7 +250,7 @@ int Convolution3x3CPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, Convolution3x3Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 error error_code[" << error_code << "]"; FreeTmpBuffer(); 
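The hunks above and below all make the same two moves: kernel constructors and creator functions now take a lite::InnerContext (which carries its own thread_pool_ and thread_num_), and every Run() hands that per-context pool to ParallelLaunch instead of the old global THREAD_POOL_DEFAULT. The standalone sketch below mimics that shape outside the MindSpore Lite tree; FakeThreadPool, FakeInnerContext, FakeKernel and ParallelLaunchSketch are hypothetical stand-ins for illustration only, not the real API.

// Minimal sketch, assuming nothing from MindSpore Lite itself: a context that owns a
// thread pool, and a kernel that launches its per-task worker through that pool.
#include <functional>
#include <future>
#include <iostream>
#include <vector>

struct FakeThreadPool {
  // Run fn(task_id) for task_id in [0, task_num) on async workers and OR the return codes.
  int Launch(const std::function<int(int)> &fn, int task_num) {
    std::vector<std::future<int>> futures;
    futures.reserve(task_num);
    for (int i = 0; i < task_num; ++i) futures.push_back(std::async(std::launch::async, fn, i));
    int ret = 0;
    for (auto &f : futures) ret |= f.get();
    return ret;
  }
};

// Stand-in for lite::InnerContext: the context, not the process, owns the pool.
struct FakeInnerContext {
  int thread_num_;
  FakeThreadPool *thread_pool_;
};

// Free-function launcher in the spirit of ParallelLaunch(pool, func, cdata, thread_num).
int ParallelLaunchSketch(FakeThreadPool *pool, int (*func)(void *, int), void *cdata, int thread_num) {
  return pool->Launch([func, cdata](int task_id) { return func(cdata, task_id); }, thread_num);
}

struct FakeKernel {
  const FakeInnerContext *context_;
  std::vector<float> data_;

  int DoExecute(int task_id) {
    // Stride the elements across task ids, the way the fp32 kernels split their work.
    for (size_t i = task_id; i < data_.size(); i += context_->thread_num_) data_[i] *= 2.f;
    return 0;
  }
  static int RunTask(void *cdata, int task_id) {
    return static_cast<FakeKernel *>(cdata)->DoExecute(task_id);
  }
  int Run() {
    // After the patch: launch on the pool owned by this kernel's context, not a global one.
    return ParallelLaunchSketch(context_->thread_pool_, RunTask, this, context_->thread_num_);
  }
};

int main() {
  FakeThreadPool pool;
  FakeInnerContext ctx{2, &pool};                 // context owns the pool, like InnerContext::thread_pool_
  FakeKernel kernel{&ctx, {1.f, 2.f, 3.f, 4.f}};
  int ret = kernel.Run();
  for (float v : kernel.data_) std::cout << v << ' ';
  std::cout << "\nret=" << ret << '\n';
  return ret;
}

The apparent motivation for the repeated substitution is ownership: once the pool hangs off the InnerContext a kernel was created with, sessions built on different contexts stop contending for a single process-wide pool.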
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h index 198bb11746..b269481acc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution3x3CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~Convolution3x3CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc index 2be3545287..f8c612e88c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc @@ -116,7 +116,7 @@ int ConvolutionDepthwiseCPUKernel::Run() { auto output_tensor = out_tensors_.at(kOutputIndex); output_ptr_ = reinterpret_cast(output_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -126,7 +126,7 @@ int ConvolutionDepthwiseCPUKernel::Run() { kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h index 8139c995dd..ed9067f6c0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc index 1df3711891..0df3603639 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc @@ -167,7 +167,7 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { packed_output_ = output_ptr; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwSWRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << 
"ConvDwSWRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h index d1e66efd30..726de88f7d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseSWCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc index 975061fd20..a2a037788e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc @@ -181,7 +181,7 @@ int ConvolutionSWCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionSWImpl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionSWImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h index d23a51732e..fcf616ee3f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h @@ -28,7 +28,7 @@ namespace mindspore::kernel { class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionSWCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc index 2120ccd787..773b94ba7c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc @@ -338,7 +338,7 @@ int ConvolutionWinogradCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionWinogradImpl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h 
b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h index 61c7a1f118..28f11b57b3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive, int output_unit) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit), diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc index e9ec8e206a..323b674c91 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc @@ -68,7 +68,7 @@ int CropCPUKernel::Run() { return RET_OK; } - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, CropLaunch, this, param->op_parameter_.thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, CropLaunch, this, param->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h index 278620df76..e08465317a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class CropCPUKernel : public CropBaseCPUKernel { public: CropCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~CropCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc index 65e09881e5..29f9756809 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc @@ -214,7 +214,7 @@ int DeConvolutionCPUKernel::Run() { RowMajor2Col12Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_); - error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvFp32Run, this, thread_count_); + error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! 
error_code[" << error_code << "]"; return error_code; @@ -227,7 +227,7 @@ int DeConvolutionCPUKernel::Run() { kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h index 1f206ab7de..4c8b25975f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h @@ -31,7 +31,7 @@ namespace mindspore::kernel { class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) { matmul_param_ = new MatMulParameter(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc index cb0b7bf6ae..1a08ecf343 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc @@ -178,7 +178,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { packed_output_ = output_addr; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -195,7 +195,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h index dff725dee7..ccd07327e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~DeconvolutionDepthwiseCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.h b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.h index e5d3c869ff..5dd55ca984 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class DepthToSpaceCPUKernel : public DepthToSpaceBaseCPUKernel { public: DepthToSpaceCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : DepthToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~DepthToSpaceCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc index 884f656912..827d8622ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc @@ -111,7 +111,7 @@ int DetectionPostProcessCPUKernel::Run() { kernel::LiteKernel *CpuDetectionPostProcessFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.h b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.h index 8f2b4fda39..c51befff44 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "nnacl/fp32/detection_post_process.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class DetectionPostProcessCPUKernel : public LiteKernel { public: DetectionPostProcessCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc index 61bd1d02bc..e0ff87f98c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc @@ -65,7 +65,7 @@ int EluCPUKernel::Run() { input_addr = reinterpret_cast(in_tensors_.front()->MutableData()); output_addr = reinterpret_cast(out_tensors_.front()->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, EluRun, this, elu_parameter_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]"; return RET_ERROR; @@ -75,7 +75,7 @@ int EluCPUKernel::Run() { kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h index 69a179729c..3a3ccadda7 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class EluCPUKernel : public LiteKernel { public: explicit EluCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} ~EluCPUKernel() override{}; @@ -36,7 +36,7 @@ class EluCPUKernel : public LiteKernel { int DoExcute(int task_id); protected: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; int thread_count_; EluParameter *elu_parameter_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc index 7214049392..59adeb36b4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc @@ -102,7 +102,8 @@ int EmbeddingLookupCPUKernel::Run() { output_addr_ = reinterpret_cast(out_tensors_.front()->MutableData()); ids_addr_ = reinterpret_cast(in_tensors_.back()->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); + auto ret = + ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); context_->allocator->Free(input_addr_); context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); if (ret != RET_OK) { @@ -113,7 +114,7 @@ int EmbeddingLookupCPUKernel::Run() { kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *parameter, const lite::Context *ctx, + OpParameter *parameter, const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h index 6296ea6d70..7c5e56c7bc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class EmbeddingLookupCPUKernel : public LiteKernel { public: explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} ~EmbeddingLookupCPUKernel() override { @@ -43,7 +43,7 @@ class EmbeddingLookupCPUKernel : public LiteKernel { int DoExcute(int task_id); protected: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; int thread_count_; EmbeddingLookupParameter *embedding_lookup_parameter_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/exp.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/exp.cc index d87c83f800..0216bc977b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp.cc @@ -78,7 +78,7 @@ int ExpCPUKernel::Run() { output_addr_ = reinterpret_cast(out_tensors_.front()->MutableData()); exp_parameter_->element_num_ = 
in_tensors_.front()->ElementsNum(); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ExpRun, this, exp_parameter_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, ExpRun, this, exp_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]"; return RET_ERROR; @@ -88,7 +88,7 @@ int ExpCPUKernel::Run() { kernel::LiteKernel *CpuExpFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/exp.h b/mindspore/lite/src/runtime/kernel/arm/fp32/exp.h index 584659a435..c21a0af52b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ExpCPUKernel : public LiteKernel { public: explicit ExpCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} ~ExpCPUKernel() override{}; @@ -36,7 +36,7 @@ class ExpCPUKernel : public LiteKernel { int DoExcute(int task_id); protected: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; int thread_count_; ExpParameter *exp_parameter_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc index b44d91593a..cedad34e89 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc @@ -74,7 +74,7 @@ int ExpandDimsCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ExpandDimsRun, this, thread_sz_count_); + auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ExpandDimsRun error error_code[" << ret << "]"; return ret; @@ -84,7 +84,7 @@ int ExpandDimsCPUKernel::Run() { kernel::LiteKernel *CpuExpandsDimsFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h index 9c3ddf56c7..c07ee71740 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h @@ -24,13 +24,13 @@ #include "include/context.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ExpandDimsCPUKernel : public LiteKernel { public: ExpandDimsCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : 
LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~ExpandDimsCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc index 833f65b754..e04e4affb4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc @@ -77,7 +77,7 @@ int FillCPUKernel::Run() { auto fill_data = reinterpret_cast(fillData->MutableData()); src_data_ = fill_data[0]; out_ptr_ = reinterpret_cast(output->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, FillRun, this, thread_sz_count_); + auto ret = ParallelLaunch(this->context_->thread_pool_, FillRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]"; return ret; @@ -87,7 +87,7 @@ int FillCPUKernel::Run() { kernel::LiteKernel *CpuFillFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h index 62a77eb167..3cad52d42d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "nnacl/fp32/fill.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class FillCPUKernel : public LiteKernel { public: FillCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~FillCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc index 231c88333c..a0db0abed8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc @@ -57,7 +57,7 @@ int FlattenCPUKernel::Run() { kernel::LiteKernel *CpuFlattenFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.h b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.h index 31be510ab1..510005bf12 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.h @@ -22,13 +22,13 @@ #include "include/context.h" #include "nnacl/flatten.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class FlattenCPUKernel : public LiteKernel { public: FlattenCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : 
LiteKernel(parameter, inputs, outputs, ctx, primitive) { flatten_param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc index 563deba6ba..a4e5e3c3da 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc @@ -142,7 +142,7 @@ int FullconnectionCPUKernel::Run() { if (!fc_param_->a_const_) InitMatrixA(a_ptr, a_c12_ptr_); if (!fc_param_->b_const_) InitMatrixB(b_ptr, b_r8_ptr_); - ParallelLaunch(THREAD_POOL_DEFAULT, FcFp32MatmulRun, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, FcFp32MatmulRun, this, thread_count_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.h index 776d370904..fbace601fd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.h @@ -23,13 +23,13 @@ #include "nnacl/fp32/matmul.h" #include "src/runtime/kernel/arm/base/fullconnection_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class FullconnectionCPUKernel : public FullconnectionBaseCPUKernel { public: FullconnectionCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~FullconnectionCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc index 2be4b72208..bd97b59ac4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc @@ -76,11 +76,11 @@ int FusedBatchnormCPUKernel::Run() { float *run_var = static_cast(out_tensors_[2]->MutableData()); float *save_mean = static_cast(out_tensors_[3]->MutableData()); float *save_inv_var = static_cast(out_tensors_[4]->MutableData()); - std::fill(run_mean, run_mean+param->channel_, 0.f); - std::fill(run_var, run_var+param->channel_, 0.f); + std::fill(run_mean, run_mean + param->channel_, 0.f); + std::fill(run_var, run_var + param->channel_, 0.f); FusedBatchNormFp32MeanVar(in, 0.9, run_mean, run_var, param, save_mean, save_inv_var); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } @@ -96,7 +96,7 @@ int FusedBatchnormCPUKernel::DoExecute(int task_id) { kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *op_parameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { FusedBatchnormCPUKernel *kernel = diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h index 476c4fb0ae..615f1070d0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class FusedBatchnormCPUKernel : public BatchnormCPUKernel { public: FusedBatchnormCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc index 3f124619d0..7d9e1206f4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc @@ -106,7 +106,7 @@ int GatherCPUKernel::Run() { return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, GatherRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, GatherRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; } @@ -141,7 +141,7 @@ int GatherCPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lit kernel::LiteKernel *CpuGatherFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Gather); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.h b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.h index 48c7ef2021..65eec3e8c4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class GatherCPUKernel : public LiteKernel { public: GatherCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~GatherCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc index 7f5698ba86..af659f543c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc @@ -123,7 +123,7 @@ int GatherNdCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.front()->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.front()->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, GatherNdRun, this, thread_sz_count_); + auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; return ret; @@ -133,7 +133,7 @@ int GatherNdCPUKernel::Run() { kernel::LiteKernel *CpuGatherNdFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_GatherNd); diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h index b3761e52ba..ecf3df6261 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h @@ -24,13 +24,13 @@ #include "include/context.h" #include "nnacl/op_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class GatherNdCPUKernel : public LiteKernel { public: GatherNdCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~GatherNdCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.cc index 2154be4b5f..a6c4250794 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.cc @@ -151,7 +151,7 @@ int L2NormCPUKernel::Run() { output_ptr_ = reinterpret_cast(out_tensors_.at(kOutputIndex)->MutableData()); if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) { // all axis - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SquareSumRun, this, context_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; return RET_ERROR; @@ -161,13 +161,13 @@ int L2NormCPUKernel::Run() { sum += tmp_sum_[i]; } sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, L2NormRun, this, context_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, L2NormRun, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; return RET_ERROR; } } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast(input_shape.size()) - 1) { - ret = ParallelLaunch(THREAD_POOL_DEFAULT, L2NormTrailingAxisRun, this, context_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; return RET_ERROR; @@ -181,7 +181,7 @@ int L2NormCPUKernel::Run() { kernel::LiteKernel *CpuL2NormFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *param, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "input param is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.h index 4ccd37c2e9..ffd56111b0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm.h @@ -24,13 +24,13 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class L2NormCPUKernel : public LiteKernel { public: L2NormCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector 
&outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { l2_norm_param_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc index 7411ff8dcb..1f342a8aef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc @@ -65,7 +65,7 @@ int LeakyReluCPUKernel::Run() { input_data = reinterpret_cast(input->MutableData()); output_data = reinterpret_cast(out_tensors_.at(0)->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, LeakyReluRun, this, context_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, LeakyReluRun, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PReluDwRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -75,7 +75,7 @@ int LeakyReluCPUKernel::Run() { kernel::LiteKernel *CpuLeakyReluFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *param, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "input param is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.h b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.h index 981a5b474e..b373f17fc6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.h @@ -22,13 +22,13 @@ #include "nnacl/fp32/leaky_relu.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class LeakyReluCPUKernel : public LiteKernel { public: LeakyReluCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { prelu_param_ = (reinterpret_cast(op_parameter_)); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc index 157fe839ac..c22af789da 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc @@ -79,7 +79,7 @@ int LocalResponseNormCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, LocalResponseNormRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]"; return RET_ERROR; @@ -89,7 +89,7 @@ int LocalResponseNormCPUKernel::Run() { kernel::LiteKernel *CpuLocalResponseNormFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.h 
b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.h index de16a5f3bb..81ccdfbed6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class LocalResponseNormCPUKernel : public LiteKernel { public: LocalResponseNormCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~LocalResponseNormCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc index c29b18a3d0..2f13072d89 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc @@ -158,7 +158,7 @@ int LstmCPUKernel::Run() { kernel::LiteKernel *CpuLstmKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "Input parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.h index 1b119b0bde..c0bd5399c7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class LstmCPUKernel : public LiteKernel { public: LstmCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { lstm_parm_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc index 9908c738db..fa71d8638e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc @@ -197,12 +197,12 @@ int MatmulCPUKernel::Run() { a_ptr_ = a_c12_ptr_ + i * params_->row_12_ * params_->deep_; b_ptr_ = b_r8_ptr_ + i * params_->deep_ * params_->col_8_; c_ptr_ = c_src + i * params_->row_ * params_->col_; - ParallelLaunch(THREAD_POOL_DEFAULT, MatmulFloatRun, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, MatmulFloatRun, this, thread_count_); } return RET_OK; } -void MatmulCPUKernel::eval() { +void MatmulCPUKernel::eval() { // Copy weights after training LiteKernel::eval(); if (params_->a_const_ == true) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h index a7cbaceb57..deede64796 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class MatmulCPUKernel : public MatmulBaseCPUKernel { public: explicit MatmulCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC 
*primitive) : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~MatmulCPUKernel() override; @@ -36,7 +36,6 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel { int RunImpl(int task_id); void eval() override; - private: void InitMatrixA(float *src_ptr, float *dst_ptr); void InitMatrixB(float *src_ptr, float *dst_ptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc index fd64392f9e..025e8e4e0a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc @@ -52,7 +52,7 @@ int Nchw2NhwcCPUKernel::Run() { kernel::LiteKernel *CpuNchw2NhwcFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Nchw2Nhwc); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.h b/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.h index dfd5398f51..29ccbc6bb6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.h @@ -29,7 +29,7 @@ namespace mindspore::kernel { class Nchw2NhwcCPUKernel : public LiteKernel { public: Nchw2NhwcCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~Nchw2NhwcCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc index 61c3a14140..5868eb8740 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc @@ -52,7 +52,7 @@ int Nhwc2NchwCPUKernel::Run() { kernel::LiteKernel *CpuNhwc2NchwFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Nhwc2Nchw); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.h b/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.h index 375d617ad0..16cd5f599a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.h @@ -29,7 +29,7 @@ namespace mindspore::kernel { class Nhwc2NchwCPUKernel : public LiteKernel { public: Nhwc2NchwCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~Nhwc2NchwCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc index bde05cc01c..5aa7840109 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc @@ -166,7 +166,7 @@ int 
OneHotCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, RunOneHot, this, context_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]"; return RET_ERROR; @@ -176,7 +176,7 @@ int OneHotCPUKernel::Run() { kernel::LiteKernel *CpuOneHotFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "OneHot opParameter nullptr."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.h b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.h index 3713726922..d4f8a993f7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class OneHotCPUKernel : public LiteKernel { public: OneHotCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc index 53a4d232f5..a7fe0b3916 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc @@ -248,7 +248,7 @@ int PadCPUKernel::Run() { output_data[i] = pad_param_->constant_value_; } } - error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PadImpl, this, context_->thread_num_); + error_code = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; return RET_ERROR; @@ -257,7 +257,7 @@ int PadCPUKernel::Run() { // mirror pad case HandleMirrorPad(); - error_code = ParallelLaunch(THREAD_POOL_DEFAULT, MirrorPadImpl, this, context_->thread_num_); + error_code = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h index 25e6a01018..bf2ad31571 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class PadCPUKernel : public LiteKernel { public: PadCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { pad_param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc index 0734ae64e8..a287f6061a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc @@ -85,7 +85,7 @@ int PoolingCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingImpl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.h index 2d74ad7f71..4634f36e7d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class PoolingCPUKernel : public PoolingBaseCPUKernel { public: PoolingCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PoolingBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~PoolingCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc index d0e4defef2..7fb24f8cdc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc @@ -46,7 +46,7 @@ int PowerCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, PowerImpl, this, thread_count_); + auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "PowerCPUKernel error: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power.h b/mindspore/lite/src/runtime/kernel/arm/fp32/power.h index ff9043ab4c..6fa0635d8f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/power.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class PowerCPUKernel : public PowerBaseCPUKernel { public: PowerCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PowerBaseCPUKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_), diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc index 22298fdc0b..436d770fad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc @@ -135,7 +135,7 @@ int PReluCPUKernel::Run() { auto negative_slope_tensor = in_tensors_.at(1); prelu_param_->slope_ = reinterpret_cast(negative_slope_tensor->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, PReluRun, this, prelu_param_->op_parameter_.thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, PReluRun, this, prelu_param_->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]"; context_->allocator->Free(input_data_); @@ -149,7 +149,7 @@ int PReluCPUKernel::Run() { kernel::LiteKernel *CpuPReluFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *param, - const lite::Context *ctx, const 
kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "input param is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h index 49a66bdbfb..a9ca09eb36 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class PReluCPUKernel : public LiteKernel { public: PReluCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { prelu_param_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/range.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/range.cc index 23024761bf..2c7b156414 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/range.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/range.cc @@ -47,7 +47,7 @@ int RangeCPUKernel::Run() { kernel::LiteKernel *CpuRangeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Range); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/range.h b/mindspore/lite/src/runtime/kernel/arm/fp32/range.h index 3d4a961701..c650fdf023 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/range.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/range.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class RangeCPUKernel : public LiteKernel { public: explicit RangeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~RangeCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/rank.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/rank.cc index e08e689f2e..f7a722fcd3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/rank.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/rank.cc @@ -46,7 +46,7 @@ int RankCPUKernel::Run() { kernel::LiteKernel *CpuRankFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Rank); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/rank.h b/mindspore/lite/src/runtime/kernel/arm/fp32/rank.h index fc101570d3..fdf6eed2d3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/rank.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/rank.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class RankCPUKernel : public LiteKernel { public: explicit RankCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext 
*ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~RankCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc index 08100211b6..7db56b7581 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc @@ -136,7 +136,7 @@ int ReduceCPUKernel::Run() { outer_size_ = outer_sizes_[i]; inner_size_ = inner_sizes_[i]; axis_size_ = axis_sizes_[i]; - auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceImpl, this, context_->thread_num_); + auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h index c983966590..b7b485de88 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h @@ -34,7 +34,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { public: ReduceCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} ~ReduceCPUKernel() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape.h index 01002beadd..695acb44cd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape.h @@ -23,13 +23,13 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/reshape_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ReshapeCPUKernel : public ReshapeBaseCPUKernel { public: ReshapeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReshapeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ReshapeCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc index d0256d5a0f..7ee2bb6d61 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc @@ -198,7 +198,7 @@ int ResizeCPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ResizeImpl, this, context_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.h b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.h index fccb71d202..9826727ec8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ResizeCPUKernel : public ResizeBaseCPUKernel { public: ResizeCPUKernel(OpParameter *parameter, const std::vector &inputs, 
- const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc index 66d403d7fd..d885ca365f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc @@ -132,7 +132,7 @@ int ReverseCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_[0]->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_[0]->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ReverseRun, this, thread_sz_count_); + ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]"; return ret; @@ -142,7 +142,7 @@ int ReverseCPUKernel::Run() { kernel::LiteKernel *CpuReverseFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "opParameter is NULL! "; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h index ae546474f9..ea99002f10 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h @@ -23,13 +23,13 @@ #define REVERSE_STRIDE_MAX_SIZE 4 -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ReverseCPUKernel : public LiteKernel { public: ReverseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~ReverseCPUKernel() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc index 040bc48fda..bc6d6055f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc @@ -103,7 +103,7 @@ int ReverseSequenceCPUKernel::Run() { kernel::LiteKernel *CpuReverseSequenceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *parameter, const lite::Context *ctx, + OpParameter *parameter, const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.h index de885eba7f..00af584e6d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class ReverseSequenceCPUKernel : public LiteKernel { public: ReverseSequenceCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC 
*primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~ReverseSequenceCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc index 039596ae1d..28dec86d3a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc @@ -91,7 +91,7 @@ int ROIPoolingCPUKernel::Run() { in_ptr_ = reinterpret_cast(in_tensors_.front()->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.front()->MutableData()); roi_ptr_ = reinterpret_cast(in_tensors_.at(1)->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ROIPoolingRun, this, param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; return ret; @@ -101,7 +101,7 @@ int ROIPoolingCPUKernel::Run() { kernel::LiteKernel *CpuROIPoolingFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "Input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h index 69c5cf5a70..00e607c648 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class ROIPoolingCPUKernel : public LiteKernel { public: ROIPoolingCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc index 06ce532151..f251c99059 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc @@ -162,7 +162,7 @@ int ScaleCPUKernel::Run() { auto out_tensor = out_tensors_.front(); output_ptr_ = reinterpret_cast(out_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ScaleRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; @@ -171,7 +171,7 @@ int ScaleCPUKernel::Run() { } kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Scale); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h index e4492fc591..a31e61d14b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class 
ScaleCPUKernel : public LiteKernel { public: ScaleCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { scale_param_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc index 1145680b15..da3e40eb31 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc @@ -153,7 +153,7 @@ int ScatterNDCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ScatterNDRun, this, thread_n_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ScatterNDRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]"; return RET_ERROR; @@ -164,7 +164,7 @@ int ScatterNDCPUKernel::Run() { kernel::LiteKernel *CpuScatterNDFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_ScatterND); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h index b84110c9c2..8339bcbde4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ScatterNDCPUKernel : public LiteKernel { public: explicit ScatterNDCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~ScatterNDCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc index 77f328c183..5d5cd404b1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc @@ -56,7 +56,7 @@ int ShapeCPUKernel::Run() { kernel::LiteKernel *CpuShapeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Shape); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/shape.h b/mindspore/lite/src/runtime/kernel/arm/fp32/shape.h index 5e79144893..42b118d96d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ShapeCPUKernel : public LiteKernel { public: ShapeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, 
outputs, ctx, primitive) {} ~ShapeCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc index d0f3322447..f7ebd7d2f2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc @@ -106,7 +106,7 @@ int SliceCPUKernel::Run() { DoSliceNoParallel(input_data, output_data, param); return RET_OK; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SliceLaunch, this, param->op_parameter_.thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, param->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "slice launch fail!ret: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.h b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.h index 15a876a91a..d9d85cfe93 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class SliceCPUKernel : public SliceBaseCPUKernel { public: SliceCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SliceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SliceCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h index 640c268e2f..6436853cbd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SoftmaxCPUKernel : public SoftmaxBaseCPUKernel { public: SoftmaxCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr) {} ~SoftmaxCPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc index 2c1d82a975..a7f37feb7c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc @@ -101,7 +101,7 @@ void SpaceToBatchCPUKernel::FreeTmpBuffer() { kernel::LiteKernel *CpuSpaceToBatchFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *param, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "Input param is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h index 5debd97e05..919ea3e802 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class SpaceToBatchCPUKernel : public LiteKernel { public: SpaceToBatchCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const 
mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc index d1dc8bafe3..11f7bdee26 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc @@ -93,7 +93,7 @@ int SpaceToDepthCPUKernel::Run() { input_ptr_ = reinterpret_cast(in_tensors_[0]->MutableData()); output_ptr_ = reinterpret_cast(out_tensors_[0]->MutableData()); if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC) { - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SpaceToDepthRun, this, thread_h_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SpaceToDepthRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]"; return ret; @@ -108,7 +108,7 @@ int SpaceToDepthCPUKernel::Run() { kernel::LiteKernel *CpuSpaceToDepthFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h index 4c5da3db3d..aa8f7af39d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class SpaceToDepthCPUKernel : public LiteKernel { public: SpaceToDepthCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~SpaceToDepthCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc index fdcecccbd3..e3aa0996a3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc @@ -68,9 +68,8 @@ int SparseToDenseCPUKernel::DoExcute(int task_id) { int index_start = task_id * count_unit_; int index_end = index_start + real_dst_count; int out_width = output_num / index_num; - SparseToDense(sparse_indices_vect, output_shape, sparse_values, - default_value, output_data, isScalar, - index_start, index_end, out_width); + SparseToDense(sparse_indices_vect, output_shape, sparse_values, default_value, output_data, isScalar, index_start, + index_end, out_width); return RET_OK; } @@ -78,8 +77,7 @@ int SparseToDenseRun(void *cdata, int task_id) { auto s2ddata = reinterpret_cast(cdata); auto ret = s2ddata->DoExcute(task_id); if (ret != RET_OK) { - MS_LOG(ERROR) << "SparseToDenseRun error task_id[" << task_id - << "] error_code[" << ret << "]"; + MS_LOG(ERROR) << "SparseToDenseRun error task_id[" << task_id << "] error_code[" << ret << "]"; return RET_ERROR; } return RET_OK; @@ -143,8 +141,8 @@ int SparseToDenseCPUKernel::IndicesValidCheck() { int d3 = output_shape[3]; int index_before = -1; for (int i = 0; i < index_num; i++) { - int index = d1 * sparse_indices_vect[i][0] + d2 * sparse_indices_vect[i][1] + - d3 * sparse_indices_vect[i][2] + 
sparse_indices_vect[i][3]; + int index = d1 * sparse_indices_vect[i][0] + d2 * sparse_indices_vect[i][1] + d3 * sparse_indices_vect[i][2] + + sparse_indices_vect[i][3]; if (index <= index_before) { return RET_ERROR; } @@ -173,8 +171,7 @@ int SparseToDenseCPUKernel::Run() { } output_data = reinterpret_cast(out_tensors_.at(0)->MutableData()); count_unit_ = thread_count_ > 1 ? UP_DIV(index_num, thread_count_) : index_num; - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SparseToDenseRun, this, - s2d_param->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SparseToDenseRun, this, s2d_param->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -194,7 +191,7 @@ int SparseToDenseCPUKernel::Run() { kernel::LiteKernel *CpuSparseToDenseFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { @@ -202,25 +199,20 @@ kernel::LiteKernel *CpuSparseToDenseFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ - << ", type: " - << schema::EnumNamePrimitiveType( - static_cast( - opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); delete kernel; return nullptr; } return kernel; } -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SparseToDense, - CpuSparseToDenseFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SparseToDense, CpuSparseToDenseFp32KernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h index 34c2d56061..6d9de08360 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h @@ -23,13 +23,13 @@ #include "mindspore/lite/nnacl/fp32/sparse_to_dense.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class SparseToDenseCPUKernel : public LiteKernel { public: SparseToDenseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { s2d_param = (reinterpret_cast(op_parameter_)); @@ -45,7 +45,7 @@ class SparseToDenseCPUKernel : public LiteKernel { int IndicesValidCheck(); protected: - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; SparseToDenseParameter *s2d_param; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc index 5f990ab98a..f9af0c6ebb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc @@ -83,7 +83,7 @@ int SplitCPUKernel::Run() { for (int i = 0; i < param->num_split_; i++) { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->MutableData()); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitRun, this, thread_n_num_); + ret 
= ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/split.h b/mindspore/lite/src/runtime/kernel/arm/fp32/split.h index 7191b93635..0c844f1693 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/split.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/split.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SplitCPUKernel : public SplitBaseCPUKernel { public: SplitCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SplitBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SplitCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc index 9836960533..949a8a61fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.cc @@ -58,7 +58,7 @@ int SqueezeCPUKernel::Run() { kernel::LiteKernel *CpuSqueezeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze); if (parameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.h b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.h index 1c381b3ec3..149b5de16d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SqueezeCPUKernel : public LiteKernel { public: explicit SqueezeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~SqueezeCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc index 01878786ff..f3c6d6f078 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/stack.cc @@ -75,7 +75,7 @@ int StackCPUKernel::Run() { kernel::LiteKernel *CpuStackFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (op_parameter == nullptr) { MS_LOG(ERROR) << "Input op_parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/stack.h b/mindspore/lite/src/runtime/kernel/arm/fp32/stack.h index 70fbb75989..deafdd5d6c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/stack.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/stack.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class StackCPUKernel : public LiteKernel { public: StackCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext 
*ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tile.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tile.cc index 1c62aaee7c..835f326b13 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tile.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tile.cc @@ -65,7 +65,7 @@ int TileCPUKernel::Run() { kernel::LiteKernel *CpuTileFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tile.h b/mindspore/lite/src/runtime/kernel/arm/fp32/tile.h index 74a1377780..76fc5f32ca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/tile.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tile.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class TileCPUKernel : public LiteKernel { public: explicit TileCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~TileCPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc index dd64072941..7a69a45f66 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc @@ -68,7 +68,7 @@ int TopKCPUKernel::Run() { kernel::LiteKernel *CpuTopKFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "input parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.h b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.h index 8bd4288b19..af4ef4f4f8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class TopKCPUKernel : public LiteKernel { public: explicit TopKCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~TopKCPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc index c885786d76..7bb188928d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc @@ -118,7 +118,7 @@ int TransposeCPUKernel::Run() { in_data_ = reinterpret_cast(in_tensor->MutableData()); out_data_ = reinterpret_cast(out_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, TransposeRun, this, thread_h_num_); + ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; return ret; @@ 
-128,7 +128,7 @@ int TransposeCPUKernel::Run() { kernel::LiteKernel *CpuTransposeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Transpose); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h index 77746ef9de..7025fab34a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class TransposeCPUKernel : public LiteKernel { public: explicit TransposeCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {} ~TransposeCPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unique.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unique.cc index 5e1306b87f..4ac0217a96 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unique.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unique.cc @@ -48,7 +48,7 @@ int UniqueCPUKernel::Run() { kernel::LiteKernel *CpuUniqueFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter); MS_ASSERT(desc.type == PrimitiveType_Unique); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unique.h b/mindspore/lite/src/runtime/kernel/arm/fp32/unique.h index 7f3530cd7e..35238f993d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unique.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unique.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class UniqueCPUKernel : public LiteKernel { public: UniqueCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~UniqueCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc index 8ec52e594b..e003050c77 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc @@ -73,7 +73,7 @@ int UnsqueezeCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, UnsqueezeRun, this, thread_sz_count_); + ret = ParallelLaunch(this->context_->thread_pool_, UnsqueezeRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "UnsqueezeRun error error_code[" << ret << "]"; return ret; @@ -83,7 +83,7 @@ int UnsqueezeCPUKernel::Run() { kernel::LiteKernel *CpuUnsqueezeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, 
const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Unsqueeze); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.h b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.h index 9c487590a1..13491f5321 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.h @@ -21,13 +21,13 @@ #include "include/context.h" #include "nnacl/fp32/unsqueeze.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class UnsqueezeCPUKernel : public LiteKernel { public: UnsqueezeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~UnsqueezeCPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc index 7a0608dc70..e638eaf084 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc @@ -80,7 +80,7 @@ int UnstackCPUKernel::Run() { kernel::LiteKernel *CpuUnstackFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(parameter != nullptr); MS_ASSERT(desc.type == PrimitiveType_Unstack); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.h b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.h index ad07753547..f2f4fc0ab5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class UnstackCPUKernel : public LiteKernel { public: UnstackCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~UnstackCPUKernel() { free(output_addr_array_); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc index 232664158c..38b6e0b3c3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc @@ -79,7 +79,7 @@ int WhereCPUKernel::Run() { MS_LOG(ERROR) << "Error, inputs' length are zero !!!"; return RET_ERROR; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, WhereRun, this, where_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, WhereRun, this, where_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -89,7 +89,7 @@ int WhereCPUKernel::Run() { kernel::LiteKernel *CpuWhereFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "input opParameter is nullptr!"; diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/where.h b/mindspore/lite/src/runtime/kernel/arm/fp32/where.h index 8129fd41cb..830a65fe5b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/where.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where.h @@ -23,13 +23,13 @@ #include "nnacl/where.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class WhereCPUKernel : public LiteKernel { public: WhereCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) { where_param_ = reinterpret_cast(op_parameter_); @@ -42,7 +42,7 @@ class WhereCPUKernel : public LiteKernel { int DoExcute(int task_id); protected: - const Context *ctx_; + const InnerContext *ctx_; int thread_count_; WhereParameter *where_param_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.cc index 9aa22abc0c..adbd5c3fa4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.cc @@ -44,7 +44,7 @@ int ZerosLikeCPUKernel::Run() { kernel::LiteKernel *CpuZerosLikeFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (opParameter == nullptr) { MS_LOG(ERROR) << "input opParameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.h b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.h index 0c9cabb39a..070e6805f3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class ZerosLikeCPUKernel : public LiteKernel { public: ZerosLikeCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc index 898a4e6afa..3559fd9f8e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc @@ -85,7 +85,7 @@ int ActivationGradCPUKernel::Run() { return ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationGradRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; return RET_ERROR; @@ -95,7 +95,7 @@ int ActivationGradCPUKernel::Run() { kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const 
mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h index b4cd554af5..61e3e9bae5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ActivationGradCPUKernel : public LiteKernel { public: explicit ActivationGradCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { param_act_grad_ = reinterpret_cast(param); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc index 7ade3be4fb..951aa8d457 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc @@ -77,7 +77,7 @@ int ApplyMomentumCPUKernel::Init() { kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h index 121248ed3c..a1885b347f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ApplyMomentumCPUKernel : public LiteKernel { public: explicit ApplyMomentumCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} ~ApplyMomentumCPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc index 1c34c40e18..b56fd8fcd2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc @@ -194,7 +194,7 @@ int ArithmeticGradCPUKernel::Run() { kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(nullptr != opParameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h index 570fae8582..6c76667b6a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h @@ -37,7 +37,7 @@ class ArithmeticGradCPUKernel : public LiteKernel { public: explicit 
ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) { switch (Type()) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.cc index 2a84bf309f..37e07c7e01 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.cc @@ -69,7 +69,7 @@ int ArithmeticSelfGradCPUKernel::Run() { int dy_size = in_tensors_.at(0)->ElementsNum(); op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, static_cast(dy_size)); thread_stride_ = UP_DIV(dy_size, op_parameter_->thread_num_); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticSelfGradRun, this, op_parameter_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfGradRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "parallel launch fail!ret: " << ret; return ret; @@ -80,7 +80,7 @@ int ArithmeticSelfGradCPUKernel::Run() { kernel::LiteKernel *CpuArithmeticSelfGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *param, const lite::Context *ctx, + OpParameter *param, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { @@ -95,8 +95,8 @@ kernel::LiteKernel *CpuArithmeticSelfGradFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(param->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_ + << ", type: " << schema::EnumNamePrimitiveType(static_cast(param->type_)); delete kernel; return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.h index 5955e7dd24..ff414590a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_self_grad.h @@ -26,9 +26,10 @@ namespace mindspore::kernel { class ArithmeticSelfGradCPUKernel : public LiteKernel { typedef int (*ArithmeticSelfGradOperation)(float *, float *, float *, int); + public: ArithmeticSelfGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~ArithmeticSelfGradCPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc index c2f8abc39f..0b8cf33ade 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc @@ -71,12 +71,11 @@ int BiasGradCPUKernel::Run() { kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const 
lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad); - auto *kernel = - new (std::nothrow) BiasGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); + auto *kernel = new (std::nothrow) BiasGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { MS_LOG(ERROR) << "new BiasGradCPUKernel fail!"; return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h index e7a8b40a0b..827e224d6a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class BiasGradCPUKernel : public LiteKernel { public: explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { bias_param = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc index 0cc3b67134..f6f7e13218 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc @@ -85,7 +85,7 @@ int BNGradCPUKernel::Run() { kernel::LiteKernel *CpuBNGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_BNGrad); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h index e5055caa61..a53118d6d3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h @@ -26,13 +26,11 @@ namespace mindspore::kernel { class BNGradCPUKernel : public LiteKernel { public: explicit BNGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr), - workspace_size(0) {} + : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr), workspace_size(0) {} ~BNGradCPUKernel() override { - if (workspace) - delete [] workspace; + if (workspace) delete[] workspace; } int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc index d4b1a89609..1c375bcbcf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc @@ -41,7 +41,7 @@ int ConvolutionTrainCPUKernel::Init() { conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H); conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W); - conv_param_->group_ = (conv_param_->group_ == 0)? 
conv_param_->input_channel_:conv_param_->group_; + conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_; int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_; @@ -104,7 +104,7 @@ int ConvolutionTrainCPUKernel::Run() { kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D || desc.type == schema::PrimitiveType_DepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h index 7cd9cbaf54..8f06ea4d01 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ConvolutionTrainCPUKernel : public LiteKernel { public: explicit ConvolutionTrainCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} ~ConvolutionTrainCPUKernel() override { @@ -43,7 +43,7 @@ class ConvolutionTrainCPUKernel : public LiteKernel { kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive); } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc index 4b82b7a814..4c703e2874 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc @@ -115,7 +115,7 @@ int ConvolutionGradFilterCPUKernel::Run() { kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h index ea2f4aa825..34190bd6f5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ConvolutionGradFilterCPUKernel : public LiteKernel { public: explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) 
{} ~ConvolutionGradFilterCPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc index 0320bbf430..c8a370c29f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc @@ -112,7 +112,7 @@ int ConvolutionGradInputCPUKernel::Run() { kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h index df0a3f84cd..c4a2459582 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class ConvolutionGradInputCPUKernel : public LiteKernel { public: explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} ~ConvolutionGradInputCPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.cc index 7700c84143..ce42686068 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.cc @@ -47,7 +47,7 @@ int DependCPUKernel::Run() { kernel::LiteKernel *CpuDependFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Depend); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.h index 3b32a3804b..2568ff2ceb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/depend.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class DependCPUKernel : public LiteKernel { public: explicit DependCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param = parameter; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h index 176ea09858..cb65756511 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class MakeTupleCPUKernel : public LiteKernel { public: explicit MakeTupleCPUKernel(OpParameter *parameter, const std::vector 
&inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::Primitive *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param = parameter; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.cc index e0394013c0..868dca63bb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.cc @@ -57,7 +57,7 @@ int NegGradCPUKernel::Run() { int dy_size = in_tensors_.at(0)->ElementsNum(); op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, static_cast(dy_size)); thread_stride_ = UP_DIV(dy_size, op_parameter_->thread_num_); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, NegGradRun, this, op_parameter_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, NegGradRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "parallel launch fail!ret: " << ret; return ret; @@ -67,9 +67,8 @@ int NegGradCPUKernel::Run() { } kernel::LiteKernel *CpuNegGradFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *param, const lite::Context *ctx, - const kernel::KernelKey &desc, + const std::vector &outputs, OpParameter *param, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "input parameter is nullptr!"; @@ -83,8 +82,8 @@ kernel::LiteKernel *CpuNegGradFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(param->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_ + << ", type: " << schema::EnumNamePrimitiveType(static_cast(param->type_)); delete kernel; return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.h index 799a84f1e5..239b3a7958 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/neg_grad.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class NegGradCPUKernel : public LiteKernel { public: explicit NegGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~NegGradCPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc index 33a1cc872c..6be058706e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc @@ -82,7 +82,7 @@ int PoolingGradCPUKernel::Run() { kernel::LiteKernel *CpuPoolingGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h index 
467c84ac3b..968f121cdb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h @@ -30,7 +30,7 @@ using mindspore::schema::RoundMode; class PoolingGradCPUKernel : public LiteKernel { public: explicit PoolingGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~PoolingGradCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc index 2bef37ba1e..65607c1faf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc @@ -49,7 +49,7 @@ int PowerGradCPUKernel::Run() { kernel::LiteKernel *CpuPowerGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_PowerGrad); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h index 16a46e99f3..08728218df 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class PowerGradCPUKernel : public LiteKernel { public: PowerGradCPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(param, inputs, outputs, ctx, primitive) { PowerParameter *power_param = reinterpret_cast(param); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc index d608350545..82151f60ac 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc @@ -30,8 +30,8 @@ namespace mindspore::kernel { int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; } -void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits, - float *grads, float *output2) const { +void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits, float *grads, + float *output2) const { float eps = 1e-6; float total_loss = 0.0; if (grads != nullptr) { @@ -40,7 +40,8 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab float logit = -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? 
eps : logits[i * param_->number_of_classes_ + j]); grads[i * param_->number_of_classes_ + j] = - (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j])/param_->batch_size_; + (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]) / + param_->batch_size_; total_loss += labels[i * param_->number_of_classes_ + j] * logit; } } @@ -120,7 +121,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() { kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); @@ -137,5 +138,5 @@ kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector
&inputs, - const std::vector &outputs, - const lite::Context *ctx, - const mindspore::lite::PrimitiveC *primitive) - : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) { + explicit SoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, + const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) { param_ = reinterpret_cast(parameter); } ~SoftmaxCrossEntropyWithLogitsCPUKernel() override { - if (losses_) - delete[] losses_; - if (sum_data_) - delete[] sum_data_; + if (losses_) delete[] losses_; + if (sum_data_) delete[] sum_data_; } - void ForwardPostExecute(const float *labels, const float *logits, - float *output1, float *output2) const; + void ForwardPostExecute(const float *labels, const float *logits, float *output1, float *output2) const; // void ForwardPostExecute(const int *labels, const float *losses, float *output) const; // void GradPostExecute(const int *labels, const float *losses, float* grads, float *output) const; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc index e29fc89d25..cc6732ff49 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc @@ -78,7 +78,7 @@ int SoftmaxGradCPUKernel::Run() { kernel::LiteKernel *CpuSoftmaxGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h index b70f1a1bff..fda77469a3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h @@ -21,21 +21,18 @@ #include "src/lite_kernel.h" #include "nnacl/softmax_parameter.h" - namespace mindspore::kernel { class SoftmaxGradCPUKernel : public LiteKernel { public: explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr), sum_mul_(nullptr) { - param = reinterpret_cast(parameter); - } + : LiteKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr), sum_mul_(nullptr) { + param = reinterpret_cast(parameter); + } ~SoftmaxGradCPUKernel() override { - if (sum_data_) - delete[] sum_data_; - if (sum_mul_) - delete[] sum_mul_; + if (sum_data_) delete[] sum_data_; + if (sum_mul_) delete[] sum_mul_; } int Init() override; int ReSize() override; @@ -50,4 +47,3 @@ class SoftmaxGradCPUKernel : public LiteKernel { } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc index fbbe3f88dd..6386e27554 100644 ---
a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc @@ -143,11 +143,9 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { return RET_OK; } -kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, - const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { +kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator( + const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy); auto *kernel = diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h index 3d00c0fffe..27c9dc52d7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h @@ -31,7 +31,7 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel { explicit SparseSoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, - const lite::Context *ctx, + const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) { param = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc index 47f4427c58..a47a8a6ee3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc @@ -48,7 +48,7 @@ int TupleGetItemCPUKernel::Run() { kernel::LiteKernel *CpuTupleGetItemFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_TupleGetItem); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h index 34a85849be..d7de92aaab 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class TupleGetItemCPUKernel : public LiteKernel { public: explicit TupleGetItemCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param = parameter; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/activation.cc b/mindspore/lite/src/runtime/kernel/arm/int8/activation.cc index d4a2f5e26a..7e616e3f49 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/activation.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/activation.cc @@ -32,7 +32,7 @@ using mindspore::schema::PrimitiveType_Activation; namespace mindspore::kernel { kernel::LiteKernel *CpuActivationInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "parameter is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc index b31bf3aa8f..d0251e66d7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc @@ -96,13 +96,13 @@ int QuantizedAddCPUKernel::Run() { static_cast(in_tensors_.at(1)->MutableData()), reinterpret_cast(input0_data_), reinterpret_cast(input1_data_), arith_para_); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, AddInt8Run, this, thread_count_); ctx_->allocator->Free(input0_data_); ctx_->allocator->Free(input1_data_); return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, AddInt8Run, this, thread_count_); return ret; } @@ -124,7 +124,7 @@ int QuantizedAddCPUKernel::DoExecute(int tId) { kernel::LiteKernel *CpuAddInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h index 552d0f9d35..5acaaf0ac9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class QuantizedAddCPUKernel : public LiteKernel { public: explicit QuantizedAddCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx_->thread_num_) { arith_para_ = reinterpret_cast(parameter); @@ -39,7 +39,7 @@ class QuantizedAddCPUKernel : public LiteKernel { int DoExecute(int tId); private: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; AddQuantParameter para_; ArithmeticParameter *arith_para_; int thread_count_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h index 47b12ec30c..4efccf403a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel { public: ArgMinMaxInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : 
ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc index dc5e1c57bd..852a0add51 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -149,7 +149,7 @@ int ArithmeticInt8CPUKernel::Run() { } TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsInt8Launch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsInt8Launch, this, op_parameter_->thread_num_); if (param->broadcasting_) { context_->allocator->Free(tile_data0_); context_->allocator->Free(tile_data1_); @@ -162,7 +162,7 @@ int ArithmeticInt8CPUKernel::Run() { kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "Input parameter is null!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h index df9aafb6c7..5dec016406 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h @@ -29,7 +29,7 @@ class ArithmeticInt8CPUKernel : public LiteKernel { public: ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~ArithmeticInt8CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc index da306907f8..499160f939 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc @@ -104,7 +104,7 @@ int ArithmeticSelfInt8CPUKernel::Run() { auto out_tensor = out_tensors_.at(0); in_ptr_ = reinterpret_cast(input_tensor->MutableData()); out_ptr_ = reinterpret_cast(out_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticSelfInt8Runs, this, thread_sz_count_); + ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfInt8Runs, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; return ret; @@ -114,7 +114,7 @@ int ArithmeticSelfInt8CPUKernel::Run() { kernel::LiteKernel *CpuArithmeticSelfInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h index db577fe84d..5fc64fa3f5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h @@ -24,7 +24,7 @@ #include "schema/model_generated.h" #include "include/context.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; using mindspore::schema::PrimitiveType_Abs; using mindspore::schema::PrimitiveType_Ceil; using mindspore::schema::PrimitiveType_Cos; @@ -43,7 +43,7 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel { public: explicit ArithmeticSelfInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { switch (parameter->type_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h index 0c59f8efb5..b39f15d5fb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class BatchToSpaceInt8CPUKernel : public BatchToSpaceBaseCPUKernel { public: BatchToSpaceInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc index a332c193f1..b1b458ee4c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc @@ -193,7 +193,8 @@ int BatchnormInt8CPUKernel::Run() { in_addr_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); out_addr_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); - int ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormInt8Run, this, batchnorm_param_->op_parameter_.thread_num_); + int ret = + ParallelLaunch(this->context_->thread_pool_, BatchNormInt8Run, this, batchnorm_param_->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; return ret; @@ -203,7 +204,7 @@ int BatchnormInt8CPUKernel::Run() { kernel::LiteKernel *CpuBatchnormInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h index 22f933703d..5d271b6957 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h @@ -23,13 +23,13 @@ #include "nnacl/int8/batchnorm_int8.h" #include "nnacl/batchnorm_parameter.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class BatchnormInt8CPUKernel : public LiteKernel { public: BatchnormInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector 
&outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { batchnorm_param_ = reinterpret_cast(parameter); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc index 94d0b38e42..f78af52d2d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc @@ -70,7 +70,7 @@ int BiasAddInt8CPUKernel::Run() { kernel::LiteKernel *CpuBiasAddInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or context is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h index 93b3455adf..3d2b6870a1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class BiasAddInt8CPUKernel : public LiteKernel { public: BiasAddInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx) {} ~BiasAddInt8CPUKernel() = default; @@ -35,7 +35,7 @@ class BiasAddInt8CPUKernel : public LiteKernel { int Run() override; private: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc index e3fcfd2eb5..65848b1596 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc @@ -111,7 +111,7 @@ int ConcatInt8CPUKernel::Run() { } output_data_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConcatInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, ConcatInt8Run, this, thread_count_); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h index 8d1fa317e9..ae74cc6a53 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/kernel/arm/base/concat_base.h" #include "src/runtime/runtime_api.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ConcatInt8CPUKernel : public ConcatBaseCPUKernel { public: ConcatInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConcatInt8CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc index c8c1d57d1f..8882842a04 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc @@ -320,7 +320,7 @@ void Convolution1x1Int8CPUKernel::Pre1x1Trans(int8_t *src_input, int8_t *src_out } if (support_optimize_) { - ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Int8Pre, this, thread_count_hw_); + ParallelLaunch(this->context_->thread_pool_, Convolution1x1Int8Pre, this, thread_count_hw_); } else { RowMajor2Row16x4MajorInt8(input_ptr_, packed_input_, matmul_param_->row_, matmul_param_->deep_); PackInputSum16x4Int8(packed_input_, input_sum_, filter_zp_ptr_, conv_param_); @@ -473,7 +473,7 @@ int Convolution1x1Int8CPUKernel::Run() { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); - ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Int8Impl, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, Convolution1x1Int8Impl, this, thread_count_); } FreeRunBuf(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h index 2144344f78..7543ce55cb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h @@ -31,7 +31,7 @@ namespace mindspore::kernel { class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution1x1Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~Convolution1x1Int8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc index 53642ac863..9cdde50671 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc @@ -227,7 +227,7 @@ int Convolution3x3Int8CPUKernel::Run() { auto input_addr = reinterpret_cast(in_tensors_.at(kInputIndex)->MutableData()); PackInputToC8Int8(input_addr, input_data_, conv_param_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Int8Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, Convolution3x3Int8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 int8 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h index da90bbd4a0..ed1bde2c6a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution3x3Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, 
primitive) {} ~Convolution3x3Int8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index 972fc1e99b..fd917fc3f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -161,7 +161,7 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { auto output_tensor = out_tensors_.at(kOutputIndex); output_ptr_ = reinterpret_cast(output_tensor->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwInt8Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwInt8Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]"; return RET_ERROR; @@ -173,7 +173,7 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h index e3ec53b5b7..a3398310c1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc index ec4ee2aaa9..30471d7e35 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc @@ -307,7 +307,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { packed_output_ = output_addr; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwSWInt8Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWInt8Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwSWInt8Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h index ad01fe0668..0ecac9170d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel { public: 
ConvolutionDepthwiseSWInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionDepthwiseSWInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc index e7cf131d95..d0df4a0fa5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc @@ -355,7 +355,7 @@ int ConvolutionInt8CPUKernel::Run() { PackNHWCToNHWC4Int8(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionInt8Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionInt8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv int8 error error_code[" << error_code << "]"; FreeTmpBuffer(); @@ -367,7 +367,7 @@ int ConvolutionInt8CPUKernel::Run() { kernel::LiteKernel *CpuConvInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const Context *ctx, const kernel::KernelKey &desc, + const InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h index 147f5c5854..76ee710178 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ConvolutionInt8CPUKernel() override { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc index adc684734e..0f351fbb7b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc @@ -67,7 +67,7 @@ int CropInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, CropInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, CropInt8Run, this, thread_count_); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h index 233d1811d8..5e10639efa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/kernel/arm/base/crop_base.h" #include "src/runtime/runtime_api.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class CropInt8CPUKernel : public CropBaseCPUKernel { public: 
CropInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) { crop_para_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc index 8bf54c7b9b..4b651bb535 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc @@ -196,7 +196,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { packed_output_ = output_addr; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwInt8Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]"; return RET_ERROR; @@ -214,7 +214,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { kernel::LiteKernel *CpuDeconvDwInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h index 1dd84ce824..40b347c5ec 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~DeconvolutionDepthwiseInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc index 6de1032254..a202de2fdf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc @@ -280,7 +280,7 @@ int DeConvInt8CPUKernel::Run() { DeConvPackInputSum(input_ptr_, input_sum_, conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->row_, C4NUM), UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_); - error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvInt8Run, this, thread_count_); + error_code = ParallelLaunch(this->context_->thread_pool_, DeConvInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv int8 run error! 
error_code[" << error_code << "]"; return RET_ERROR; @@ -293,7 +293,7 @@ int DeConvInt8CPUKernel::Run() { kernel::LiteKernel *CpuDeConvInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_DeConv2D); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index 4c3c25d9e0..a9a1c84834 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -33,7 +33,7 @@ namespace mindspore::kernel { class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~DeConvInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h index f6bd116152..ac3d4f21b3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel { public: DepthToSpaceInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : DepthToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc index b36d46a610..9eddd941a9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc @@ -121,7 +121,7 @@ int DivInt8CPUKernel::Run() { static_cast(in_tensors_.at(1)->MutableData()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, DivInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, DivInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { context_->allocator->Free(tile0_data_); context_->allocator->Free(tile1_data_); @@ -134,7 +134,7 @@ int DivInt8CPUKernel::Run() { kernel::LiteKernel *CpuDivInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h index e505ee6e8e..5f265e342e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class DivInt8CPUKernel : public 
LiteKernel { public: explicit DivInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~DivInt8CPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc index ae626d98ed..38aa69cdeb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc @@ -137,7 +137,7 @@ int FullconnectionInt8CPUKernel::Run() { auto input_ptr = reinterpret_cast(in_tensors_[0]->MutableData()); RowMajor2Row16x4MajorInt8(input_ptr, a_r4x16_ptr_, fc_param_->row_, fc_param_->deep_); CalcInputSums(input_ptr, fc_param_->row_, fc_param_->deep_, quant_params_.weight.zp_, input_sums_, RowMajor); - ParallelLaunch(THREAD_POOL_DEFAULT, FcInt8Run, this, thread_count_); + ParallelLaunch(this->context_->thread_pool_, FcInt8Run, this, thread_count_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h index b3773f4de0..3d6c0c197b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/kernel/arm/base/fullconnection_base.h" #include "nnacl/int8/common_func.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { public: FullconnectionInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~FullconnectionInt8CPUKernel() override { FreeTmpBuffer(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc index d2fa55c808..341c1c0f94 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc @@ -132,7 +132,7 @@ int GatherNdInt8CPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.front()->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.front()->MutableData()); - auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, GatherNdInt8Run, this, thread_sz_count_); + auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdInt8Run, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; return ret; @@ -142,7 +142,7 @@ int GatherNdInt8CPUKernel::Run() { kernel::LiteKernel *CpuGatherNdInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_GatherNd); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h index 1871295ac1..3007530d13 100644 
--- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class GatherNdInt8CPUKernel : public LiteKernel { public: GatherNdInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~GatherNdInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc index 9ee8723fb6..6cfcd0488b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc @@ -111,7 +111,7 @@ int GatherInt8CPUKernel::Run() { return prepare_ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, GatherInt8Run, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, GatherInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]"; return RET_ERROR; @@ -121,7 +121,7 @@ int GatherInt8CPUKernel::Run() { kernel::LiteKernel *CpuGatherInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Gather); if (opParameter == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.h index 3062b31c3a..7972630eed 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.h @@ -26,11 +26,10 @@ namespace mindspore::kernel { class GatherInt8CPUKernel : public LiteKernel { public: GatherInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} - ~GatherInt8CPUKernel() { - } + ~GatherInt8CPUKernel() {} int Init() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc index 095e508a36..ebec77a096 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc @@ -94,7 +94,7 @@ int HswishInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, HswishInt8Run, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, HswishInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "HswishInt8Run function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h index 5319d9ffc5..00008ae263 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class HswishInt8CPUKernel : public LiteKernel { public: HswishInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {} ~HswishInt8CPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc index c2c80c5d12..87aadbdc5a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc @@ -94,7 +94,7 @@ int LeakyReluInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, LeakyReluInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, LeakyReluInt8Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "RunPreluParam failed. errorcode: "; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h index 85caa0a8a3..33f3b74846 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class LeakyReluInt8CPUKernel : public LeakyReluBaseCPUKernel { public: LeakyReluInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LeakyReluBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~LeakyReluInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc index 8df3bedb96..c63e612ef5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc @@ -155,7 +155,7 @@ int MatmulInt8CPUKernel::Run() { NULL, weight_bias_sums_, RowMajor); } c_ptr_ = c_ptr + i * c_stride; - ret = ParallelLaunch(THREAD_POOL_DEFAULT, MatmulInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, MatmulInt8Run, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "MatmulInt8Run error: [" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h index 89e16d59f6..d40c824068 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h @@ -22,13 +22,13 @@ #include "nnacl/quantization/quantize.h" #include "src/runtime/kernel/arm/base/matmul_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class MatmulInt8CPUKernel : public MatmulBaseCPUKernel { public: MatmulInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} 
~MatmulInt8CPUKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index 80303ecd48..9a8b3c1251 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -86,13 +86,13 @@ int MulInt8CPUKernel::Run() { } TileDimensionsInt8(static_cast(in_tensors_.at(0)->MutableData()), static_cast(in_tensors_.at(1)->MutableData()), input0_data_, input1_data_, &tile_para); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, MulInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, MulInt8Run, this, thread_count_); ctx_->allocator->Free(input0_data_); ctx_->allocator->Free(input1_data_); return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, MulInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, MulInt8Run, this, thread_count_); return ret; } @@ -117,7 +117,7 @@ int MulInt8CPUKernel::DoExecute(int task_id) { kernel::LiteKernel *CpuMulInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Mul); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 71be6231af..4c89383f75 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class MulInt8CPUKernel : public LiteKernel { public: explicit MulInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx_->thread_num_) {} ~MulInt8CPUKernel() override{}; @@ -36,7 +36,7 @@ class MulInt8CPUKernel : public LiteKernel { int DoExecute(int task_id); private: - const lite::Context *ctx_; + const lite::InnerContext *ctx_; MulParameter para_; int thread_count_; int64_t elements_num_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc index d5cddcd46a..7405679cad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc @@ -132,7 +132,7 @@ int PadInt8CPUKernel::Run() { out_data_ = reinterpret_cast(out_tensors_[0]->MutableData()); memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t)); - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PadInt8Impl, this, context_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, PadInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.h index 622f0bb2dc..b8b9ba294b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class PadInt8CPUKernel : 
public LiteKernel { public: explicit PadInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { op_parameter_->thread_num_ = ctx->thread_num_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc index eb96e36dac..20c2b27aff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc @@ -89,7 +89,7 @@ int PoolingInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingInt8Impl, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingInt8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "poolingInt8 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.h index a42c65fdb8..8b690149eb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.h @@ -23,13 +23,13 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/pooling_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class PoolingInt8CPUKernel : public PoolingBaseCPUKernel { public: PoolingInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PoolingBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~PoolingInt8CPUKernel() { FreeQuantParam(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc index 0640dc78d3..e5b10c50f0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc @@ -103,7 +103,7 @@ int PowerInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return ret; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, PowerInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, PowerInt8Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PowerInt8Run error, error_code[" << ret << "]"; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.h index 4fa5827b48..cd989b1000 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class PowerInt8CPUKernel : public PowerBaseCPUKernel { public: PowerInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : PowerBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~PowerInt8CPUKernel() {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc index 
9d6c4dd999..cca03f1a5c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc @@ -268,7 +268,7 @@ int ReduceInt8CPUKernel::Run() { outer_size_ = outer_sizes_[i]; inner_size_ = inner_sizes_[i]; axis_size_ = axis_sizes_[i]; - auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceInt8Impl, this, context_->thread_num_); + auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { FreeTmpBuffer(); MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; @@ -283,7 +283,7 @@ int ReduceInt8CPUKernel::Run() { axis_size_ = axis_sizes_.back(); last_dst_data_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); is_last_axis_ = true; - auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceInt8Impl, this, context_->thread_num_); + auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h index 86d95a912e..b038225b8c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h @@ -36,7 +36,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel { public: ReduceInt8CPUKernel(OpParameter *param, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} ~ReduceInt8CPUKernel() { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc index c1361ab478..8b1990142b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc @@ -74,7 +74,7 @@ int ReluXInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReluXInt8Run, this, op_parameter_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ReluXInt8Run, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "ReluXInt8Run function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h index 8473579a37..64b172e57a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { class ReluXInt8CPUKernel : public LiteKernel { public: ReluXInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { type_ = (reinterpret_cast(parameter))->type_; @@ -47,7 +47,7 @@ class ReluXInt8CPUKernel : public LiteKernel { class ReluInt8CPUKernel : public ReluXInt8CPUKernel { public: ReluInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context 
*ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReluXInt8CPUKernel(parameter, inputs, outputs, ctx, primitive) {} @@ -64,7 +64,7 @@ class ReluInt8CPUKernel : public ReluXInt8CPUKernel { class Relu6Int8CPUKernel : public ReluXInt8CPUKernel { public: Relu6Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReluXInt8CPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc index 1508c8af39..590faa70a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc @@ -60,7 +60,7 @@ int ReshapeInt8CPUKernel::Run() { elements_num_ = in_tensors_.at(kInputIndex)->ElementsNum(); count_unit_ = op_parameter_->thread_num_ > 1 ? UP_DIV(elements_num_, op_parameter_->thread_num_) : elements_num_; - ret = ParallelLaunch(THREAD_POOL_DEFAULT, ReshapeInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, ReshapeInt8Run, this, op_parameter_->thread_num_); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h index 39618802f1..2382636eab 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/kernel/arm/base/reshape_base.h" #include "src/runtime/runtime_api.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class ReshapeInt8CPUKernel : public ReshapeBaseCPUKernel { public: ReshapeInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ReshapeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~ReshapeInt8CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc index 4c768bf702..87f3e91395 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc @@ -124,7 +124,7 @@ int ResizeInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ResizeInt8Impl, this, context_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h index ef1717c09c..ba858c5072 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h @@ -28,7 +28,7 @@ namespace mindspore::kernel { class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { public: ResizeInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, 
const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc index 35f8a59913..80e9a1e780 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc @@ -95,7 +95,7 @@ int SigmoidInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, SigmoidInt8Run, this, op_parameter_->thread_num_); + int error_code = ParallelLaunch(this->context_->thread_pool_, SigmoidInt8Run, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "SigmoidInt8Run function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h index 71f8379b6e..03617f4b47 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SigmoidInt8CPUKernel : public LiteKernel { public: SigmoidInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~SigmoidInt8CPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc index 7e98e0659e..0cd60b4952 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc @@ -89,7 +89,7 @@ int SliceInt8CPUKernel::Run() { if (param_->size_[1] < param_->op_parameter_.thread_num_) { ret = SliceInt8NoParallel(input_data, output_data, param_); } else { - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SliceInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SliceInt8Run, this, op_parameter_->thread_num_); } if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h index c2557a0578..90e70bbd96 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SliceInt8CPUKernel : public SliceBaseCPUKernel { public: SliceInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SliceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SliceInt8CPUKernel() {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc index afabbf7f13..f0b6cbb5b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc @@ -120,7 +120,7 @@ int SoftmaxInt8CPUKernel::Run() { context_->allocator->Free(sum_data_); return RET_ERROR; } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SoftmaxRun, this, thread_count_); + ret = 
ParallelLaunch(this->context_->thread_pool_, SoftmaxRun, this, thread_count_); context_->allocator->Free(exp_data_); context_->allocator->Free(sum_data_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h index 5397b437b3..e652eeaa5f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h @@ -25,7 +25,7 @@ namespace mindspore::kernel { class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { public: SoftmaxInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SoftmaxInt8CPUKernel() {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc index 44491fcb01..a5cc6fe7b6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc @@ -56,9 +56,8 @@ int SpaceToBatchInt8CPUKernel::Run() { } kernel::LiteKernel *CpuSpaceToBatchInt8KernelCreator(const std::vector &inputs, - const std::vector &outputs, - OpParameter *param, const lite::Context *ctx, - const kernel::KernelKey &desc, + const std::vector &outputs, OpParameter *param, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (param == nullptr) { MS_LOG(ERROR) << "Input param is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h index aaf0c9cbe6..cce69eb314 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h @@ -23,7 +23,7 @@ namespace mindspore::kernel { class SpaceToBatchInt8CPUKernel : public SpaceToBatchCPUKernel { public: SpaceToBatchInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SpaceToBatchCPUKernel(parameter, inputs, outputs, ctx, primitive) {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc index afde0b18e0..9d685a1126 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc @@ -94,7 +94,7 @@ int SplitInt8CPUKernel::Run() { output_ptr_.push_back(reinterpret_cast(out_tensors_.at(i)->MutableData())); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitInt8Run, this, thread_n_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SplitInt8Run, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h index 8369bdd8bd..f81435bdf3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/kernel/arm/base/split_base.h" #include 
"src/runtime/runtime_api.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class SplitInt8CPUKernel : public SplitBaseCPUKernel { public: SplitInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SplitBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SplitInt8CPUKernel() = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc index 47d44bd7c9..4d041816dd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc @@ -157,7 +157,7 @@ int SqueezeInt8CPUKernel::Run() { free(*(inputs_array + i)); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SqueezeInt8Run, this, thread_count_); + ret = ParallelLaunch(this->context_->thread_pool_, SqueezeInt8Run, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: "; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h index 32a69a467b..9d717e2e8c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h @@ -23,13 +23,13 @@ #include "src/runtime/runtime_api.h" #include "src/runtime/kernel/arm/base/squeeze_base.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace mindspore::kernel { class SqueezeInt8CPUKernel : public SqueezeBaseCPUKernel { public: SqueezeInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : SqueezeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~SqueezeInt8CPUKernel() override { delete quant_Squeeze_parm_; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc index 3c76e2fb1b..064611b4ad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc @@ -145,7 +145,7 @@ int SubInt8CPUKernel::Run() { static_cast(in_tensors_.at(1)->MutableData()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } - ret = ParallelLaunch(THREAD_POOL_DEFAULT, SubInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(this->context_->thread_pool_, SubInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { context_->allocator->Free(tile0_data_); context_->allocator->Free(tile1_data_); @@ -158,7 +158,7 @@ int SubInt8CPUKernel::Run() { kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr || ctx == nullptr) { MS_LOG(ERROR) << "parameter or ctx is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h index 7efa0df9b9..2f52314c2f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h @@ -25,7 +25,7 @@ namespace 
mindspore::kernel { class SubInt8CPUKernel : public LiteKernel { public: explicit SubInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~SubInt8CPUKernel() override {} diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc index 61dd3a5b65..2d7bf12e0d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc @@ -70,7 +70,7 @@ int TopKInt8CPUKernel::Run() { kernel::LiteKernel *CpuTopKInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, - const lite::Context *ctx, const KernelKey &desc, + const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (parameter == nullptr) { MS_LOG(ERROR) << "input parameter is nullptr!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.h index 8ba030e04c..2c4892cc30 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { class TopKInt8CPUKernel : public LiteKernel { public: explicit TopKInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) { TopkParameter *param = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc index 77e633f817..454d370b48 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc @@ -88,7 +88,7 @@ int Unsqueezeint8CPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); out_ptr_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); - ret = ParallelLaunch(THREAD_POOL_DEFAULT, UnsqueezeIn8Run, this, thread_sz_count_); + ret = ParallelLaunch(this->context_->thread_pool_, UnsqueezeIn8Run, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "UnsqueezeRun error error_code[" << ret << "]"; return ret; @@ -98,7 +98,7 @@ int Unsqueezeint8CPUKernel::Run() { kernel::LiteKernel *CpuUnsqueezeInt8KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(opParameter != nullptr); MS_ASSERT(desc.type == schema::PrimitiveType_Unsqueeze); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h index 9631f54a60..88b350b3d2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h @@ -22,13 +22,13 @@ #include "nnacl/int8/unsqueeze_int8.h" #include "src/runtime/kernel/arm/base/layout_transform.h" -using mindspore::lite::Context; +using mindspore::lite::InnerContext; namespace 
mindspore::kernel { class Unsqueezeint8CPUKernel : public LiteKernel { public: Unsqueezeint8CPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const Context *ctx, + const std::vector &outputs, const InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { Unsq_para_ = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index d617972399..fee15e8de5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -137,7 +137,7 @@ int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector *img_si kernel::LiteKernel *OpenClActivationFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { if (inputs.empty()) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 24ad00b463..373259cb05 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -343,12 +343,12 @@ int ArithmeticOpenCLKernel::Run() { kernel::LiteKernel *OpenCLBiasAddKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive); kernel::LiteKernel *OpenCLArithmeticKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { const ArithmeticParameter *arithmetic_parameter = reinterpret_cast(opParameter); if (arithmetic_parameter->broadcasting_) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index 1d8fd987d7..e926f930e2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ArithmeticOpenCLKernel : public OpenCLKernel { public: explicit ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx) + const std::vector &outputs, const lite::InnerContext *ctx) : OpenCLKernel(parameter, inputs, outputs) {} ~ArithmeticOpenCLKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 4f95798c75..1cbdfc0f24 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -187,7 +187,7 @@ int ArithmeticSelfOpenCLKernel::Run() { kernel::LiteKernel *OpenCLArithmeticSelfKernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const 
lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ArithmeticSelfOpenCLKernel(opParameter, inputs, outputs); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index ac741018a1..582c8d6f9c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -18,7 +18,7 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_ARITHMETIC_SELF_PARAMETER_H_ #include -#include +#include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "src/runtime/opencl/opencl_runtime.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index 44825acae0..6aefc623fb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -127,7 +127,7 @@ int BatchNormOpenCLKernel::Run() { kernel::LiteKernel *OpenCLBatchnormKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) BatchNormOpenCLKernel(opParameter, inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc index 19433da577..c7248057c5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc @@ -138,7 +138,7 @@ int BiasAddOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) kernel::LiteKernel *OpenCLBiasAddKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { if (inputs.size() == 0) { MS_LOG(ERROR) << "Input data size must be greater than 0, but your size is " << inputs.size(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index b7e79ec745..a58736a109 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -224,7 +224,7 @@ int ConcatOpenCLKernel::Run() { kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ConcatOpenCLKernel(opParameter, inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index be238473a3..10c029af47 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -215,7 +215,7 @@ int Conv2dTransposeOpenCLKernel::Run() { kernel::LiteKernel 
*OpenCLConv2dTransposeKernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 4ca21a669e..ea0858d1b2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -869,7 +869,7 @@ int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector *global, std kernel::LiteKernel *OpenCLConvolutionKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ConvolutionOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 0082243c41..b49c82c508 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -225,7 +225,7 @@ int DepthwiseConv2dOpenCLKernel::Run() { kernel::LiteKernel *OpenCLDepthwiseConv2dKernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 1a72d1fb82..fa65179683 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -148,7 +148,7 @@ int GatherOpenCLKernel::Run() { kernel::LiteKernel *OpenCLGatherKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) GatherOpenCLKernel(opParameter, inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 91949e0730..36340d4484 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -223,7 +223,7 @@ int MatMulOpenCLKernel::Run() { kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { bool hasBias = false; if (opParameter->type_ == PrimitiveType_FullConnection) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index fa1095dd8c..43605dba95 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -155,7 +155,7 @@ int PoolingOpenCLKernel::Run() { kernel::LiteKernel *OpenCLPooling2dKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) PoolingOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index f345330207..f9d96589d2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -149,7 +149,7 @@ int PReluOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) { if (inputs.empty()) { MS_LOG(ERROR) << "Input data size must be greater than 0, but your size is " << inputs.size(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 7a7b64c88b..a5f6d2ffa4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -144,7 +144,7 @@ int ReduceOpenCLKernel::Run() { kernel::LiteKernel *OpenCLReduceKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ReduceOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 6beba68518..3bb090be2d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -125,7 +125,7 @@ int ReshapeOpenCLKernel::Run() { kernel::LiteKernel *OpenCLReshapeKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ReshapeOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index bd8724871c..04b7f57834 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -365,7 +365,7 @@ int ScaleOpenCLKernel::Run() { kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new 
(std::nothrow) ScaleOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs, ctx); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 673f2e8a1b..92955ba122 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -27,7 +27,7 @@ namespace mindspore::kernel { class ScaleOpenCLKernel : public OpenCLKernel { public: explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx) + const std::vector &outputs, const lite::InnerContext *ctx) : OpenCLKernel(parameter, inputs, outputs) {} ~ScaleOpenCLKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index f55131046b..c6b9c2cbe6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -123,7 +123,7 @@ int SliceOpenCLKernel::Run() { kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 9defeb9e80..bc530bc2ff 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -189,7 +189,7 @@ int SoftmaxOpenCLKernel::Run() { kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 719ddbe777..4f25593c99 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -169,7 +169,7 @@ int ToFormatOpenCLKernel::Run() { kernel::LiteKernel *OpenCLToFormatKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) ToFormatOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); if (kernel == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 9241caf827..9c400b4621 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -137,7 +137,7 @@ int TransposeOpenCLKernel::Run() { kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter 
*opParameter, - const lite::Context *ctx, const kernel::KernelKey &desc, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { auto *kernel = new (std::nothrow) TransposeOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h index e7db0e6540..f4aae11a51 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h @@ -34,8 +34,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel { explicit SubGraphOpenCLKernel(const std::vector inputs, const std::vector outputs, const std::vector inKernels, const std::vector outKernels, - const std::vector nodes, - const lite::Context *ctx = nullptr, + const std::vector nodes, const lite::InnerContext *ctx = nullptr, const mindspore::lite::PrimitiveC *primitive = nullptr) : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx, primitive) {} ~SubGraphOpenCLKernel() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.cc b/mindspore/lite/src/runtime/kernel/opencl/utils.cc index 6cc9fb018b..1046afb83e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc @@ -23,7 +23,7 @@ using mindspore::lite::KernelRegistrar; namespace mindspore::lite { kernel::LiteKernel *GetOpenCLKernel(const std::vector &in_tensors, const std::vector &out_tensors, - OpParameter *parameter, const Context *ctx, const kernel::KernelKey &key) { + OpParameter *parameter, const InnerContext *ctx, const kernel::KernelKey &key) { auto creator = KernelRegistry::GetInstance()->GetCreator(key); if (creator != nullptr) { auto kernel = creator(in_tensors, out_tensors, parameter, nullptr, key, nullptr); diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.h b/mindspore/lite/src/runtime/kernel/opencl/utils.h index ad9f76c06f..2850a5c393 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.h +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.h @@ -27,7 +27,7 @@ namespace mindspore::lite { kernel::LiteKernel *GetOpenCLKernel(const std::vector &in_tensors, const std::vector &out_tensors, - OpParameter *parameter, const Context *ctx, const kernel::KernelKey &key); + OpParameter *parameter, const InnerContext *ctx, const kernel::KernelKey &key); } namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.h b/mindspore/lite/src/runtime/opencl/opencl_executor.h index dd3072b625..03061a82ca 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.h @@ -36,7 +36,7 @@ class OpenCLExecutor : Executor { const session::KernelCallBack &after = nullptr); protected: - Context *context = nullptr; + InnerContext *context = nullptr; OpenCLAllocator *allocator_; }; } // namespace mindspore::lite::opencl diff --git a/mindspore/lite/src/runtime/parallel_executor.cc b/mindspore/lite/src/runtime/parallel_executor.cc index b1fbc05b20..0d13a9dbf9 100644 --- a/mindspore/lite/src/runtime/parallel_executor.cc +++ b/mindspore/lite/src/runtime/parallel_executor.cc @@ -20,10 +20,10 @@ #define MAX_THREAD_NUM 8 namespace mindspore::lite { -ParallelExecutor::~ParallelExecutor() {} +ParallelExecutor::~ParallelExecutor() { DestroyThreadPool(thread_pool_); } int ParallelExecutor::Prepare(std::vector &kernels) { - int status = 
ConfigThreadPool(THREAD_POOL_DEFAULT, MAX_THREAD_NUM, NO_BIND); - if (status != 0) { + thread_pool_ = CreateLiteThreadPool(MAX_THREAD_NUM, NO_BIND); + if (thread_pool_ == nullptr) { MS_LOG(ERROR) << "Memory error: fail to new ThreadPool"; return RET_ERROR; } @@ -79,7 +79,7 @@ int ParallelExecutor::Run(std::vector &in_tensors, std::vector newReadyKernels; while (readyKernels.size() > 0) { results.resize(readyKernels.size(), RET_OK); - ParallelLaunch(THREAD_POOL_DEFAULT, RunKernel, this, readyKernels.size()); + ParallelLaunch(thread_pool_, RunKernel, this, readyKernels.size()); if (std::find_if(results.begin(), results.end(), [](const int &ret) { return (ret != 0); }) != results.end()) { return RET_ERROR; diff --git a/mindspore/lite/src/runtime/parallel_executor.h b/mindspore/lite/src/runtime/parallel_executor.h index ed90c9471c..f6872be4f6 100644 --- a/mindspore/lite/src/runtime/parallel_executor.h +++ b/mindspore/lite/src/runtime/parallel_executor.h @@ -42,6 +42,7 @@ class ParallelExecutor : public Executor { std::unordered_map refCount; std::vector readyKernels; std::vector results; + struct ThreadPool *thread_pool_ = NULL; }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/runtime_api.cc b/mindspore/lite/src/runtime/runtime_api.cc index 374796cdaf..b7fd913049 100644 --- a/mindspore/lite/src/runtime/runtime_api.cc +++ b/mindspore/lite/src/runtime/runtime_api.cc @@ -24,6 +24,9 @@ static std::mutex gWorkspaceMutex; #ifdef __cplusplus extern "C" { #endif + +ThreadPool *CreateLiteThreadPool(int thread_num, int mode) { return CreateThreadPool(thread_num, mode); } + void LiteAPISetLastError(const char *msg) { MS_LOG(ERROR) << "The lite api set last error is " << msg; } void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits) { diff --git a/mindspore/lite/src/runtime/runtime_api.h b/mindspore/lite/src/runtime/runtime_api.h index 0755e9245f..0b2018e3b2 100644 --- a/mindspore/lite/src/runtime/runtime_api.h +++ b/mindspore/lite/src/runtime/runtime_api.h @@ -32,9 +32,9 @@ #ifdef __cplusplus extern "C" { #include "src/runtime/thread_pool.h" - +struct ThreadPool; #endif - +INTERNAL_API_DLL ThreadPool *CreateLiteThreadPool(int thread_num, int mode); INTERNAL_API_DLL void LiteAPISetLastError(const char *msg); INTERNAL_API_DLL void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits); diff --git a/mindspore/lite/src/runtime/thread_pool.c b/mindspore/lite/src/runtime/thread_pool.c index 32c39ee53d..bb625327b0 100644 --- a/mindspore/lite/src/runtime/thread_pool.c +++ b/mindspore/lite/src/runtime/thread_pool.c @@ -43,7 +43,6 @@ #define RET_TP_ERROR (1) #define RET_TP_SYSTEM_ERROR (-1) -#define MAX_TASK_NUM (2) #define MAX_THREAD_NUM (8) #define MAX_THREAD_POOL_NUM (4) #define DEFAULT_SPIN_COUNT (30000) @@ -54,7 +53,7 @@ typedef struct { } Task; typedef struct Thread { - int thread_pool_id; + void *thread_pool; int thread_id; struct Thread *next; pthread_t pthread; @@ -81,22 +80,9 @@ typedef struct ThreadPool { atomic_bool is_alive; } ThreadPool; -static ThreadPool thread_pool_list[MAX_THREAD_POOL_NUM]; -static atomic_int thread_pool_refcount[MAX_THREAD_POOL_NUM] = {ATOMIC_VAR_INIT(0)}; -static atomic_bool thread_pool_is_created[MAX_THREAD_POOL_NUM] = {ATOMIC_VAR_INIT(false)}; - -ThreadPool *GetInstance(int thread_pool_id) { - if (thread_pool_id < 0 || thread_pool_id >= MAX_THREAD_POOL_NUM) { - LOG_ERROR("invaid context id: %d", thread_pool_id); - return NULL; - } - return 
&thread_pool_list[thread_pool_id]; -} - -Thread *GetThread(int thread_pool_id, int thread_id) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +Thread *GetThread(struct ThreadPool *thread_pool, int thread_id) { if (thread_pool == NULL) { - LOG_ERROR("get thread pool instane failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, thread_id); + LOG_ERROR("get thread pool instane failed, thread_id: %d", thread_id); return NULL; } ThreadList *thread_list = thread_pool->thread_list; @@ -105,8 +91,7 @@ Thread *GetThread(int thread_pool_id, int thread_id) { return NULL; } if (thread_id >= thread_list->size) { - LOG_ERROR("invalid thread id: %d, thread_pool_id: %d, thread size: %d", thread_id, thread_pool_id, - thread_list->size); + LOG_ERROR("invalid thread id: %d, thread size: %d", thread_id, thread_list->size); return NULL; } if (thread_id == 0) { @@ -134,10 +119,14 @@ void FreeThread(ThreadList *thread_list, Thread *thread) { // only support sequential release thread_list->head = thread->next; sem_post(&thread->sem); - while (thread != NULL && !thread->is_running) { - sem_destroy(&thread->sem); - free(thread); - thread = NULL; + pthread_join(thread->pthread, NULL); + while (true) { + if (thread != NULL && !thread->is_running) { + sem_destroy(&thread->sem); + free(thread); + thread = NULL; + break; + } } } @@ -306,8 +295,7 @@ int SetAffinity(pthread_t thread_id, cpu_set_t *cpuSet) { return RET_TP_OK; } -int BindMasterThread(int thread_pool_id, bool is_bind) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +int BindMasterThread(struct ThreadPool *thread_pool, bool is_bind) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; @@ -337,8 +325,7 @@ int BindMasterThread(int thread_pool_id, bool is_bind) { return RET_TP_OK; } -int BindSalverThreads(int thread_pool_id, bool is_bind) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +int BindSalverThreads(struct ThreadPool *thread_pool, bool is_bind) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; @@ -360,9 +347,9 @@ int BindSalverThreads(int thread_pool_id, bool is_bind) { LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id); CPU_ZERO(&mask); CPU_SET(attach_id, &mask); - Thread *thread = GetThread(thread_pool_id, i); + Thread *thread = GetThread(thread_pool, i); if (thread == NULL) { - LOG_ERROR("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + LOG_ERROR("get thread failed, thread_id: %d", i); return false; } int ret = SetAffinity(thread->pthread, &mask); @@ -377,9 +364,9 @@ int BindSalverThreads(int thread_pool_id, bool is_bind) { CPU_SET(cpu_cores[i], &mask); } for (int i = 0; i < thread_pool->thread_num - 1; ++i) { - Thread *thread = GetThread(thread_pool_id, i); + Thread *thread = GetThread(thread_pool, i); if (thread == NULL) { - LOG_ERROR("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + LOG_ERROR("get thread failed, thread_id: %d", i); return false; } int ret = SetAffinity(thread->pthread, &mask); @@ -394,22 +381,21 @@ int BindSalverThreads(int thread_pool_id, bool is_bind) { } #endif -int BindThreads(int thread_pool_id, bool is_bind, int mode) { +int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode) { #ifdef BIND_CORE if (mode == NO_BIND_MODE) { return RET_TP_OK; } - ThreadPool *thread_pool = GetInstance(thread_pool_id); if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; } thread_pool->mode = mode; 
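Taken together, the thread_pool.c hunks above replace the global pool-id lookup (GetInstance over a fixed thread_pool_list array) with a ThreadPool handle that the caller owns and passes into every call. A minimal usage sketch of the handle-based API, written against the signatures introduced in this patch; the job body, thread count, and error handling are illustrative only, and the bind mode is one of the BindMode values declared in thread_pool.h:

#include <cstdlib>
#include "src/runtime/runtime_api.h"  // pulls in thread_pool.h inside extern "C"

// Hypothetical job: handle the task_id-th slice of `payload`; returns 0 on success.
static int DoChunk(void *payload, int task_id) {
  (void)payload;
  (void)task_id;
  return 0;
}

static int RunParallelJob(void *payload, int task_num, int bind_mode) {
  // CreateLiteThreadPool is the C++-facing wrapper added in runtime_api.cc;
  // it forwards to CreateThreadPool in thread_pool.c and returns NULL on failure.
  struct ThreadPool *pool = CreateLiteThreadPool(/*thread_num=*/4, bind_mode);
  if (pool == nullptr) {
    return -1;
  }
  int ret = ParallelLaunch(pool, DoChunk, payload, task_num);
  // DestroyThreadPool joins and frees the worker threads and the thread list,
  // but not the malloc'ed handle itself, so the caller releases that.
  DestroyThreadPool(pool);
  free(pool);
  return ret;
}

This mirrors how parallel_executor.cc now creates its own pool in Prepare() and tears it down in its destructor, instead of sharing the former THREAD_POOL_DEFAULT slot.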
- int ret = BindMasterThread(thread_pool_id, is_bind); + int ret = BindMasterThread(thread_pool, is_bind); if (ret != RET_TP_OK) { LOG_ERROR("bind master thread failed."); } - ret = BindSalverThreads(thread_pool_id, is_bind); + ret = BindSalverThreads(thread_pool, is_bind); if (ret != RET_TP_OK) { LOG_ERROR("bind salver thread failed."); } @@ -419,10 +405,10 @@ int BindThreads(int thread_pool_id, bool is_bind, int mode) { #endif } -bool PushTaskToQueue(int thread_pool_id, int thread_id, Task *task) { - Thread *thread = GetThread(thread_pool_id, thread_id); +bool PushTaskToQueue(struct ThreadPool *thread_pool, int thread_id, Task *task) { + Thread *thread = GetThread(thread_pool, thread_id); if (thread == NULL) { - LOG_ERROR("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, thread_id); + LOG_ERROR("get thread failed, thread_id: %d", thread_id); return false; } const int tail_index = atomic_load_explicit(&thread->tail, memory_order_relaxed); @@ -454,8 +440,7 @@ bool PopTaskFromQueue(Thread *thread, Task **task) { return true; } -void WaitAllThread(int thread_pool_id) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +void WaitAllThread(struct ThreadPool *thread_pool) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return; @@ -464,9 +449,9 @@ void WaitAllThread(int thread_pool_id) { while (!k_success_flag) { k_success_flag = true; for (int i = 0; i < thread_pool->thread_num - 1; ++i) { - Thread *thread = GetThread(thread_pool_id, i); + Thread *thread = GetThread(thread_pool, i); if (thread == NULL) { - LOG_ERROR("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + LOG_ERROR("get thread failed, thread_id: %d", i); return; } if (thread->task_size != 0) { @@ -477,8 +462,7 @@ void WaitAllThread(int thread_pool_id) { } } -int DistributeTask(int thread_pool_id, Task *task, int task_num) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +int DistributeTask(struct ThreadPool *thread_pool, Task *task, int task_num) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; @@ -492,7 +476,7 @@ int DistributeTask(int thread_pool_id, Task *task, int task_num) { for (int i = 0; i < size - 1; ++i) { do { k_success_flag = true; - if (!PushTaskToQueue(thread_pool_id, i, task)) { + if (!PushTaskToQueue(thread_pool, i, task)) { k_success_flag = false; } } while (!k_success_flag); @@ -504,12 +488,11 @@ int DistributeTask(int thread_pool_id, Task *task, int task_num) { } task->func(task->content, size - 1); // wait - WaitAllThread(thread_pool_id); + WaitAllThread(thread_pool); return RET_TP_OK; } -int AddTask(int thread_pool_id, int func(void *, int), void *content, int task_num) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +int AddTask(struct ThreadPool *thread_pool, int func(void *, int), void *content, int task_num) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; @@ -524,23 +507,24 @@ int AddTask(int thread_pool_id, int func(void *, int), void *content, int task_n Task task; task.func = func; task.content = content; - return DistributeTask(thread_pool_id, &task, task_num); + return DistributeTask(thread_pool, &task, task_num); } -int ParallelLaunch(int thread_pool_id, int (*func)(void *, int), void *content, int task_num) { - return AddTask(thread_pool_id, func, content, task_num); +int ParallelLaunch(struct ThreadPool *thread_pool, int (*func)(void *, int), void *content, int task_num) { + return AddTask(thread_pool, func, 
content, task_num); } void ThreadRun(Thread *thread) { - ThreadPool *thread_pool = GetInstance(thread->thread_pool_id); + thread->is_running = true; + ThreadPool *thread_pool = (ThreadPool *)(thread->thread_pool); if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); + thread->is_running = false; return; } Task *task = NULL; int thread_id = thread->thread_id; int spin_count = 0; - thread->is_running = true; while (thread_pool->is_alive) { while (thread->activate) { if (PopTaskFromQueue(thread, &task)) { @@ -565,8 +549,7 @@ void ThreadRun(Thread *thread) { thread->is_running = false; } -void PushThreadToList(int thread_pool_id, Thread *thread) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +void PushThreadToList(struct ThreadPool *thread_pool, Thread *thread) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return; @@ -574,7 +557,7 @@ void PushThreadToList(int thread_pool_id, Thread *thread) { ThreadList *thread_list = thread_pool->thread_list; if (thread_list == NULL) { LOG_ERROR("thread list is null"); - DestroyThreadPool(thread_pool_id); + DestroyThreadPool(thread_pool); return; } pthread_mutex_lock(&thread_list->lock); @@ -589,36 +572,35 @@ void PushThreadToList(int thread_pool_id, Thread *thread) { pthread_mutex_unlock(&thread_list->lock); } -int CreateNewThread(int thread_pool_id, int thread_id) { - LOG_INFO("thread_pool_id: %d, create thread: %d", thread_pool_id, thread_id); +int CreateNewThread(struct ThreadPool *thread_pool, int thread_id) { + LOG_INFO("create thread: %d", thread_id); Thread *thread = (Thread *)malloc(sizeof(Thread)); if (thread == NULL) { LOG_ERROR("create thread failed"); - DestroyThreadPool(thread_pool_id); + DestroyThreadPool(thread_pool); return RET_TP_ERROR; } - thread->thread_pool_id = thread_pool_id; + thread->thread_pool = thread_pool; thread->thread_id = thread_id; thread->head = ATOMIC_VAR_INIT(0); thread->tail = ATOMIC_VAR_INIT(0); thread->task_size = ATOMIC_VAR_INIT(0); thread->activate = ATOMIC_VAR_INIT(true); - thread->is_running = ATOMIC_VAR_INIT(false); + thread->is_running = ATOMIC_VAR_INIT(true); thread->next = NULL; sem_init(&thread->sem, 0, 0); - PushThreadToList(thread_pool_id, thread); + PushThreadToList(thread_pool, thread); pthread_create(&thread->pthread, NULL, (void *)ThreadRun, thread); pthread_detach(thread->pthread); return RET_TP_OK; } -int ReConfigThreadPool(int thread_pool_id, int thread_num, int mode) { - LOG_INFO("reconfig thread pool, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, mode); +int ReConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) { + LOG_INFO("reconfig thread pool, thread_num: %d, mode: %d", thread_num, mode); if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { LOG_ERROR("invalid thread num: %d", thread_num); return RET_TP_ERROR; } - ThreadPool *thread_pool = GetInstance(thread_pool_id); if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return RET_TP_ERROR; @@ -634,7 +616,7 @@ int ReConfigThreadPool(int thread_pool_id, int thread_num, int mode) { thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList)); if (thread_pool->thread_list == NULL) { LOG_ERROR("create thread list failed"); - DestroyThreadPool(thread_pool_id); + DestroyThreadPool(thread_pool); return RET_TP_ERROR; } thread_pool->thread_list->head = NULL; @@ -644,20 +626,20 @@ int ReConfigThreadPool(int thread_pool_id, int thread_num, int mode) { } int add_thread_num = thread_pool->thread_num - curr_thread_num; for 
(int i = curr_thread_num - 1, j = 0; j < add_thread_num; ++i, ++j) { - int ret = CreateNewThread(thread_pool_id, i); + int ret = CreateNewThread(thread_pool, i); if (ret != RET_TP_OK) { LOG_ERROR("create new thread failed"); return RET_TP_ERROR; } } - return BindThreads(thread_pool_id, true, mode); + return BindThreads(thread_pool, true, mode); } -int CreateThreadPool(int thread_pool_id, int thread_num, int mode) { - LOG_INFO("create thread pool, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, mode); +ThreadPool *CreateThreadPool(int thread_num, int mode) { + LOG_INFO("create thread pool, thread_num: %d, mode: %d", thread_num, mode); if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { LOG_ERROR("invalid thread num: %d", thread_num); - return RET_TP_ERROR; + return NULL; } #ifdef BIND_CORE if (run_once) { @@ -665,11 +647,7 @@ int CreateThreadPool(int thread_pool_id, int thread_num, int mode) { run_once = false; } #endif - ThreadPool *thread_pool = GetInstance(thread_pool_id); - if (thread_pool == NULL) { - LOG_ERROR("get thread pool instane failed"); - return RET_TP_ERROR; - } + ThreadPool *thread_pool = (struct ThreadPool *)(malloc(sizeof(ThreadPool))); thread_pool->thread_num = thread_num > MAX_THREAD_NUM ? MAX_THREAD_NUM : thread_num; thread_pool->is_alive = ATOMIC_VAR_INIT(true); thread_pool->mode = mode; @@ -678,8 +656,8 @@ int CreateThreadPool(int thread_pool_id, int thread_num, int mode) { thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList)); if (thread_pool->thread_list == NULL) { LOG_ERROR("create thread list failed"); - DestroyThreadPool(thread_pool_id); - return RET_TP_ERROR; + DestroyThreadPool(thread_pool); + return NULL; } thread_pool->thread_list->head = NULL; thread_pool->thread_list->tail = NULL; @@ -687,55 +665,36 @@ int CreateThreadPool(int thread_pool_id, int thread_num, int mode) { pthread_mutex_init(&thread_pool->thread_list->lock, NULL); } for (int i = 0; i < thread_pool->thread_num - 1; ++i) { - int ret = CreateNewThread(thread_pool_id, i); + int ret = CreateNewThread(thread_pool, i); if (ret != RET_TP_OK) { LOG_ERROR("create thread %d failed", i); - DestroyThreadPool(thread_pool_id); - return RET_TP_ERROR; + DestroyThreadPool(thread_pool); + return NULL; } } - return RET_TP_OK; + return thread_pool; } -int ConfigThreadPool(int thread_pool_id, int thread_num, int mode) { - LOG_INFO("config: thread_pool_id: %d, thread_num: %d, mode: %d, is_created: %d, refcount: %d", thread_pool_id, - thread_num, mode, thread_pool_is_created[thread_pool_id], thread_pool_refcount[thread_pool_id]); - if (thread_pool_id >= MAX_THREAD_POOL_NUM) { - LOG_ERROR("invalid context id: %d", thread_pool_id); - return RET_TP_ERROR; - } +int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) { if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { LOG_ERROR("invalid thread num: %d", thread_num); return RET_TP_ERROR; } - thread_pool_refcount[thread_pool_id] += 1; - int ret; - if (thread_pool_is_created[thread_pool_id]) { - ret = ReConfigThreadPool(thread_pool_id, thread_num, mode); - if (ret != RET_TP_OK) { - LOG_ERROR("reconfig thread pool failed, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, - mode); - } - } else { - thread_pool_is_created[thread_pool_id] = true; - ret = CreateThreadPool(thread_pool_id, thread_num, mode); - if (ret != RET_TP_OK) { - LOG_ERROR("create thread pool failed, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, - mode); - } + int ret = 
ReConfigThreadPool(thread_pool, thread_num, mode); + if (ret != RET_TP_OK) { + LOG_ERROR("reconfig thread pool failed, thread_num: %d, mode: %d", thread_num, mode); } return ret; } -void ActivateThreadPool(int thread_pool_id) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +void ActivateThreadPool(struct ThreadPool *thread_pool) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return; } ThreadList *thread_list = thread_pool->thread_list; if (thread_list == NULL) { - LOG_ERROR("thread pool: %d list is null", thread_pool_id); + LOG_ERROR("thread pool's list is null"); return; } Thread *thread = thread_list->head; @@ -746,15 +705,14 @@ void ActivateThreadPool(int thread_pool_id) { } } -void DeactivateThreadPool(int thread_pool_id) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +void DeactivateThreadPool(struct ThreadPool *thread_pool) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return; } ThreadList *thread_list = thread_pool->thread_list; if (thread_list == NULL) { - LOG_ERROR("thread pool: %d list is null", thread_pool_id); + LOG_ERROR("thread pool's list is null"); return; } Thread *thread = thread_list->head; @@ -764,39 +722,30 @@ void DeactivateThreadPool(int thread_pool_id) { } } -void DestroyThreadPool(int thread_pool_id) { - thread_pool_refcount[thread_pool_id]--; - if (thread_pool_refcount[thread_pool_id] > 0) { - LOG_ERROR("no need to free, thread_pool_id: %d, refcount: %d", - thread_pool_id, thread_pool_refcount[thread_pool_id]); - return; - } - ThreadPool *thread_pool = GetInstance(thread_pool_id); +void DestroyThreadPool(struct ThreadPool *thread_pool) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return; } if (thread_pool->thread_list == NULL) { - LOG_ERROR("thread pool: %d list is null", thread_pool_id); + LOG_ERROR("thread pool's list is null"); return; } - DeactivateThreadPool(thread_pool_id); - thread_pool_is_created[thread_pool_id] = false; + DeactivateThreadPool(thread_pool); thread_pool->is_alive = false; + LOG_ERROR("DestroyThreadPool thread num : %d", thread_pool->thread_num); for (int i = 0; i < thread_pool->thread_num - 1; ++i) { - Thread *thread = GetThread(thread_pool_id, i); + Thread *thread = GetThread(thread_pool, i); if (thread != NULL) { FreeThread(thread_pool->thread_list, thread); } } free(thread_pool->thread_list); thread_pool->thread_list = NULL; - LOG_INFO("destroy thread pool success, thread_pool_id: %d, refcount: %d", thread_pool_id, - thread_pool_refcount[thread_pool_id]); + LOG_INFO("destroy thread pool success"); } -int GetCurrentThreadNum(int thread_pool_id) { - ThreadPool *thread_pool = GetInstance(thread_pool_id); +int GetCurrentThreadNum(struct ThreadPool *thread_pool) { if (thread_pool == NULL) { LOG_ERROR("get thread pool instane failed"); return 0; diff --git a/mindspore/lite/src/runtime/thread_pool.h b/mindspore/lite/src/runtime/thread_pool.h index 1b9e60bf15..76028150e7 100644 --- a/mindspore/lite/src/runtime/thread_pool.h +++ b/mindspore/lite/src/runtime/thread_pool.h @@ -19,6 +19,8 @@ #include +#define MAX_TASK_NUM (2) + /// \brief BindMode defined for holding bind cpu strategy argument. 
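With the per-id bookkeeping and refcounting removed, ConfigThreadPool is now purely a resize-and-rebind operation on an existing handle (it forwards to ReConfigThreadPool, which creates the missing worker threads and then calls BindThreads). A hedged sketch of growing a pool after creating it; the thread counts are illustrative, 0 stands for RET_TP_OK, and MID_MODE is taken from the BindMode enum declared just below:

// Create a two-worker pool, then ask for four workers bound "middle cpu first".
// Core binding only takes effect in builds compiled with BIND_CORE.
struct ThreadPool *pool = CreateThreadPool(/*thread_num=*/2, MID_MODE);
if (pool != nullptr) {
  if (ConfigThreadPool(pool, /*thread_num=*/4, MID_MODE) != 0) {
    // reconfiguration failed; handle the error or keep using the pool as created
  }
}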
typedef enum { MID_MODE = -1, /**< bind middle cpu first */ @@ -34,12 +36,16 @@ typedef enum { THREAD_POOL_FOURTH = 3 /**< the fourth thread pool id */ } ThreadPoolId; +struct ThreadPool; + +struct ThreadPool *CreateThreadPool(int thread_num, int mode); + /** * create thread pool and init * @param thread_num * @param mode */ -int ConfigThreadPool(int thread_pool_id, int thread_num, int mode); +int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode); /** * @@ -48,36 +54,36 @@ int ConfigThreadPool(int thread_pool_id, int thread_num, int mode); * @param content * @param task_num */ -int ParallelLaunch(int thread_pool_id, int (*job)(void *, int), void *content, int task_num); +int ParallelLaunch(struct ThreadPool *thread_pool, int (*job)(void *, int), void *content, int task_num); /** * bind each thread to specified cpu core * @param is_bind * @param mode */ -int BindThreads(int thread_pool_id, bool is_bind, int mode); +int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode); /** * activate the thread pool * @param thread_pool_id */ -void ActivateThreadPool(int thread_pool_id); +void ActivateThreadPool(struct ThreadPool *thread_pool); /** * deactivate the thread pool * @param thread_pool_id */ -void DeactivateThreadPool(int thread_pool_id); +void DeactivateThreadPool(struct ThreadPool *thread_pool); /** * * @return current thread num */ -int GetCurrentThreadNum(int thread_pool_id); +int GetCurrentThreadNum(struct ThreadPool *thread_pool); /** * destroy thread pool, and release resource */ -void DestroyThreadPool(int thread_pool_id); +void DestroyThreadPool(struct ThreadPool *thread_pool); #endif // MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h index 20fe42cb6c..1d0f73ffa6 100644 --- a/mindspore/lite/src/scheduler.h +++ b/mindspore/lite/src/scheduler.h @@ -19,14 +19,14 @@ #include #include "src/lite_kernel.h" -#include "include/context.h" +#include "src/inner_context.h" #include "include/model.h" #include "src/ops/primitive_c.h" namespace mindspore::lite { class Scheduler { public: - explicit Scheduler(const Context *ctx) { context_ = const_cast(ctx); } + explicit Scheduler(const InnerContext *ctx) { context_ = const_cast(ctx); } int Schedule(const lite::Model *model, std::vector *tensors, std::vector *kernels); int ReSizeKernels(const std::vector &kernels); @@ -48,7 +48,7 @@ class Scheduler { void SetKernelTensorDataType(kernel::LiteKernel *kernel); protected: - Context *context_ = nullptr; + InnerContext *context_ = nullptr; }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/train/loss_kernel.h b/mindspore/lite/src/train/loss_kernel.h index 5205cb072c..07484b5ecf 100644 --- a/mindspore/lite/src/train/loss_kernel.h +++ b/mindspore/lite/src/train/loss_kernel.h @@ -23,7 +23,7 @@ class LossKernel : public LiteKernel { public: LossKernel() = default; explicit LossKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::Context *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~LossKernel() = default; diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index 78c38f7261..e6ee3f8f13 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -138,6 +138,7 @@ set(TEST_LITE_SRC ${LITE_DIR}/src/runtime/parallel_executor.cc ${LITE_DIR}/src/tensor.cc 
${LITE_DIR}/src/executor.cc + ${LITE_DIR}/src/inner_context.cc ${LITE_DIR}/src/kernel_registry.cc ${LITE_DIR}/src/lite_kernel.cc ${LITE_DIR}/src/lite_session.cc @@ -198,7 +199,7 @@ if(BUILD_CONVERTER) ${LITE_DIR}/tools/optimizer/common/gllo_utils.cc ${LITE_DIR}/tools/optimizer/fusion/conv_biasadd_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_activation_fusion.cc - ${LITE_DIR}/tools/optimizer/fusion/conv_tuple_activation_fusion.cc + ${LITE_DIR}/tools/optimizer/fusion/conv_tuple_activation_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_transform_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_scale_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_bn_fusion.cc diff --git a/mindspore/lite/test/ut/src/graph_test.cc b/mindspore/lite/test/ut/src/graph_test.cc index 8cadde3cca..4e5607ae44 100644 --- a/mindspore/lite/test/ut/src/graph_test.cc +++ b/mindspore/lite/test/ut/src/graph_test.cc @@ -81,166 +81,4 @@ char *ReadModelFile(const char *file, size_t *size) { return buf.release(); } - -// TEST_F(TestLiteInference, Net) { -// auto msGraph = std::make_shared(); -// msGraph->name = "graph"; -// auto msSubgraph = std::make_unique(); -// msSubgraph->name = "subGraph"; -// -// auto node = std::make_unique(); -// node->inputIndex = {0, 1}; -// node->outputIndex = {2}; -// node->attr.type = lite::OpT_Add; -// node->attr.value = new lite::AddT; -// node->name = "Add"; -// node->fmkType = lite::FmkType_CAFFE; -// msSubgraph->nodes.emplace_back(std::move(node)); -// -// msSubgraph->inputIndex = {0}; -// msSubgraph->outputIndex = {2}; -// -// auto input0 = std::make_unique(); -// input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT; -// input0->format = lite::Format_NCHW; -// input0->dataType = TypeId::kNumberTypeFloat; -// input0->dims = {1, 1, 2, 2}; -// input0->offset = -1; -// msSubgraph->allTensors.emplace_back(std::move(input0)); -// -// auto input1 = std::make_unique(); -// input1->refCount = lite::MSCONST_WEIGHT_REFCOUNT; -// input1->format = lite::Format_NCHW; -// input1->dataType = TypeId::kNumberTypeFloat; -// input1->dims = {1, 1, 2, 2}; -// input1->offset = -1; -// input1->data.resize(16); -// msSubgraph->allTensors.emplace_back(std::move(input1)); -// -// auto output = std::make_unique(); -// output->refCount = 0; -// output->format = lite::Format_NCHW; -// output->dims = {1, 1, 2, 2}; -// output->offset = -1; -// msSubgraph->allTensors.emplace_back(std::move(output)); -// msGraph->subgraphs.emplace_back(std::move(msSubgraph)); -// -// flatbuffers::FlatBufferBuilder builder(1024); -// auto offset = lite::GraphDef::Pack(builder, msGraph.get()); -// builder.Finish(offset); -// int size = builder.GetSize(); -// auto *content = builder.GetBufferPointer(); -// mindspore::lite::Context context; -// context.allocator = nullptr; -// context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU; -// #if 0 -// auto graph = mindspore::lite::inference::LoadModel((char *)content, size); -// -// auto session = mindspore::lite::inference::Session::CreateSession(&context); -// -// std::vector z1 = {1.1, 2.1, 3.1, 4.1}; -// std::vector inputs; -// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector({1, 1, 2, 2})); -// memcpy_s(t1->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float)); -// -// auto t2 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector({1, 1, 2, 2})); -// memcpy_s(t2->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float)); -// -// inputs.push_back(t1); -// inputs.push_back(t1); -// // 
VectorRef *outputs = new VectorRef(); -// auto outputs = session->RunGraph(inputs); -// #else -// auto file = "./efficientnet_b0.ms"; -// size_t model_size; -// -// char *modelbuf = ReadModelFile(file, &model_size); -// auto graph = mindspore::lite::inference::LoadModel(modelbuf, model_size); -// auto session = mindspore::lite::inference::Session::CreateSession(&context); -// session->CompileGraph(graph); -// std::vector inputs; -// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector({1, 244, 244, 3})); -// -// inputs.push_back(t1); -// auto outputs = session->RunGraph(inputs); -// #endif -// } - -// TEST_F(TestLiteInference, Conv) { -// auto msGraph = std::make_shared(); -// msGraph->name = "graph"; -// auto msSubgraph = std::make_unique(); -// msSubgraph->name = "subGraph"; -// -// auto node = std::make_unique(); -// node->inputIndex = {0, 1}; -// node->outputIndex = {2}; -// node->attr.type = lite::OpT_Conv2D; -// auto attr = new lite::Conv2DT; -// attr->padMode = lite::PadMode_SAME; -// attr->channelIn = 1; -// attr->channelOut = 1; -// attr->format = lite::Format_NHWC; -// attr->strideH = 1; -// attr->strideW = 1; -// attr->kernelH = 2; -// attr->kernelW = 2; -// -// node->attr.value = attr; -// node->name = "Conv2D"; -// node->fmkType = lite::FmkType_CAFFE; -// msSubgraph->nodes.emplace_back(std::move(node)); -// -// msSubgraph->inputIndex = {0}; -// msSubgraph->outputIndex = {2}; -// // MS_LOG(ERROR) << "OutData"; -// -// auto input0 = std::make_unique(); -// input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT; -// input0->format = lite::Format_NCHW; -// input0->dataType = TypeId::kNumberTypeFloat; -// input0->dims = {1, 1, 5, 5}; -// // input0->data.resize(sizeof(float) * 25); -// // std::vector input_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; -// // memcpy(input0->data.data(), input_data.data(), sizeof(int) * 25); -// input0->offset = -1; -// msSubgraph->allTensors.emplace_back(std::move(input0)); -// -// auto weight = std::make_unique(); -// weight->refCount = lite::MSCONST_WEIGHT_REFCOUNT; -// weight->format = lite::Format_KHWC; -// weight->dataType = TypeId::kNumberTypeFloat; -// weight->dims = {1, 2, 2, 1}; -// weight->data.resize(sizeof(float) * 4); -// std::vector weight_data = {1, 2, 3, 4}; -// memcpy(weight->data.data(), weight_data.data(), sizeof(int) * 4); -// weight->offset = -1; -// msSubgraph->allTensors.emplace_back(std::move(weight)); -// -// auto output = std::make_unique(); -// output->refCount = 0; -// output->format = lite::Format_NCHW; -// output->dims = {1, 1, 5, 5}; -// output->offset = -1; -// msSubgraph->allTensors.emplace_back(std::move(output)); -// msGraph->subgraphs.emplace_back(std::move(msSubgraph)); -// -// flatbuffers::FlatBufferBuilder builder(1024); -// auto offset = lite::GraphDef::Pack(builder, msGraph.get()); -// builder.Finish(offset); -// int size = builder.GetSize(); -// auto *content = builder.GetBufferPointer(); -// mindspore::lite::Context context; -// context.allocator = nullptr; -// context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU; -// auto graph = mindspore::lite::inference::LoadModel((char *)content, size); -// auto session = mindspore::lite::inference::Session::CreateSession(&context); -// session->CompileGraph(graph); -// std::vector inputs; -// auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector({1, 3, 244, 244})); -// -// inputs.push_back(t1); -// auto outputs = session->RunGraph(inputs); -// } - } // namespace 
mindspore diff --git a/mindspore/lite/test/ut/src/infer_test.cc b/mindspore/lite/test/ut/src/infer_test.cc index 1811233c54..555bc6fd7f 100644 --- a/mindspore/lite/test/ut/src/infer_test.cc +++ b/mindspore/lite/test/ut/src/infer_test.cc @@ -105,10 +105,11 @@ TEST_F(InferTest, TestConvNode) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - auto context = new lite::Context; + auto context = new lite::InnerContext; context->cpu_bind_mode_ = lite::NO_BIND; context->device_type_ = lite::DT_CPU; context->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, context->Init()); auto session = session::LiteSession::CreateSession(context); ASSERT_NE(nullptr, session); auto ret = session->CompileGraph(model); @@ -203,10 +204,11 @@ TEST_F(InferTest, TestAddNode) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - auto context = new lite::Context; + auto context = new lite::InnerContext; context->cpu_bind_mode_ = lite::NO_BIND; context->device_type_ = lite::DT_CPU; context->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, context->Init()); auto session = session::LiteSession::CreateSession(context); ASSERT_NE(nullptr, session); auto ret = session->CompileGraph(model); @@ -246,7 +248,7 @@ TEST_F(InferTest, TestAddNode) { class SessionWithParallelExecutor : public lite::LiteSession { public: - int Init(lite::Context *context) { + int Init(lite::InnerContext *context) { lite::LiteSession::Init(context); delete this->executor; this->executor = new mindspore::lite::ParallelExecutor(); @@ -304,10 +306,11 @@ TEST_F(InferTest, TestParallelExecutor) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - auto context = new lite::Context; + auto context = new lite::InnerContext; context->cpu_bind_mode_ = lite::NO_BIND; context->device_type_ = lite::DT_CPU; context->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, context->Init()); auto session = new SessionWithParallelExecutor(); session->Init(context); ASSERT_NE(nullptr, session); @@ -344,10 +347,11 @@ TEST_F(InferTest, TestModel) { auto model = lite::Model::Import(buf[0], model_size); ASSERT_NE(nullptr, model); delete[] buf[0]; - auto context = new lite::Context; + auto context = new lite::InnerContext; context->cpu_bind_mode_ = lite::NO_BIND; context->device_type_ = lite::DT_CPU; context->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, context->Init()); auto session = session::LiteSession::CreateSession(context); ASSERT_NE(nullptr, session); auto ret = session->CompileGraph(model); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc index 02b08216d4..7e01b4eabb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/common/strided_slice_tests.cc @@ -62,7 +62,8 @@ TEST_F(TestStridedSlice, StridedSlice) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -107,7 +108,8 @@ TEST_F(TestStridedSlice, StridedSliceInt8) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, 
reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp16/reduce_fp16_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp16/reduce_fp16_tests.cc index 4cb045ad54..5e2026a1f8 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp16/reduce_fp16_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp16/reduce_fp16_tests.cc @@ -37,7 +37,7 @@ class TestReduceFp16 : public mindspore::CommonTest { std::vector outputs_{&out_tensor_}; ReduceParameter param_ = {{}}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, schema::PrimitiveType_Reduce}; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::KernelCreator creator_ = nullptr; kernel::LiteKernel *kernel_ = nullptr; }; @@ -69,8 +69,9 @@ void TestReduceFp16::Prepare(const std::vector &input_shape, const std::vec param_.mode_ = mode; desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, schema::PrimitiveType_Reduce}; - ctx_ = lite::Context(); + ctx_ = lite::InnerContext(); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator_, nullptr); kernel_ = creator_(inputs_, outputs_, reinterpret_cast(&param_), &ctx_, desc, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc index 024c286dea..86f878c7c4 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc @@ -111,8 +111,9 @@ TEST_F(TestActivationFp32, HSwishFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Activation}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 7; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc index 249c639264..2acfac53fc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc @@ -21,6 +21,7 @@ #include "mindspore/lite/nnacl/fp32/arithmetic.h" #include "mindspore/lite/src/kernel_registry.h" #include "mindspore/lite/src/lite_kernel.h" +#include "include/errorcode.h" namespace mindspore { @@ -507,8 +508,9 @@ TEST_F(TestArithmeticTestFp32, MulFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&mul_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -580,8 +582,9 @@ TEST_F(TestArithmeticTestFp32, MulReluFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = 
lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&mul_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -653,8 +656,9 @@ TEST_F(TestArithmeticTestFp32, MulRelu6Fp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&mul_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -725,8 +729,9 @@ TEST_F(TestArithmeticTestFp32, AddReluFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&add_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -797,8 +802,9 @@ TEST_F(TestArithmeticTestFp32, AddRelu6Fp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&add_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -868,8 +874,9 @@ TEST_F(TestArithmeticTestFp32, DivReluFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&div_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -941,8 +948,9 @@ TEST_F(TestArithmeticTestFp32, DivRelu6Fp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&div_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -1011,8 +1019,9 @@ TEST_F(TestArithmeticTestFp32, EqualFp32) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&equal_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc index b19d6edd0c..8593c16145 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc @@ -55,8 +55,9 @@ TEST_F(TestBatchnormFp32, BNTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -111,8 +112,9 @@ TEST_F(TestBatchnormFp32, FusedBNTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -161,8 +163,9 @@ TEST_F(TestBatchnormFp32, easyTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/constant_of_shape_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/constant_of_shape_fp32_test.cc index d8534fbef8..de89f88655 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/constant_of_shape_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/constant_of_shape_fp32_test.cc @@ -52,8 +52,9 @@ TEST_F(TestConstantOfShapeFp32, Simple) { std::vector a_shape = {4, 1, 1, 1}; // std::vector c_shape = {2, 2, 2, 1}; int total_size = ConstantOfShapeTestInit(&inputs_, &outputs_, a, a_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::ConstantOfShapeCPUKernel *op = new kernel::ConstantOfShapeCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc index d469b57fd8..0e288d8323 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/conv1x1_fp32_tests.cc @@ -217,8 +217,9 @@ TEST_F(TestConv1x1Fp32, Conv1x1Test1) { std::vector inputs_; std::vector outputs_; auto conv_param = new ConvParameter(); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext(); ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); float *correct; int total_size = Conv1x1TestInit1(&inputs_, &outputs_, conv_param, &correct); 
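Every one of these kernel unit-test updates follows the same recipe: swap lite::Context for lite::InnerContext, keep the field assignments, and assert that Init() returns lite::RET_OK before the context is handed to a kernel. A condensed sketch of that recipe; the thread count is arbitrary, and creator, desc, param, inputs, and outputs stand for whatever the individual test already set up:

auto ctx = new (std::nothrow) lite::InnerContext;
ASSERT_NE(ctx, nullptr);
ctx->thread_num_ = 2;
// Init() must succeed before any kernel creator or constructor sees ctx.
ASSERT_EQ(lite::RET_OK, ctx->Init());
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&param), ctx, desc, nullptr);
ASSERT_NE(kernel, nullptr);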
kernel::Convolution1x1CPUKernel *conv1x1 = @@ -284,8 +285,9 @@ TEST_F(TestConv1x1Fp32, Conv1x1Test2) { std::vector inputs_; std::vector outputs_; auto conv_param = new ConvParameter(); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext(); ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); float *correct; int total_size = Conv1x1TestInit2(&inputs_, &outputs_, conv_param, &correct); kernel::Convolution1x1CPUKernel *conv1x1 = diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32_tests.cc index 66199ffa75..975cec6626 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32_tests.cc @@ -103,8 +103,9 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Accuracy) { InitConvDwParam(conv_param); // init ctx - auto ctx = new Context(); + auto ctx = new InnerContext(); ctx->thread_num_ = 4; + ASSERT_EQ(lite::RET_OK, ctx->Init()); // init tensor std::vector inputs; @@ -154,8 +155,9 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Performance) { InitConvDwParam(conv_param); // init ctx - auto ctx = new Context(); + auto ctx = new InnerContext(); ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); // init tensor std::vector inputs; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc index fcabf82e37..07505c577e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc @@ -266,8 +266,9 @@ TEST_F(CropTestFp32, CropTest11) { out_t->MallocData(); outputs.push_back(out_t); - auto ctx = new (std::nothrow) lite::Context; + auto ctx = new (std::nothrow) lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); CropParameter crop_param; crop_param.axis_ = 2; crop_param.offset_[0] = 0; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc index 1497218c2d..d9d5ad4dec 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/deconvolution_fp32_tests.cc @@ -475,8 +475,9 @@ TEST_F(TestDeConvolutionFp32, DeConvTest1) { std::vector inputs_; std::vector outputs_; ConvParameter *deconv_param = new ConvParameter(); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); float *correct; int total_size = DeConvTestInit1(&inputs_, &outputs_, deconv_param, &correct); kernel::DeConvolutionCPUKernel *deconv = @@ -543,8 +544,9 @@ TEST_F(TestDeConvolutionFp32, DeConvTest2) { auto deconv_param = new ConvParameter(); float *correct; int total_size = DeConvTestInit2(&inputs_, &outputs_, deconv_param, &correct); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::DeConvolutionCPUKernel *deconv = new kernel::DeConvolutionCPUKernel(reinterpret_cast(deconv_param), inputs_, outputs_, ctx, nullptr); @@ -621,8 +623,9 @@ TEST_F(TestDeConvolutionFp32, DeConvTest3) { auto deconv_param = new ConvParameter(); 
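The same contract applies one level up in infer_test.cc: the context passed to LiteSession::CreateSession is now an InnerContext that has already been initialized. A reduced sketch of that sequence, with the model import via lite::Model::Import and the result checks elided:

auto context = new lite::InnerContext;
context->cpu_bind_mode_ = lite::NO_BIND;
context->device_type_ = lite::DT_CPU;
context->thread_num_ = 4;
ASSERT_EQ(lite::RET_OK, context->Init());
auto session = session::LiteSession::CreateSession(context);
ASSERT_NE(nullptr, session);
auto ret = session->CompileGraph(model);  // model obtained earlier from lite::Model::Import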
float *correct; int total_size = DeConvTestInit3(&inputs_, &outputs_, deconv_param, &correct); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::DeConvolutionCPUKernel *deconv = new kernel::DeConvolutionCPUKernel(reinterpret_cast(deconv_param), inputs_, outputs_, ctx, nullptr); @@ -688,8 +691,9 @@ TEST_F(TestDeConvolutionFp32, DeConvTest4) { auto deconv_param = new ConvParameter(); float *correct; int total_size = DeConvTestInit4(&inputs_, &outputs_, deconv_param, &correct); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::DeConvolutionCPUKernel *deconv = new kernel::DeConvolutionCPUKernel(reinterpret_cast(deconv_param), inputs_, outputs_, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/detection_post_process_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/detection_post_process_test.cc index d637ec1d03..72bf3833e6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/detection_post_process_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/detection_post_process_test.cc @@ -121,8 +121,9 @@ TEST_F(TestDetectionPostProcessFp32, Fast) { std::vector outputs_; auto param = new DetectionPostProcessParameter(); DetectionPostProcessTestInit(&inputs_, &outputs_, param); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::DetectionPostProcessCPUKernel *op = new kernel::DetectionPostProcessCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc index 195da4ca28..5184aca7af 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc @@ -49,8 +49,9 @@ TEST_F(TestEluFp32, EluTest) { auto elu_param_ = new EluParameter(); EluTestInit(&inputs_, &outputs_, elu_param_); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::EluCPUKernel *elu = new kernel::EluCPUKernel(reinterpret_cast(elu_param_), inputs_, outputs_, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc index 484c33907d..96f9cf5f88 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc @@ -62,8 +62,9 @@ TEST_F(TestEmbeddingLookupFp32, ElTest) { auto embedding_lookup_param_ = new EmbeddingLookupParameter(); ElTestInit(&inputs_, &outputs_, embedding_lookup_param_); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::EmbeddingLookupCPUKernel *el = new kernel::EmbeddingLookupCPUKernel( reinterpret_cast(embedding_lookup_param_), inputs_, outputs_, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc index fb2efa1e69..785aad906e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc @@ -76,8 +76,9 @@ TEST_F(TestFcFp32, FcTest1) { auto matmul_param = new MatMulParameter(); float *correct; int total_size = FcTestInit1(&inputs_, &outputs_, matmul_param, &correct); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::FullconnectionCPUKernel *fc = new kernel::FullconnectionCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); @@ -133,8 +134,9 @@ TEST_F(TestFcFp32, FcTest2) { auto matmul_param = new MatMulParameter(); float *correct; int total_size = FcTestInit2(&inputs_, &outputs_, matmul_param, &correct); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::FullconnectionCPUKernel *fc = new kernel::FullconnectionCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc index 180c0a28fc..31213e5143 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc @@ -36,7 +36,7 @@ class TestL2NormFp32 : public mindspore::CommonTest { std::vector outputs_{&out_tensor_}; L2NormParameter param_; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::KernelCreator creator_ = nullptr; kernel::LiteKernel *kernel_ = nullptr; }; @@ -65,8 +65,9 @@ void TestL2NormFp32::Init(const std::vector &input_shape, const std::vector param_.act_type_ = activation_type; desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_L2Norm}; - ctx_ = lite::Context(); + ctx_ = lite::InnerContext(); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator_, nullptr); kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc index 32ee1a425e..75c9d59b2e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc @@ -140,8 +140,9 @@ TEST_F(LstmFp32, LstmForwardFp32Accuracy) { InitLstmParam(lstm_param); // init ctx - auto ctx = new lite::Context(); + auto ctx = new lite::InnerContext(); ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); // init tensor std::vector inputs; @@ -288,8 +289,9 @@ TEST_F(LstmFp32, LstmBackwardFp32Accuracy) { lstm_param->bidirectional_ = true; // init ctx - auto ctx = new lite::Context(); + auto ctx = new lite::InnerContext(); ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); // init tensor std::vector inputs; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc 
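Note: the fixture-style tests (L2Norm above, Pad and the Resize tests below) keep the context as a class member rather than a heap allocation; the patch changes the member type and places the same Init() assertion inside the fixture's Prepare/Init step. A hedged sketch of that variant, reusing the member names from those fixtures (the surrounding fixture boilerplate is omitted):

// sketch only -- assumes desc, inputs_, outputs_ and param_ are set up as in the fixtures
lite::InnerContext ctx_ = lite::InnerContext();      // member, previously lite::Context
ctx_.thread_num_ = thread_num;
ASSERT_EQ(lite::RET_OK, ctx_.Init());
creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator_, nullptr);
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
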
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc index 444c5ce9db..7896ecb9d3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc @@ -240,8 +240,9 @@ TEST_F(TestMatMulFp32, simple) { std::vector b_shape = {8, 3}; std::vector c_shape = {2, 3}; int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto mm = new kernel::MatmulCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); mm->Init(); mm->Run(); @@ -272,8 +273,9 @@ TEST_F(TestMatMulFp32, simple_bias) { std::vector bias_shape = {1, 3}; std::vector c_shape = {2, 3}; int total_size = MMTestInit2(&inputs_, &outputs_, a, b, bias, a_shape, b_shape, bias_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto mm = new kernel::MatmulCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); mm->Init(); mm->Run(); @@ -323,8 +325,9 @@ TEST_F(TestMatMulFp32, simple2) { std::vector b_shape = {12, 36}; std::vector c_shape = {25, 36}; int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto mm = new kernel::MatmulCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); mm->Init(); mm->Run(); @@ -392,8 +395,9 @@ TEST_F(TestMatMulFp32, simple_transb) { std::vector b_shape = {1, 3, 8}; std::vector c_shape = {1, 2, 3}; int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto mm = new kernel::MatmulCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); mm->Init(); mm->Run(); @@ -441,8 +445,9 @@ TEST_F(TestMatMulFp32, batch) { std::vector b_shape = {3, 3, 8}; std::vector c_shape = {3, 2, 3}; int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto mm = new kernel::MatmulCPUKernel(reinterpret_cast(matmul_param), inputs_, outputs_, ctx, nullptr); mm->Init(); mm->Run(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc index c7fdf490b2..e62184fbfb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc @@ -46,7 +46,7 @@ class TestPadFp32 : public mindspore::CommonTest { std::vector inputs_{&in_tensor_}; std::vector outputs_{&out_tensor_}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Pad}; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::KernelCreator creator_ = nullptr; kernel::LiteKernel *kernel_ = nullptr; }; @@ -83,8 +83,9 @@ void TestPadFp32::Prepare(const std::vector &input_shape, const std::vector } desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Pad}; - ctx_ = lite::Context(); + ctx_ = 
lite::InnerContext(); ctx_.thread_num_ = thread_num; + ctx_.Init(); creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator_, nullptr); kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/power_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/power_fp32_tests.cc index 1ec026b87d..f4ee436f49 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/power_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/power_fp32_tests.cc @@ -70,8 +70,9 @@ TEST_F(TestPowerFp32, Simple) { std::vector b_shape = {2, 2}; std::vector c_shape = {2, 2}; int total_size = PowerTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::PowerCPUKernel *op = new kernel::PowerCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); @@ -95,8 +96,9 @@ TEST_F(TestPowerFp32, Broadcast) { std::vector a_shape = {2, 2}; std::vector c_shape = {2, 2}; int total_size = PowerTestInit2(&inputs_, &outputs_, a, a_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::PowerCPUKernel *op = new kernel::PowerCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc index e8235408a2..a28541620a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc @@ -40,7 +40,7 @@ class TestResizeBilinearFp32 : public mindspore::CommonTest { std::vector outputs_{&out_tensor_}; ResizeParameter param_ = {{}}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::KernelCreator creator_ = nullptr; kernel::LiteKernel *kernel_ = nullptr; }; @@ -64,8 +64,9 @@ void TestResizeBilinearFp32::Prepare(const std::vector &input_shape, const ResizeParameter param_ = { {}, static_cast(schema::ResizeMethod_BILINEAR), output_shape[1], output_shape[2], align_corners}; desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; - ctx_ = lite::Context(); + ctx_ = lite::InnerContext(); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator_, nullptr); kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc index 1d8b445ffa..ed579145d5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc @@ -36,7 +36,7 @@ class TestResizeNearestNeighborFp32 : public mindspore::CommonTest { std::vector outputs_{&out_tensor_}; ResizeParameter param_ = {{}}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, 
kNumberTypeFloat32, schema::PrimitiveType_Resize}; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::KernelCreator creator_ = nullptr; kernel::LiteKernel *kernel_ = nullptr; }; @@ -59,8 +59,9 @@ void TestResizeNearestNeighborFp32::Prepare(const std::vector &input_shape, ResizeParameter param_ = { {}, static_cast(schema::ResizeMethod_NEAREST_NEIGHBOR), output_shape[1], output_shape[2], align_corners}; desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Resize}; - ctx_ = lite::Context(); + ctx_ = lite::InnerContext(); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator_, nullptr); kernel_ = creator_(inputs_, outputs_, reinterpret_cast(¶m_), &ctx_, desc, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc index 3f2e4512af..6791b8e55d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc @@ -49,7 +49,8 @@ TEST_F(TestReverseSequenceFp32, BatchLessSeq) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); @@ -92,7 +93,8 @@ TEST_F(TestReverseSequenceFp32, BatchGreaterSeq) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); @@ -135,7 +137,8 @@ TEST_F(TestReverseSequenceFp32, BatchSeqNotAdjacent) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc index de7af7555f..b9a08ccdfc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc @@ -58,8 +58,9 @@ TEST_F(TestROIPoolingFp32, Simple) { std::vector b_shape = {2, 5}; std::vector c_shape = {1, 2, 2, 2}; int total_size = ROIPoolingTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::ROIPoolingCPUKernel *op = new kernel::ROIPoolingCPUKernel(reinterpret_cast(param), inputs_, outputs_, ctx, nullptr); op->Init(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc index bf8afb1157..42ab39d856 100644 --- 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc @@ -74,8 +74,9 @@ TEST_F(SpaceToDepthTestFp32, SpaceToDepthTest2) { op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; op_param.block_size_ = 2; - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_SpaceToDepth}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc index 4104b296d7..daee1544d3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc @@ -80,8 +80,9 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test1) { SparseToDenseParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.validate_indices_ = false; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, tid, schema::PrimitiveType_SparseToDense}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -166,8 +167,9 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test2) { SparseToDenseParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.validate_indices_ = false; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, tid, schema::PrimitiveType_SparseToDense}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -252,8 +254,9 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test3) { SparseToDenseParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.validate_indices_ = true; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, tid, schema::PrimitiveType_SparseToDense}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -333,8 +336,9 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test4) { SparseToDenseParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.validate_indices_ = true; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, tid, schema::PrimitiveType_SparseToDense}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -414,8 +418,9 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test5) { SparseToDenseParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_SpaceToDepth; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.validate_indices_ = true; kernel::KernelKey desc = 
{kernel::KERNEL_ARCH::kCPU, tid, schema::PrimitiveType_SparseToDense}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc index 40c4ebf86c..38348f63de 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc @@ -149,8 +149,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice3) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -198,8 +199,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice4) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -254,8 +256,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice5) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -310,8 +313,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice6) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -358,8 +362,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice7) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -414,8 +419,9 @@ TEST_F(TestStridedSliceFp32, StridedSlice8) { output_tensor.set_data_type(input_tensor.data_type()); 
output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); @@ -564,7 +570,8 @@ TEST_F(TestStridedSliceFp32, StridedSlice9) { output_tensor.set_data_type(input_tensor.data_type()); output_tensor.set_shape(output_shape); - lite::Context *ctx = new lite::Context(); + lite::InnerContext *ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); strided_slice_param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_StridedSlice}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc index 5582a8b560..760abe2203 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc @@ -52,7 +52,8 @@ TEST_F(TestTileFp32, Tile) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc index 4bc5417e49..7f4a73c8fa 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc @@ -45,7 +45,8 @@ TEST_F(TestTopKFp32, TopK) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc index b5f5357c59..acf687326a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc @@ -198,8 +198,9 @@ TEST_F(TestTransposeFp32, TransposeFp32_test5) { std::vector outputs_tensor; outputs_tensor.emplace_back(&output_tensor); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc index 3125d35b0d..8c2fd20599 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc +++ 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc @@ -45,7 +45,8 @@ TEST_F(TestUniqueFp32, Unique) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, ¶meter, ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc index 26dc00f64a..39258fa359 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc @@ -51,7 +51,8 @@ TEST_F(TestUnstackFp32, Unstack) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); @@ -98,7 +99,8 @@ TEST_F(TestUnstackFp32, Unstack2) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); EXPECT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc index 82ad88036d..ca02b4566e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc @@ -507,9 +507,10 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; - lite::Context context; + lite::InnerContext context; context.device_type_ = lite::DT_CPU; context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast(conv_param), inputs, outputs, &context, 0); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc index c5ef47ccb7..bd1c8cae18 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc @@ -208,7 +208,7 @@ TEST_F(NetworkTest, tuning_layer) { weight->data.resize(weight_size); std::copy(buf, buf + weight_size, weight->data.data()); meta_graph->allTensors.emplace_back(std::move(weight)); - delete [] buf; + delete[] buf; // tensor 3 - matmul auto input3 = std::make_unique(); input3->nodeType = schema::NodeType::NodeType_Parameter; @@ -231,7 +231,7 @@ TEST_F(NetworkTest, tuning_layer) { bias->data.resize(bias_size); std::copy(buf, buf + bias_size, bias->data.data()); meta_graph->allTensors.emplace_back(std::move(bias)); - delete [] buf; + delete[] buf; // tensor 5 - bias_add auto input5 = std::make_unique(); @@ -247,7 +247,7 @@ TEST_F(NetworkTest, tuning_layer) { label->nodeType = schema::NodeType::NodeType_ValueNode; label->format = schema::Format_NHWC; label->dataType = TypeId::kNumberTypeInt32; - 
label->dims = {BATCH_SIZE*NUM_CLASSES}; + label->dims = {BATCH_SIZE * NUM_CLASSES}; label->offset = -1; // label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float)); // int *data = reinterpret_cast(label->data.data()); @@ -370,10 +370,11 @@ TEST_F(NetworkTest, tuning_layer) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - lite::Context context; + lite::InnerContext context; context.device_type_ = lite::DT_CPU; context.cpu_bind_mode_ = lite::NO_BIND; context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); auto session = new session::TrainSession(); ASSERT_NE(nullptr, session); session->Init(&context); @@ -397,10 +398,10 @@ TEST_F(NetworkTest, tuning_layer) { //=================================================== ASSERT_EQ(input_size, inTensor->Size()); memcpy(data, input_data, input_size); - delete [] buf; + delete[] buf; auto labelTensor = inputs.at(1); ASSERT_NE(nullptr, labelTensor); - ASSERT_EQ(BATCH_SIZE*NUM_CLASSES, labelTensor->ElementsNum()); + ASSERT_EQ(BATCH_SIZE * NUM_CLASSES, labelTensor->ElementsNum()); auto labels = reinterpret_cast(labelTensor->MutableData()); for (int i = 0; i < BATCH_SIZE; i++) labels[i] = (i * 97) % NUM_CLASSES; @@ -452,7 +453,7 @@ TEST_F(NetworkTest, tuning_layer) { EXPECT_LT(error, 2e-3); session->train(); - session->eval(); // do some more zig-zags + session->eval(); // do some more zig-zags ret = session->RunGraph(); outputs = session->GetOutputsByName("BiasAdd"); ASSERT_EQ(outputs.size(), 1); @@ -468,8 +469,6 @@ TEST_F(NetworkTest, tuning_layer) { error = lite::RelativeOutputError(outData, output_path); EXPECT_LT(error, 2e-3); - - delete model; delete session; MS_LOG(INFO) << "TuningLayer passed"; @@ -505,7 +504,7 @@ int32_t runEffNet(mindspore::lite::LiteSession *session, const std::string &in, auto input_data = reinterpret_cast(in_buf); // ASSERT_EQ(input_size, inTensor->Size()); std::copy(input_data, input_data + inTensor->ElementsNum(), data); - delete [] in_buf; + delete[] in_buf; // execute network session->RunGraph(); @@ -526,11 +525,12 @@ TEST_F(NetworkTest, efficient_net) { std::string net = "./test_data/nets/effnetb0_fwd_nofuse.ms"; ReadFile(net.c_str(), &net_size, &buf); auto model = lite::Model::Import(buf, net_size); - delete [] buf; - auto context = new lite::Context; + delete[] buf; + auto context = new lite::InnerContext; context->device_type_ = lite::DT_CPU; context->cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context->Init()); auto session = new mindspore::session::TrainSession(); // auto session = new mindspore::lite::LiteSession(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc index c6be1af139..9f56de0218 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc @@ -55,7 +55,8 @@ TEST_F(TestQuantizedAdd, Add) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc index ff5026f3e2..1398016f2f 100644 --- 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc @@ -65,8 +65,9 @@ TEST_F(TestArithmeticSelfInt8, floor_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Floor; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -124,8 +125,9 @@ TEST_F(TestArithmeticSelfInt8, floor_quant1_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Floor; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -183,8 +185,9 @@ TEST_F(TestArithmeticSelfInt8, round_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Round; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -242,8 +245,9 @@ TEST_F(TestArithmeticSelfInt8, round_quant1_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Round; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -301,8 +305,9 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Ceil; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -360,8 +365,9 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant1_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Ceil; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Floor}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -419,8 +425,9 @@ TEST_F(TestArithmeticSelfInt8, abs_quant0_thread0) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Abs; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new 
lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Abs}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -478,8 +485,9 @@ TEST_F(TestArithmeticSelfInt8, abs_quant1_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Abs; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Abs}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -537,8 +545,9 @@ TEST_F(TestArithmeticSelfInt8, sin_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Sin; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Sin}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -596,8 +605,9 @@ TEST_F(TestArithmeticSelfInt8, cos_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Cos; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Cos}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -655,8 +665,9 @@ TEST_F(TestArithmeticSelfInt8, log_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Log; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Log}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -714,8 +725,9 @@ TEST_F(TestArithmeticSelfInt8, sqrt_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Sqrt; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Sqrt}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -773,8 +785,9 @@ TEST_F(TestArithmeticSelfInt8, rsqrt_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Rsqrt; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Rsqrt}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -832,8 +845,9 @@ TEST_F(TestArithmeticSelfInt8, square_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = 
schema::PrimitiveType_Square; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Square}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -891,8 +905,9 @@ TEST_F(TestArithmeticSelfInt8, square_quant1_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Square; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Square}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -950,8 +965,9 @@ TEST_F(TestArithmeticSelfInt8, logical_not_quant0_thread2) { ArithmeticSelfParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_LogicalNot; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_LogicalNot}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc index 3e58171fe0..2faf08019b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc @@ -101,8 +101,9 @@ TEST_F(TestBatchnormInt8, FusedTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_FusedBatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -181,8 +182,9 @@ TEST_F(TestBatchnormInt8, BNTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_BatchNorm}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc index 9b9c58980b..518db24583 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc @@ -57,7 +57,8 @@ TEST_F(TestBiasAddInt8, BiasAdd) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); EXPECT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); 
EXPECT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc index cb71c664a4..7432415340 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc @@ -77,8 +77,9 @@ TEST_F(TestConcatInt8, Concat1_axis0) { ConcatParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Concat; op_param.axis_ = 0; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -148,8 +149,9 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2) { ConcatParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Concat; op_param.axis_ = 1; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -220,8 +222,9 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2_quant1) { ConcatParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Concat; op_param.axis_ = 1; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Concat}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc index 0f11a371e0..d86fee8982 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/conv_1x1_int8_tests.cc @@ -120,8 +120,9 @@ TEST_F(TestConv1x1Int8, Conv1x1TestPerChannel) { std::vector outputs_; auto conv_param = new ConvParameter(); int8_t *correct; - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int total_size = Conv1x1Int8TestInit1_perchannel(&inputs_, &outputs_, conv_param, &correct); kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel( reinterpret_cast(conv_param), inputs_, outputs_, ctx, nullptr); @@ -187,8 +188,9 @@ TEST_F(TestConv1x1Int8, Conv1x1Int8Test1) { std::vector outputs_; auto conv_param = new ConvParameter(); int8_t *correct; - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int total_size = Conv1x1Int8TestInit1(&inputs_, &outputs_, conv_param, &correct); kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel( reinterpret_cast(conv_param), inputs_, outputs_, ctx, nullptr); @@ -262,8 +264,9 @@ TEST_F(TestConv1x1Int8, Conv1x1Int8Test2) { std::vector outputs_; auto conv_param = new ConvParameter(); int8_t *correct; - auto ctx = new lite::Context; + auto ctx = new 
lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int total_size = Conv1x1Int8TestInit2(&inputs_, &outputs_, conv_param, &correct); kernel::Convolution1x1Int8CPUKernel *conv1x1 = new kernel::Convolution1x1Int8CPUKernel( reinterpret_cast(conv_param), inputs_, outputs_, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc index 6d991674ca..2e09968fa6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc @@ -66,8 +66,9 @@ TEST_F(TestCropInt8, crop_1d_axis0_offset0_quant0_thread2) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -129,8 +130,9 @@ TEST_F(TestCropInt8, crop_2d_axis1_offset0_quant0_thread2) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 1; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -192,8 +194,9 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread0) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 1; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -256,8 +259,9 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread2) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 1; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -319,8 +323,9 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread0) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -382,8 +387,9 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset0_quant0_thread0) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 1; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -445,8 +451,9 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant0_thread0) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_[1] = 1; @@ -511,8 +518,9 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant1_thread0) { CropParameter op_param; op_param.op_parameter_.type_ = 
schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_[1] = 1; @@ -579,8 +587,9 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread2) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; @@ -644,8 +653,9 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread3) { CropParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Crop; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc index 3147fc8aed..306b32d498 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc @@ -353,8 +353,9 @@ TEST_F(TestDeconvInt8, DeConvInt8Test1) { std::vector inputs_; std::vector outputs_; auto deconv_param = new ConvParameter(); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int8_t *correct; int total_size = DeConvInt8TestInit1(&inputs_, &outputs_, deconv_param, &correct); mindspore::kernel::DeConvInt8CPUKernel *deconv = new mindspore::kernel::DeConvInt8CPUKernel( diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/div_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/div_int8_test.cc index 5a0973e953..4bb836a01b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/div_int8_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/div_int8_test.cc @@ -56,7 +56,8 @@ TEST_F(TestDivInt8, DivInt8) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc index 8ca1db2dd3..f13f96f244 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc @@ -133,8 +133,9 @@ TEST_F(TestFcInt8, fctest1) { std::vector inputs; std::vector outputs; FcInt8TestInit(&inputs, &outputs, &in_params, &weight_params, &bias_params, &out_params); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::FullconnectionInt8CPUKernel *fc = new kernel::FullconnectionInt8CPUKernel(reinterpret_cast(fc_param), inputs, outputs, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc 
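Note: several registry-driven tests (TopK, Tile, Unique, and the int8 add, div, hswish and power cases) hold the context in a std::shared_ptr; there the template argument changes to lite::InnerContext and the Init() check lands before ctx.get() is handed to the creator. A minimal sketch of that variant (not part of the patch; the KernelKey shown is only an example taken from the mul tests, and inputs/outputs/parameter come from the individual test):

// sketch only -- example KernelKey and test-local tensors/parameter
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr);
auto ctx = std::make_shared<lite::InnerContext>();   // previously a shared_ptr to lite::Context
ASSERT_EQ(lite::RET_OK, ctx->Init());
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc, nullptr);
ASSERT_NE(kernel, nullptr);
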
b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc index 265c127f93..318c74d10b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc @@ -78,8 +78,9 @@ TEST_F(TestGatherNdInt8, GatherNdTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_GatherNd}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc index 1003b93471..b25894635a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc @@ -76,8 +76,9 @@ TEST_F(TestGatherInt8, GatherTest) { kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Gather}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::LiteKernel *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc index 46a74a1f2d..30e5933e45 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc @@ -56,7 +56,8 @@ TEST_F(TestHSwishInt8, HSwish) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc index c498e15a34..52220834c6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc @@ -174,8 +174,9 @@ TEST_F(TestMatmulInt8, mmtest1) { std::vector inputs; std::vector outputs; MMInt8TestInit(&inputs, &outputs, &in_params, &weight_params, &out_params); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::MatmulInt8CPUKernel *mm = new kernel::MatmulInt8CPUKernel(reinterpret_cast(matmul_param), inputs, outputs, ctx, nullptr); @@ -290,8 +291,9 @@ TEST_F(TestMatmulInt8, mmtest2) { std::vector inputs; std::vector outputs; MMInt8TestInit(&inputs, &outputs, &in_params, &weight_params, &out_params); - auto ctx = new lite::Context; + auto ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::MatmulInt8CPUKernel *mm = new kernel::MatmulInt8CPUKernel(reinterpret_cast(matmul_param), 
inputs, outputs, ctx, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc index 161aab6cf1..16c61a17da 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc @@ -76,8 +76,9 @@ TEST_F(TestMulInt8, Mul_quant0) { MulParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -146,8 +147,9 @@ TEST_F(TestMulInt8, Mul_quant0_thread0) { MulParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -216,8 +218,9 @@ TEST_F(TestMulInt8, Mul_quant1) { MulParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -286,8 +289,9 @@ TEST_F(TestMulInt8, Mul_quant1_thread1) { MulParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Mul}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc index ad4bc33b22..f12e6d5e47 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc @@ -65,7 +65,8 @@ TEST_F(TestPadInt8, PadInt8Test1) { std::vector inputs_; std::vector outputs_; auto pad_param = new PadParameter(); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int8_t *correct; int total_size = PadInt8TestInit1(&inputs_, &outputs_, pad_param, &correct); kernel::PadInt8CPUKernel *pad = @@ -117,7 +118,8 @@ TEST_F(TestPadInt8, PadInt8Test2) { std::vector inputs_; std::vector outputs_; auto pad_param = new PadParameter(); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int8_t *correct; int total_size = PadInt8TestInit2(&inputs_, &outputs_, pad_param, &correct); kernel::PadInt8CPUKernel *pad = @@ -185,8 +187,9 @@ TEST_F(TestPadInt8, PadInt8TestInit4) { std::vector inputs_; 
std::vector outputs_; auto pad_param = new PadParameter(); - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); int8_t *correct; int total_size = PadInt8TestInit2(&inputs_, &outputs_, pad_param, &correct); kernel::PadInt8CPUKernel *pad = diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc index 895963e64e..5e4d1b661b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc @@ -66,7 +66,8 @@ TEST_F(TestPowerInt8, PowerInt8) { output0_tensor.AddQuantParam(output_quant_arg); output0_tensor.set_data_type(tid_int8); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Power}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -134,7 +135,8 @@ TEST_F(TestPowerInt8, normal) { output0_tensor.AddQuantParam(output_quant_arg); output0_tensor.set_data_type(tid_int8); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Power}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc index 7e6ea228d2..3626f05baf 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc @@ -69,8 +69,9 @@ TEST_F(TestPreluInt8, prelu_1) { op_param.slope_ = reinterpret_cast(malloc(sizeof(float))); op_param.slope_[0] = 0.25; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0.25; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_LeakyReLU}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc index 4d80fb5ab1..f2f9560bb3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc @@ -62,8 +62,9 @@ TEST_F(QuantDTypeCastTestFp32, QuantDTypeCastTest1) { std::vector outputs_tensor; outputs_tensor.emplace_back(&output_tensor); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_QuantDTypeCast}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -108,8 +109,9 @@ TEST_F(QuantDTypeCastTestFp32, QuantDTypeCastTest2) { std::vector outputs_tensor; outputs_tensor.emplace_back(&output_tensor); - lite::Context ctx; + lite::InnerContext ctx; ctx.thread_num_ = 3; + ASSERT_EQ(lite::RET_OK, ctx.Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU,
kNumberTypeInt8, schema::PrimitiveType_QuantDTypeCast}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc index 4f4a189cdf..2933f0ee2d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc @@ -49,7 +49,7 @@ class TestReduceInt8 : public mindspore::CommonTest { std::vector outputs{&out_tensor_}; kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reduce}; kernel::KernelCreator creator_ = nullptr; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::LiteKernel *kernel_ = nullptr; const QuantArg quant_in_ = {0.005f, 5}; const QuantArg quant_out_ = {0.01f, 1}; @@ -80,6 +80,7 @@ void TestReduceInt8::Prepare(const std::vector &in_shape, const std::vector creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); ctx_.thread_num_ = thread_num_; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); kernel_ = creator_(inputs, outputs, reinterpret_cast(&param_), &ctx_, desc_, nullptr); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc index d25c4501fd..7297afbf85 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc @@ -54,7 +54,8 @@ TEST_F(TestReluXInt8, Relu) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); @@ -97,7 +98,8 @@ TEST_F(TestReluXInt8, Relu6) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc index 7e3010da51..d537169bd6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc @@ -65,8 +65,9 @@ TEST_F(TestReshapeInt8, reshape_quant0) { ReshapeParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Reshape; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reshape}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -124,8 +125,9 @@ TEST_F(TestReshapeInt8, reshape_quant1_thread2) { ReshapeParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Reshape; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init());
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Reshape}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc index 796c0115d6..076ceb1ad6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc @@ -41,7 +41,7 @@ class TestResizeBilinearInt8 : public mindspore::CommonTest { kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Resize}; kernel::KernelCreator creator_ = nullptr; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::LiteKernel *kernel_ = nullptr; float err_percent_ = 0.2f; }; @@ -76,6 +76,7 @@ void TestResizeBilinearInt8::Prepare(const std::vector &in_shape, const std creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); kernel_ = creator_(inputs, outputs, reinterpret_cast(&param_), &ctx_, desc_, nullptr); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc index a2777e77d9..2b43d037be 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc @@ -42,7 +42,7 @@ class TestResizeNearestNeighborInt8 : public mindspore::CommonTest { kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Resize}; kernel::KernelCreator creator_ = nullptr; - lite::Context ctx_ = lite::Context(); + lite::InnerContext ctx_ = lite::InnerContext(); kernel::LiteKernel *kernel_ = nullptr; float err_percent_ = 0.05f; }; @@ -71,6 +71,7 @@ void TestResizeNearestNeighborInt8::Prepare(const std::vector &in_shape, co creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_); ctx_.thread_num_ = thread_num; + ASSERT_EQ(lite::RET_OK, ctx_.Init()); kernel_ = creator_(inputs, outputs, reinterpret_cast(&param_), &ctx_, desc_, nullptr); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc index 75cb7af07f..806f781af2 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc @@ -53,7 +53,8 @@ TEST_F(TestSigmoidInt8, Sigmoid) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/slice_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/slice_int8_tests.cc index 6940779e2c..91c5ef2802 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/slice_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/slice_int8_tests.cc @@ -59,7 +59,8 @@ TEST_F(TestSliceInt8, SliceInt8) { auto
creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); auto ret = kernel->Run(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc index b69ee4d133..828c32c8ae 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc @@ -70,7 +70,8 @@ TEST_F(TestSoftmaxInt8, SoftmaxInt8) { output0_tensor.AddQuantParam(output_quant_arg); output0_tensor.set_data_type(tid_int8); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SoftMax}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc index 90bfa43b86..2f07423979 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc @@ -40,7 +40,8 @@ TEST_F(SpaceToBatchTestInt8, test1) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc index 51f5446edf..b589741214 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc @@ -79,8 +79,9 @@ TEST_F(TestSplitInt8, Split_quant0_thread2) { op_param.split_dim_ = 1; op_param.split_sizes_[0] = 1; op_param.split_sizes_[1] = 2; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -167,8 +168,9 @@ TEST_F(TestSplitInt8, Split_quant0_thread2_num) { op_param.op_parameter_.type_ = schema::PrimitiveType_Split; op_param.num_split_ = 3; op_param.split_dim_ = 1; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); @@ -263,8 +265,9 @@ TEST_F(TestSplitInt8, Split_quant1_thread2_num) { op_param.op_parameter_.type_ = schema::PrimitiveType_Split; op_param.num_split_ = 3; op_param.split_dim_ = 1; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx =
new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc index 330cf010bf..65e3c1c36e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc @@ -66,8 +66,9 @@ TEST_F(TestSqueezeInt8, Squeeze_1d_axis0_offset0_quant0_thread2) { SqueezeParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Squeeze; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc index a19b5e426b..eac24d33c3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc @@ -56,7 +56,8 @@ TEST_F(TestSubInt8, SubInt8) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); ASSERT_NE(creator, nullptr); - auto ctx = std::make_shared<lite::Context>(); + auto ctx = std::make_shared<lite::InnerContext>(); + ASSERT_EQ(lite::RET_OK, ctx->Init()); auto kernel = creator(inputs, outputs, reinterpret_cast(&parameter), ctx.get(), desc, nullptr); ASSERT_NE(kernel, nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc index 33e7ed0dd8..65192eea46 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc @@ -66,8 +66,9 @@ TEST_F(TestUnsqueezeInt8, Unsqueeze_1) { UnSqueezeParameter op_param; op_param.op_parameter_.type_ = schema::PrimitiveType_Unsqueeze; - lite::Context *ctx = new lite::Context; + lite::InnerContext *ctx = new lite::InnerContext; ctx->thread_num_ = 2; + ASSERT_EQ(lite::RET_OK, ctx->Init()); op_param.axis_ = 0; op_param.offset_[0] = 1; op_param.offset_size_ = 1; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index 3222cfe5d6..6fc39440dd 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -143,7 +143,8 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh param->op_parameter_.type_ = PrimitiveType_Add; std::vector arithmetic_inputs = {tensor_a, tensor_b}; - lite::Context ctx; + lite::InnerContext ctx; + ASSERT_EQ(lite::RET_OK, ctx.Init()); auto *arith_kernel = new (std::nothrow) kernel::ArithmeticOpenCLKernel(reinterpret_cast(param), arithmetic_inputs, outputs, &ctx); if (arith_kernel == nullptr) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc index bb3816067a..a869907bcc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc +++
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc @@ -155,7 +155,8 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh param->op_parameter_.type_ = schema::PrimitiveType_Scale; std::vector scale_inputs = {tensor_in, tensor_scale, tensor_offset}; - lite::Context ctx; + lite::InnerContext ctx; + ASSERT_EQ(lite::RET_OK, ctx.Init()); auto *scale_kernel = new (std::nothrow) kernel::ScaleOpenCLKernel(reinterpret_cast(param), scale_inputs, outputs, &ctx); if (scale_kernel == nullptr) { diff --git a/mindspore/lite/tools/anf_importer/anf_importer.cc b/mindspore/lite/tools/anf_importer/anf_importer.cc index 49f66dc0af..b5a55c266c 100644 --- a/mindspore/lite/tools/anf_importer/anf_importer.cc +++ b/mindspore/lite/tools/anf_importer/anf_importer.cc @@ -24,7 +24,6 @@ #include "schema/inner/model_generated.h" namespace mindspore { namespace lite { - int AnfImporter::Import(const schema::QuantType &quantType) { auto ret = ConverterConstTensor(); if (RET_OK != ret) { diff --git a/mindspore/lite/tools/common/storage.cc b/mindspore/lite/tools/common/storage.cc index f8b2430a77..eb53054438 100644 --- a/mindspore/lite/tools/common/storage.cc +++ b/mindspore/lite/tools/common/storage.cc @@ -34,7 +34,7 @@ int Storage::Save(const schema::MetaGraphT &graph, const std::string &outputPath std::ofstream output(outputPath + ".ms", std::ofstream::binary); if (!output.is_open()) { - MS_LOG(ERROR) << "Output file path is error"; + MS_LOG(ERROR) << "Can not open output file: " << outputPath << ".ms"; return RET_ERROR; } diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 1de0a3896a..c21ce053c0 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -19,6 +19,7 @@ if (WIN32) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/tensor.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/model.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/lite_session.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/inner_context.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/kernel_registry.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/graph_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/runtime_api.cc @@ -81,6 +82,7 @@ set(LITE_SRC ${SRC_DIR}/runtime/runtime_api.cc ${SRC_DIR}/runtime/thread_pool.c ${SRC_DIR}/runtime/workspace_pool.cc + ${SRC_DIR}/inner_context.cc ${SRC_DIR}/tensor.cc ${SRC_DIR}/kernel_registry.cc ${SRC_DIR}/lite_kernel.cc diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc index 039f78ed0f..58cf4536f0 100644 --- a/mindspore/lite/tools/converter/converter.cc +++ b/mindspore/lite/tools/converter/converter.cc @@ -164,7 +164,7 @@ int RunConverter(int argc, const char **argv) { fb_graph->version = Version(); status = storage.Save(*fb_graph, flags->outputFile); if (status != 0) { - MS_LOG(ERROR) << "Save graph failed"; + MS_LOG(ERROR) << "Save graph to file failed"; std::cout << "SAVE GRAPH FAILED:" << status << std::endl; return status; } diff --git a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc index 320ccc75a8..5b77ba5208 100644 --- a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc @@ -21,7 +21,7 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/anf_exporter/anf_exporter.h" #include "src/kernel_registry.h" -#include "include/context.h" +#include "src/inner_context.h" 
#include "src/populate_parameter.h" #include "src/ops/primitive_c.h" #include "src/tensor.h" @@ -91,7 +91,7 @@ ParameterPtr CreateNewParamter(const FuncGraphPtr &func_graph, Tensor *tensor) { MS_LOG(ERROR) << "tensor_data is nullptr"; return nullptr; } - auto ret = memcpy_s(tensor_data, tensor->Size(), tensor->MutableData(), tensor->Size()); + auto ret = memcpy_s(tensor_data, size, tensor->MutableData(), tensor->Size()); if (ret != EOK) { delete[] tensor_data; MS_LOG(ERROR) << "memcpy error: " << ret; @@ -104,7 +104,7 @@ ParameterPtr CreateNewParamter(const FuncGraphPtr &func_graph, Tensor *tensor) { return parameter; } kernel::LiteKernel *GetLiteKernel(std::vector inputs, std::vector outputs, OpParameter *parameter, - lite::Context *context, mindspore::lite::PrimitiveC *primitive) { + lite::InnerContext *context, mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(nullptr != lite_primitive); auto data_type = inputs.front()->data_type(); kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, (schema::PrimitiveType)primitive->Type()}; @@ -233,11 +233,7 @@ const AnfNodePtr ConstFoldPass::Process(const FuncGraphPtr &func_graph, const An << schema::EnumNamePrimitiveType((schema::PrimitiveType)(lite_primitive->Type())); return nullptr; } - lite::Context context; - if (context.allocator == nullptr) { - context.allocator = lite::Allocator::Create(); - } - auto lite_kernel = GetLiteKernel(input_tensors, output_tensors, parameter, &context, lite_primitive.get()); + auto lite_kernel = GetLiteKernel(input_tensors, output_tensors, parameter, context, lite_primitive.get()); if (lite_kernel == nullptr) { MS_LOG(ERROR) << "constant_folding schedule node lite kernel nullptr"; FreeTensors(&input_tensors, &output_tensors); diff --git a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h index c7d08d5159..fe53b74ac3 100644 --- a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h +++ b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h @@ -27,9 +27,15 @@ namespace mindspore { namespace opt { class ConstFoldPass : public PatternProcessPass { public: - explicit ConstFoldPass(bool multigraph = true) : PatternProcessPass("constfold_pass", multigraph) {} - ~ConstFoldPass() override = default; + explicit ConstFoldPass(bool multigraph = true) : PatternProcessPass("constfold_pass", multigraph) { + this->context = new lite::InnerContext; + this->context->Init(); + } + ~ConstFoldPass() override { delete (this->context); } const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + lite::InnerContext *context = nullptr; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/fusion/conv_biasadd_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_biasadd_fusion.cc index 6b2e832e21..dbcb841ef8 100644 --- a/mindspore/lite/tools/optimizer/fusion/conv_biasadd_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/conv_biasadd_fusion.cc @@ -114,6 +114,7 @@ int GenConvNewBias(const FuncGraphPtr &func_graph, const CNodePtr &conv_node, co } else { if (EOK != memcpy_s(add_bias_data, kernel_nums * sizeof(float), add_weight_data, kernel_nums * sizeof(float))) { MS_LOG(EXCEPTION) << "memset_s conv_bias_data failed"; + delete[] add_bias_data; return lite::RET_MEMORY_FAILED; } }