Fix large functions, reduce cyclomatic complexity, and remove redundant code
5 years ago · dda64912d0
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -24,7 +24,6 @@ set(LITE_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c
        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/workspace_pool.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc
--- a/mindspore/lite/src/common/file_utils.cc
+++ b/mindspore/lite/src/common/file_utils.cc
@@ -18,12 +18,10 @@
 #include <fcntl.h>
 #include <cstdlib>
 #include <climits>
 #include <cmath>
 #include "securec/include/securec.h"

 namespace mindspore {
 namespace lite {
 #define MAX_FILENAME_LEN 1024
 char *ReadFile(const char *file, size_t *size) {
  if (file == nullptr) {
    MS_LOG(ERROR) << "file is nullptr";
--- a/mindspore/lite/src/common/utils.cc
+++ b/mindspore/lite/src/common/utils.cc
@@ -48,131 +48,10 @@ uint64_t GetTimeUs() {
    return 0;
  }
  // USECS_IN_SEC *NSECS_IN_USEC;
  uint64_t retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC));
  auto retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC));
  return retval;
 }

 // Layout of IEEE 754 binary32 (float).
 static const unsigned int FP32_BIT_SIZE = 32;
 static const unsigned int FP32_EXPONENT_BIAS = 127;
 static const unsigned int FP32_SIGNIFICAND = 23;

 // Biased exponent value that marks infinity / NaN in binary32.
 static const unsigned int FP32_EXPONENT_MAX = 255;

 // Layout of IEEE 754 binary16 (half).
 static const unsigned int FP16_BIT_SIZE = 16;
 static const unsigned int FP16_EXPONENT_BIAS = 15;
 static const unsigned int FP16_SIGNIFICAND = 10;

 // Largest biased half exponent of a finite value, and the smallest re-biased
 // exponent that Float32ToShort still converts to a non-zero subnormal half.
 static const int FP16_EXPONENT_MAX = 30;
 static const int FP16_EXPONENT_MIN = -10;

 // Reinterprets a 32-bit pattern as a float. The copy goes through unsigned char
 // so that no numeric conversion takes place and no aliasing rule is violated.
 static float BitsToFloat32(uint32_t bits) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
  float value = 0.0f;
  const auto *src = reinterpret_cast<const unsigned char *>(&bits);
  auto *dst = reinterpret_cast<unsigned char *>(&value);
  for (size_t i = 0; i < sizeof(value); ++i) {
    dst[i] = src[i];
  }
  return value;
 }

 // Returns the 32-bit pattern that represents `value` (inverse of BitsToFloat32).
 static uint32_t Float32ToBits(float value) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
  uint32_t bits = 0;
  const auto *src = reinterpret_cast<const unsigned char *>(&value);
  auto *dst = reinterpret_cast<unsigned char *>(&bits);
  for (size_t i = 0; i < sizeof(bits); ++i) {
    dst[i] = src[i];
  }
  return bits;
 }

 // fp16.c
 // Converts a half-precision value, passed as its raw 16-bit pattern, to float.
 //
 // srcValue: bit pattern of the half (sign: 1 bit, exponent: 5 bits, mantissa: 10 bits).
 // Returns the float with the same value; infinities keep their sign, any half NaN
 // becomes a quiet float NaN, and zero / subnormal halves are converted exactly.
 //
 // The result is assembled as a 32-bit pattern and then reinterpreted; a numeric
 // int -> float conversion of that pattern would yield a meaningless large number.
 float ShortToFloat32(int16_t srcValue) {
  const auto halfBits = static_cast<uint16_t>(srcValue);
  const uint32_t expBits = halfBits & 0x7C00u;
  const uint32_t mantissaBits = halfBits & 0x03FFu;
  // Move the sign from bit 15 to bit 31.
  const uint32_t signBit = static_cast<uint32_t>(halfBits & 0x8000u) << FP16_BIT_SIZE;

  // nan or inf
  if (expBits == 0x7C00u) {
    const uint32_t magnitude = (mantissaBits > 0) ? 0x7FC00000u : 0x7F800000u;
    return BitsToFloat32(magnitude | signBit);
  }

  // normal number
  if (expBits != 0) {
    // exponents converted to float32 bias
    uint32_t bits = expBits + ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS) << FP16_SIGNIFICAND);
    bits = (bits | mantissaBits) << (FP32_SIGNIFICAND - FP16_SIGNIFICAND);
    return BitsToFloat32(bits | signBit);
  }

  // zero or subnormal: the value is mantissa * 2^-24, so build the float
  // +/-2^-24 as a bit pattern and multiply it by the mantissa.
  uint32_t scaleBits = 1u << FP16_SIGNIFICAND;
  scaleBits <<= (FP32_SIGNIFICAND - FP16_SIGNIFICAND);
  // add the bias difference
  scaleBits += (FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS - FP16_SIGNIFICAND) << FP32_SIGNIFICAND;
  scaleBits |= signBit;  // Combine with the sign mask
  return static_cast<float>(mantissaBits) * BitsToFloat32(scaleBits);
 }

 // __gnu_f2h_ieee
 // Converts a float to half precision and returns the raw 16-bit pattern.
 //
 // Rounding is to nearest, with ties rounded away from zero (bit 12 of the float
 // mantissa decides). Magnitudes too large for a half become infinity; magnitudes
 // below the smallest convertible subnormal, and zero, return 0 (the sign is
 // dropped in that case). NaN keeps the top 10 bits of its payload.
 int16_t Float32ToShort(float srcValue) {
  const uint32_t srcValueBit = Float32ToBits(srcValue);
  const uint32_t signBit = (srcValueBit >> (FP32_BIT_SIZE - 1)) << (FP16_BIT_SIZE - 1);
  const unsigned int mantissaShift = FP32_SIGNIFICAND - FP16_SIGNIFICAND;
  uint32_t mantissa = srcValueBit & 0x007FFFFFu;
  // exponent, re-biased from float32 to float16
  int exp = static_cast<int>((srcValueBit & 0x7F800000u) >> FP32_SIGNIFICAND) +
            static_cast<int>(FP16_EXPONENT_BIAS) - static_cast<int>(FP32_EXPONENT_BIAS);
  // Re-biased exponent of a float infinity / NaN.
  const int infNanExp = static_cast<int>(FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS);

  uint32_t res = 0;
  if (exp > 0 && exp < FP16_EXPONENT_MAX) {
    // Normal half. The rounded mantissa is ADDED to the exponent field so that a
    // rounding carry (mantissa overflowing into bit 10) bumps the exponent; an OR
    // would lose that carry whenever the exponent's lowest bit is already set.
    res = signBit |
          ((static_cast<uint32_t>(exp) << FP16_SIGNIFICAND) + ((mantissa + 0x00001000u) >> mantissaShift));
  } else if (exp <= 0) {
    // Zero, float subnormals and values below the smallest normal half.
    if (exp >= FP16_EXPONENT_MIN) {
      // normalized single, magnitude is less than min normal half float point:
      // restore the implicit leading one and shift it into subnormal position.
      mantissa = (mantissa | 0x00800000u) >> (1 - exp);
      // round to nearest
      if ((mantissa & 0x00001000u) > 0) {
        mantissa += 0x00002000u;
      }
      // combine sign & mantissa (exp is zero to get denormalized number)
      res = signBit | (mantissa >> mantissaShift);
    }
    // otherwise the value is less than min half float point and res stays 0
  } else if (exp == infNanExp) {
    // infinity stays infinity; NaN additionally carries its truncated payload
    res = signBit | 0x7C00u;
    if (mantissa != 0) {
      res |= mantissa >> mantissaShift;
    }
  } else {
    // exp >= FP16_EXPONENT_MAX, normalized single, round to nearest
    if ((mantissa & 0x00001000u) > 0) {
      mantissa += 0x00002000u;
      if ((mantissa & 0x00800000u) > 0) {
        mantissa = 0;
        exp += 1;
      }
    }
    if (exp > FP16_EXPONENT_MAX) {
      // exponent overflow - return infinity half
      res = signBit | 0x7C00u;
    } else {
      // combine sign, exp and mantissa into normalized half
      res = signBit | (static_cast<uint32_t>(exp) << FP16_SIGNIFICAND) | (mantissa >> mantissaShift);
    }
  }
  return static_cast<int16_t>(static_cast<uint16_t>(res));
 }
 std::string Remove(const std::string &from, const std::string &subStr, Mode mode) {
  std::string result = from;
  if (mode == PREFIX) {
@@ -236,22 +115,6 @@ std::vector<std::string> Tokenize(const std::string &src, const std::string &del
  return tokens;
 }

 // Converts elementSize half-precision bit patterns from srcdata into floats
 // written to dstdata, one element at a time via the scalar ShortToFloat32.
 void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize) {
  MS_ASSERT(srcdata != nullptr);
  MS_ASSERT(dstdata != nullptr);
  size_t remaining = elementSize;
  const int16_t *in = srcdata;
  float *out = dstdata;
  while (remaining > 0) {
    *out = ShortToFloat32(*in);
    ++in;
    ++out;
    --remaining;
  }
 }

 // Converts elementSize floats from srcdata into half-precision bit patterns
 // written to dstdata, one element at a time via the scalar Float32ToShort.
 void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize) {
  MS_ASSERT(srcdata != nullptr);
  MS_ASSERT(dstdata != nullptr);
  size_t remaining = elementSize;
  const float *in = srcdata;
  int16_t *out = dstdata;
  while (remaining > 0) {
    *out = Float32ToShort(*in);
    ++in;
    ++out;
    --remaining;
  }
 }

 #if defined(__ANDROID__)
 uint32_t getHwCap(int hwcap_type) {
  uint32_t ret = getauxval(hwcap_type);
--- a/mindspore/lite/src/common/utils.h
+++ b/mindspore/lite/src/common/utils.h
@@ -34,15 +34,7 @@ const int USEC = 1000000;
 const int MSEC = 1000;
 std::vector<std::string> StringSplit(std::string str, const std::string &pattern);

 uint64_t GetTimeUs(void);

 int16_t Float32ToShort(float srcValue);

 float ShortToFloat32(int16_t srcValue);

 void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize);

 void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize);
 uint64_t GetTimeUs();

 bool IsSupportSDot();

--- a/mindspore/lite/src/executor.cc
+++ b/mindspore/lite/src/executor.cc
@@ -75,57 +75,4 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_
  }
  return RET_OK;
 }

 // Dispatches a layout transform of `tensor` to `dst_format` according to the
 // tensor's data type. Int8 tensors go to TransformTensorLayoutUint8, float32
 // tensors to TransformTensorLayoutFp32; every other data type yields RET_ERROR.
 int Executor::TransformTensorLayout(Tensor *tensor, schema::Format dst_format, Allocator *allocator) {
  MS_ASSERT(nullptr != tensor);
  MS_ASSERT(nullptr != allocator);
  MS_ASSERT(4 == tensor->shape().size());
  const auto type_id = tensor->data_type();
  if (type_id == kNumberTypeInt8) {
    return TransformTensorLayoutUint8(tensor, dst_format, allocator);
  }
  if (type_id == kNumberTypeFloat32) {
    return TransformTensorLayoutFp32(tensor, dst_format, allocator);
  }
  return RET_ERROR;
 }

 // Repacks a float32 tensor from NC4HW4 into NHWC, replacing the tensor's buffer.
 //
 // tensor:     tensor to convert; its data pointer and format are updated on success.
 // dst_format: requested layout; only NC4HW4 -> NHWC is supported.
 // allocator:  used to allocate the new buffer and to free the old one.
 //             NOTE(review): freeing the old buffer through this allocator presumes
 //             the tensor's data was obtained from it - confirm with callers.
 // Returns RET_OK on success, RET_ERROR for a null argument, an unsupported format
 // pair, a tensor without data, or an allocation failure. The tensor is left
 // untouched on every error path.
 int Executor::TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator) {
  // The declaration defaults `allocator` to nullptr, so these must be real
  // runtime checks rather than assertions only.
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "tensor is nullptr";
    return RET_ERROR;
  }
  if (allocator == nullptr) {
    MS_LOG(ERROR) << "allocator is nullptr";
    return RET_ERROR;
  }
  MS_ASSERT(4 == tensor->shape().size());
  auto src_format = tensor->GetFormat();
  if (src_format != schema::Format::Format_NC4HW4 || dst_format != schema::Format::Format_NHWC) {
    MS_LOG(ERROR) << "Unsupported layout transform: " << EnumNameFormat(tensor->GetFormat()) << " to "
                  << EnumNameFormat(dst_format) << " in float32";
    return RET_ERROR;
  }
  auto *src_data = tensor->data_c();
  if (src_data == nullptr) {
    MS_LOG(ERROR) << "data of tensor is nullptr";
    return RET_ERROR;
  }
  auto *dst_data = allocator->Malloc(tensor->Size());
  if (dst_data == nullptr) {
    MS_LOG(ERROR) << "Malloc data failed";
    return RET_ERROR;
  }
  PackNC4HW4ToNHWCFp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
  // Publish the new buffer before releasing the old one.
  tensor->set_data(dst_data);
  tensor->SetFormat(dst_format);
  allocator->Free(src_data);
  return RET_OK;
 }

 // Layout transforms are not implemented for this data type: the function only
 // logs the requested source/destination formats and reports RET_ERROR.
 int Executor::TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator) {
  MS_ASSERT(nullptr != tensor);
  MS_ASSERT(nullptr != allocator);
  MS_ASSERT(4 == tensor->shape().size());
  const auto current_format = tensor->GetFormat();
  MS_LOG(ERROR) << "Unsupported layout transform: " << EnumNameFormat(current_format) << " to "
                << EnumNameFormat(dst_format) << " in uint8";
  return RET_ERROR;
 }
 }  // namespace mindspore::lite
--- a/mindspore/lite/src/executor.h
+++ b/mindspore/lite/src/executor.h
@@ -36,12 +36,6 @@ class Executor {

 protected:
  int CheckInputs(std::vector<Tensor *> &in_tensors);

  int TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr);

  int TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr);

  int TransformTensorLayout(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr);
 };
 }  // namespace mindspore::lite
 #endif
--- a/mindspore/lite/src/kernel_registry.h
+++ b/mindspore/lite/src/kernel_registry.h
@@ -50,8 +50,7 @@ class KernelRegistry {
  static const int data_type_length_{kNumberTypeEnd - kNumberTypeBegin + 1};
  static const int op_type_length_{PrimitiveType_MAX - PrimitiveType_MIN + 1};
  static const int array_size_{device_type_length_ * data_type_length_ * op_type_length_};
  kernel::KernelCreator creator_arrays_[array_size_] = {0};
  std::vector<OpParameter *> op_parameters_;
  kernel::KernelCreator creator_arrays_[array_size_] = {nullptr};
 };

 class KernelRegistrar {
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -52,89 +52,106 @@ static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor

 // A newly constructed session starts with its running flag cleared.
 LiteSession::LiteSession() {
  is_running_.store(false);
 }

 // Copies quantization metadata from the serialized tensor onto the runtime tensor:
 // every entry of quantParams() becomes a QuantArg appended to dst_tensor, and
 // quantClusters(), when present, is copied into dst_tensor's cluster list.
 void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
  MS_ASSERT(src_tensor != nullptr);
  MS_ASSERT(dst_tensor != nullptr);

  auto src_params = src_tensor->quantParams();
  if (src_params != nullptr) {
    const size_t param_count = src_params->size();
    for (size_t idx = 0; idx < param_count; ++idx) {
      auto src_param = src_params->Get(idx);
      QuantArg arg{};
      arg.bitNum = src_param->numBits();
      arg.scale = src_param->scale();
      arg.zeroPoint = src_param->zeroPoint();
      arg.var_corr = src_param->varCorr();
      arg.mean_corr = src_param->meanCorr();
      arg.inited = src_param->inited();
      dst_tensor->AddQuantParam(arg);
    }
  }

  auto src_clusters = src_tensor->quantClusters();
  if (src_clusters == nullptr) {
    return;
  }
  std::vector<float> cluster_values;
  const size_t cluster_count = src_clusters->size();
  for (size_t idx = 0; idx < cluster_count; ++idx) {
    cluster_values.push_back(src_clusters->Get(idx));
  }
  dst_tensor->SetQuantClusters(cluster_values);
 }

 // Attaches the constant payload of a serialized tensor to its runtime tensor.
 //
 // Only CONST_TENSOR / CONST_SCALAR tensors with a non-empty data field are
 // handled; everything else returns RET_OK without touching dst_tensor.
 //  - If WeightTensorNeedCopy() says so, the bytes are copied into dst_tensor's
 //    own buffer and tensor_index is recorded in copyed_tensor_idxes_.
 //  - Otherwise, int8/int16 data whose stored size differs from the tensor size
 //    is unpacked into a freshly allocated buffer.
 //  - Otherwise dst_tensor simply points at the model's data.
 // Returns RET_NULL_PTR when the destination buffer is unavailable for the copy,
 // RET_ERROR when allocating the unpack buffer fails, RET_OK otherwise.
 int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor,
                                     lite::Tensor *dst_tensor) {
  MS_ASSERT(src_tensor != nullptr);
  MS_ASSERT(dst_tensor != nullptr);
  auto category = TensorCategory(src_tensor);
  auto type_id = src_tensor->dataType();

  const bool is_const = category == Tensor::Category::CONST_TENSOR || category == Tensor::Category::CONST_SCALAR;
  if (!is_const) {
    return RET_OK;
  }
  if (src_tensor->data() == nullptr || src_tensor->data()->size() <= 0) {
    return RET_OK;
  }
  MS_ASSERT(dst_tensor->Size() == src_tensor->data()->size());

  if (WeightTensorNeedCopy(model, tensor_index)) {
    auto copy_target = dst_tensor->MutableData();
    if (copy_target == nullptr) {
      MS_LOG(ERROR) << "Data from tensor is nullptr";
      return RET_NULL_PTR;
    }
    memcpy(copy_target, src_tensor->data()->data(), dst_tensor->Size());
    copyed_tensor_idxes_.emplace_back(tensor_index);
    return RET_OK;
  }

  int stored_size = src_tensor->data()->size();
  int tensor_size = dst_tensor->Size();
  const bool is_packed =
    stored_size != tensor_size && (type_id == kNumberTypeInt8 || type_id == kNumberTypeInt16);
  if (!is_packed) {
    // Share the model's buffer directly.
    dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data()));
    return RET_OK;
  }

  auto malloc_ret = dst_tensor->MallocData();
  if (malloc_ret != RET_OK) {
    MS_LOG(ERROR) << "Malloc data for tensor failed ";
    return RET_ERROR;
  }
  kernel::DequantUtil::UnPackToInt(src_tensor, dst_tensor->MutableData());
  return RET_OK;
 }

 int LiteSession::ConvertTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  copyed_tensor_idxes_.clear();
  uint32_t tensor_count = model->all_tensors_.size();
  for (uint32_t i = 0; i < tensor_count; ++i) {
    auto *srcTensor = model->all_tensors_[i];
    if (srcTensor == nullptr) {
    auto *src_tensor = model->all_tensors_[i];
    if (src_tensor == nullptr) {
      MS_LOG(ERROR) << i << "th tensor in model is nullptr";
      return RET_NULL_PTR;
    }
    auto src_category = TensorCategory(srcTensor);
    auto src_category = TensorCategory(src_tensor);
    std::vector<int> shape;
    if (srcTensor->dims() == nullptr) {
    if (src_tensor->dims() == nullptr) {
      MS_LOG(DEBUG) << "Dims of " << i << "th tensor is nullptr";
    } else {
      if (src_category == Tensor::Category::CONST_TENSOR) {
        if (srcTensor->dataType() == kObjectTypeString && srcTensor->data() != nullptr) {
          shape.push_back(srcTensor->data()->size());
        } else {
          for (size_t j = 0; j < srcTensor->dims()->size(); j++) {
            shape.push_back(srcTensor->dims()->data()[j]);
          }
        }
      }
    }
    int dataType = srcTensor->dataType();
    auto *dstTensor = new (std::nothrow) Tensor(TypeId(dataType), shape, srcTensor->format(), src_category);
    if (dstTensor == nullptr) {
      MS_LOG(ERROR) << "new " << i << "th tensor failed";
      return RET_NULL_PTR;
    }
    if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) &&
        srcTensor->data() != nullptr && srcTensor->data()->size() > 0) {
      MS_ASSERT(dstTensor->Size() == srcTensor->data()->size());
      if (WeightTensorNeedCopy(model, i)) {
        auto dst_data = dstTensor->MutableData();
        if (dst_data == nullptr) {
          MS_LOG(ERROR) << "MutableData from " << i << "th tensor is nullptr";
          delete dstTensor;
          return RET_ERROR;
        }
        memcpy(dst_data, srcTensor->data()->data(), dstTensor->Size());
        copyed_tensor_idxes_.emplace_back(i);
    if (src_tensor->dims() != nullptr && src_category == Tensor::Category::CONST_TENSOR) {
      if (src_tensor->dataType() == kObjectTypeString && src_tensor->data() != nullptr) {
        shape.push_back(src_tensor->data()->size());
      } else {
        int pack_size = srcTensor->data()->size();
        int org_size = dstTensor->Size();
        if (pack_size != org_size && (dataType == kNumberTypeInt8 || dataType == kNumberTypeInt16)) {
          auto ret = dstTensor->MallocData();
          if (ret != RET_OK) {
            MS_LOG(ERROR) << "Malloc data for " << i << "tensor failed ";
            delete dstTensor;
            return RET_ERROR;
          }
          kernel::DequantUtil::UnPackToInt(srcTensor, dstTensor->MutableData());
        } else {
          dstTensor->set_data(const_cast<unsigned char *>(srcTensor->data()->data()));
        for (size_t j = 0; j < src_tensor->dims()->size(); j++) {
          shape.push_back(src_tensor->dims()->data()[j]);
        }
      }
    }
    auto quant_params = srcTensor->quantParams();
    if (quant_params != nullptr) {
      for (size_t j = 0; j < quant_params->size(); j++) {
        QuantArg quant_arg{};
        quant_arg.bitNum = quant_params->Get(j)->numBits();
        quant_arg.scale = quant_params->Get(j)->scale();
        quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint();
        quant_arg.var_corr = quant_params->Get(j)->varCorr();
        quant_arg.mean_corr = quant_params->Get(j)->meanCorr();
        quant_arg.inited = quant_params->Get(j)->inited();
        dstTensor->AddQuantParam(quant_arg);
      }
    auto *dst_tensor =
      new (std::nothrow) Tensor(TypeId(src_tensor->dataType()), shape, src_tensor->format(), src_category);
    if (dst_tensor == nullptr) {
      MS_LOG(ERROR) << "new " << i << "th tensor failed";
      return RET_NULL_PTR;
    }
    auto quant_clusters = srcTensor->quantClusters();
    if (quant_clusters != nullptr) {
      std::vector<float> clusters;
      for (size_t j = 0; j < quant_clusters->size(); j++) {
        clusters.push_back(quant_clusters->Get(j));
      }
      dstTensor->SetQuantClusters(clusters);
    auto ret = ConvertTensorsData(model, i, src_tensor, dst_tensor);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
      delete (dst_tensor);
      return ret;
    }
    this->tensors_.emplace_back(dstTensor);
    ConvertTensorsQuantParam(src_tensor, dst_tensor);
    this->tensors_.emplace_back(dst_tensor);
  }

  return RET_OK;
 }

--- a/mindspore/lite/src/lite_session.h
+++ b/mindspore/lite/src/lite_session.h
@@ -66,6 +66,11 @@ class LiteSession : public session::LiteSession {
             const std::vector<std::vector<int>> &dims) override;

 protected:
  void ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor);

  int ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor,
                         lite::Tensor *dst_tensor);

  int ConvertTensors(const lite::Model *model);

  void InitGraphInOutTensors(const lite::Model *model);
--- a/mindspore/lite/src/ops/primitive_c.cc
+++ b/mindspore/lite/src/ops/primitive_c.cc
@@ -216,12 +216,29 @@ void PrimitiveC::CalFloatScopeByMeanAndStddev(const double &mean, const double &
  *mMax = static_cast<float>((qmax - mean) / stdDev);
 }

 void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
  auto narrow_range = prim.GetAttr("narrow_range");
  bool narrowRangeQuantParam = narrow_range != nullptr ? GetValue<bool>(narrow_range) : false;
  auto num_bits = prim.GetAttr("num_bits");
  int32_t numbitsRangeQuantParam = num_bits != nullptr ? GetValue<int64_t>(num_bits) : 8;
 void PrimitiveC::FillDefaultInputQuantParamIfNeed(const size_t &inputSize) {
  std::vector<schema::QuantParamT> quants;
  schema::QuantParamT quantParam;
  // fill input_quant_param_ by not inited quant_parm
  if (input_quant_param_.size() < inputSize) {
    schema::QuantParamT tmpQuantParam;
    quants.emplace_back(tmpQuantParam);
    input_quant_param_.insert(input_quant_param_.end(), inputSize - input_quant_param_.size(), quants);
  }

  if (input_quant_param_.size() == kDoubleNum) {
    quants.clear();
    quantParam.min = 0.0;
    quantParam.max = 0.0;
    quantParam.zeroPoint = 0;
    quantParam.scale = input_quant_param_.at(0).at(0).scale * input_quant_param_.at(1).at(0).scale;
    quants.emplace_back(quantParam);
    input_quant_param_.emplace_back(quants);
  }
 }

 void PrimitiveC::PopulaterInputQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
                                          bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam) {
  std::vector<schema::QuantParamT> quants;
  schema::QuantParamT quantParam;
  auto inputMin = prim.GetAttr("input_minq");
@@ -267,26 +284,13 @@ void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<An
    quants.emplace_back(quantParam);
    input_quant_param_.emplace_back(quants);
  }
  FillDefaultInputQuantParamIfNeed(inputs.size());
 }

  // fill input_quant_param_ by not inited quant_parm
  if (input_quant_param_.size() < inputs.size()) {
    quants.clear();
    schema::QuantParamT tmpQuantParam;
    quants.emplace_back(tmpQuantParam);
    input_quant_param_.insert(input_quant_param_.end(), inputs.size() - input_quant_param_.size(), quants);
  }

  if (input_quant_param_.size() == kDoubleNum) {
    quants.clear();
    quantParam.min = 0.0;
    quantParam.max = 0.0;
    quantParam.zeroPoint = 0;
    quantParam.scale = input_quant_param_.at(0).at(0).scale * input_quant_param_.at(1).at(0).scale;
    quants.emplace_back(quantParam);
    input_quant_param_.emplace_back(quants);
  }

  quants.clear();
 void PrimitiveC::PopulaterOutputQuantParam(const Primitive &prim, bool narrowRangeQuantParam,
                                           int32_t numbitsRangeQuantParam) {
  std::vector<schema::QuantParamT> quants;
  schema::QuantParamT quantParam;
  auto outputMin = prim.GetAttr("output_minq");
  auto outputMax = prim.GetAttr("output_maxq");
  if (outputMin != nullptr && outputMax != nullptr) {
@@ -311,6 +315,15 @@ void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<An
  }
 }

 // Reads the "narrow_range" and "num_bits" attributes of `prim` (defaulting to
 // false and 8 when absent) and uses them to populate the input and then the
 // output quantization parameters.
 void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
  bool narrowRangeQuantParam = false;
  auto narrowRangeAttr = prim.GetAttr("narrow_range");
  if (narrowRangeAttr != nullptr) {
    narrowRangeQuantParam = GetValue<bool>(narrowRangeAttr);
  }

  int32_t numbitsRangeQuantParam = 8;
  auto numBitsAttr = prim.GetAttr("num_bits");
  if (numBitsAttr != nullptr) {
    // The attribute is stored as int64_t; the helpers take int32_t.
    numbitsRangeQuantParam = static_cast<int32_t>(GetValue<int64_t>(numBitsAttr));
  }

  PopulaterInputQuantParam(prim, inputs, narrowRangeQuantParam, numbitsRangeQuantParam);
  PopulaterOutputQuantParam(prim, narrowRangeQuantParam, numbitsRangeQuantParam);
 }

 void PrimitiveC::GetAttrDataFromInput(const AnfNodePtr inputNode, std::vector<int> *data) {
  if (inputNode->isa<ValueNode>()) {
    auto valNode = inputNode->cast<ValueNodePtr>();
--- a/mindspore/lite/src/ops/primitive_c.h
+++ b/mindspore/lite/src/ops/primitive_c.h
@@ -131,6 +131,10 @@ class PrimitiveC : public mindspore::Primitive {
  static std::shared_ptr<PrimitiveC> Create(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
                                            const schema::QuantType &quantType);
  void PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs);
  void FillDefaultInputQuantParamIfNeed(const size_t &inputSize);
  void PopulaterInputQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
                                bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam);
  void PopulaterOutputQuantParam(const Primitive &prim, bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam);
  void CalFloatScopeByMeanAndStddev(const double &mean, const double &stdDev, float *mMin, float *mMax);

 protected:
--- a/mindspore/lite/src/runtime/runtime_api.cc
+++ b/mindspore/lite/src/runtime/runtime_api.cc
@@ -17,7 +17,6 @@
 #include "src/runtime/runtime_api.h"
 #include <mutex>
 #include <string>
 #include "src/runtime/workspace_pool.h"
 #include "src/common/log_adapter.h"

 static std::mutex gWorkspaceMutex;
@@ -28,26 +27,6 @@ extern "C" {
 // C-linkage entry point that forwards straight to CreateThreadPool.
 ThreadPool *CreateLiteThreadPool(int thread_num, int mode) {
  ThreadPool *pool = CreateThreadPool(thread_num, mode);
  return pool;
 }

 void LiteAPISetLastError(const char *msg) { MS_LOG(ERROR) << "The lite api set last error is " << msg; }

 // Allocates `size` bytes from the process-wide WorkspacePool while holding
 // gWorkspaceMutex. deviceType, deviceId, dtypeCode and dtypeBits are not used.
 // Returns nullptr if the pool instance cannot be obtained.
 void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits) {
  std::lock_guard<std::mutex> guard(gWorkspaceMutex);
  auto *pool = mindspore::predict::WorkspacePool::GetInstance();
  if (pool != nullptr) {
    return pool->AllocWorkSpaceMem(size);
  }
  MS_LOG(ERROR) << "Get thread pool instance failed";
  return nullptr;
 }

 // Returns `ptr` to the process-wide WorkspacePool while holding gWorkspaceMutex.
 // deviceType and deviceId are not used. Yields 0 on success and -1 if the pool
 // instance cannot be obtained.
 int LiteBackendFreeWorkspace(int deviceType, int deviceId, const void *ptr) {
  std::lock_guard<std::mutex> guard(gWorkspaceMutex);
  auto *pool = mindspore::predict::WorkspacePool::GetInstance();
  if (pool != nullptr) {
    pool->FreeWorkSpaceMem(ptr);
    return 0;
  }
  return -1;
 }
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/src/runtime/runtime_api.h
+++ b/mindspore/lite/src/runtime/runtime_api.h
@@ -36,9 +36,6 @@ struct ThreadPool;
 #endif
 INTERNAL_API_DLL ThreadPool *CreateLiteThreadPool(int thread_num, int mode);
 INTERNAL_API_DLL void LiteAPISetLastError(const char *msg);
 INTERNAL_API_DLL void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode,
                                                 int dtypeBits);
 INTERNAL_API_DLL int LiteBackendFreeWorkspace(int deviceType, int deviceId, const void *ptr);
 INTERNAL_API_DLL int LiteBackendRegisterSystemLibSymbol(const char *name, void *ptr);
 #ifdef __cplusplus
 }
--- a/mindspore/lite/src/runtime/thread_pool.c
+++ b/mindspore/lite/src/runtime/thread_pool.c
@@ -44,7 +44,6 @@
 #define RET_TP_SYSTEM_ERROR (-1)

 #define MAX_THREAD_NUM (8)
 #define MAX_THREAD_POOL_NUM (4)
 #define DEFAULT_SPIN_COUNT (30000)

 typedef struct {
@@ -509,60 +508,74 @@ int BindMasterThread(struct ThreadPool *thread_pool, bool is_bind) {
  return RET_TP_OK;
 }

 int BindSalverThreads(struct ThreadPool *thread_pool, bool is_bind) {
  if (thread_pool == NULL) {
    LOG_ERROR("get thread pool instane failed");
    return RET_TP_ERROR;
 // Removes per-core pinning from the worker threads: every worker gets the same
 // affinity mask, built from the first gHigNum + gMidNum entries of cpu_cores.
 //
 // thread_pool: pool whose workers (indices 0 .. thread_num - 2) are updated;
 //              the caller is expected to have checked it for NULL.
 // Returns RET_TP_OK on success, RET_TP_ERROR if a worker cannot be looked up or
 // its affinity cannot be set.
 int FreeBindSalverThreads(struct ThreadPool *thread_pool) {
  cpu_set_t mask;
  CPU_ZERO(&mask);
  for (int i = 0; i < gHigNum + gMidNum; ++i) {
    CPU_SET(cpu_cores[i], &mask);
  }
  for (int i = 0; i < thread_pool->thread_num - 1; ++i) {
    Thread *thread = GetThread(thread_pool, i);
    if (thread == NULL) {
      LOG_ERROR("get thread failed, thread_id: %d", i);
      // Report a real status code: `false` is 0 and would not be seen as an error
      // by callers that compare against the RET_TP_* codes.
      return RET_TP_ERROR;
    }
    int ret = SetAffinity(thread->pthread, &mask);
    if (ret != RET_TP_OK) {
      LOG_ERROR("set thread affinity failed");
      return RET_TP_ERROR;
    }
  }
  return RET_TP_OK;
 }

 int DoBindSalverThreads(struct ThreadPool *thread_pool) {
  cpu_set_t mask;
  if (is_bind && thread_pool->mode != NO_BIND_MODE) {
    unsigned int attach_id;
    for (int i = 0; i < thread_pool->thread_num - 1; ++i) {
      if (thread_pool->mode == MID_MODE) {
        int core_id = gHigNum + gMidNum - i - 2;
        if (core_id >= 0) {
          attach_id = cpu_cores[core_id];
        } else {
          attach_id = cpu_cores[0];
        }
  unsigned int attach_id;
  for (int i = 0; i < thread_pool->thread_num - 1; ++i) {
    if (thread_pool->mode == MID_MODE) {
      int core_id = gHigNum + gMidNum - i - 2;
      if (core_id >= 0) {
        attach_id = cpu_cores[core_id];
      } else {
        attach_id = cpu_cores[i + 1];
      }
      LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id);
      CPU_ZERO(&mask);
      CPU_SET(attach_id, &mask);
      Thread *thread = GetThread(thread_pool, i);
      if (thread == NULL) {
        LOG_ERROR("get thread failed, thread_id: %d", i);
        return false;
      }
      int ret = SetAffinity(thread->pthread, &mask);
      if (ret != RET_TP_OK) {
        LOG_ERROR("set thread affinity failed");
        return RET_TP_ERROR;
        attach_id = cpu_cores[0];
      }
    } else {
      attach_id = cpu_cores[i + 1];
    }
  } else {
    LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id);
    CPU_ZERO(&mask);
    for (int i = 0; i < gHigNum + gMidNum; ++i) {
      CPU_SET(cpu_cores[i], &mask);
    CPU_SET(attach_id, &mask);
    Thread *thread = GetThread(thread_pool, i);
    if (thread == NULL) {
      LOG_ERROR("get thread failed, thread_id: %d", i);
      return false;
    }
    for (int i = 0; i < thread_pool->thread_num - 1; ++i) {
      Thread *thread = GetThread(thread_pool, i);
      if (thread == NULL) {
        LOG_ERROR("get thread failed, thread_id: %d", i);
        return false;
      }
      int ret = SetAffinity(thread->pthread, &mask);
      if (ret != RET_TP_OK) {
        LOG_ERROR("set thread affinity failed");
        return RET_TP_ERROR;
      }
    int ret = SetAffinity(thread->pthread, &mask);
    if (ret != RET_TP_OK) {
      LOG_ERROR("set thread affinity failed");
      return RET_TP_ERROR;
    }
  }
  LOG_INFO("BindSalverThreads success");
  return RET_TP_OK;
 }

 // Applies or releases CPU pinning for the worker threads of `thread_pool`.
 // Workers are pinned via DoBindSalverThreads when is_bind is set and the pool's
 // mode is not NO_BIND_MODE; otherwise FreeBindSalverThreads is used.
 // Returns RET_TP_ERROR for a NULL pool, else the status of the chosen helper.
 int BindSalverThreads(struct ThreadPool *thread_pool, bool is_bind) {
  if (thread_pool == NULL) {
    LOG_ERROR("get thread pool instane failed");
    return RET_TP_ERROR;
  }
  const bool pin_threads = is_bind && thread_pool->mode != NO_BIND_MODE;
  const int status = pin_threads ? DoBindSalverThreads(thread_pool) : FreeBindSalverThreads(thread_pool);
  if (status != RET_TP_OK) {
    return status;
  }
  LOG_INFO("BindSalverThreads success");
  return status;
 }
 #endif

 int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode) {
@@ -782,46 +795,6 @@ int CreateNewThread(struct ThreadPool *thread_pool, int thread_id) {
  return RET_TP_OK;
 }

 int ReConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) {
  LOG_INFO("reconfig thread pool, thread_num: %d, mode: %d", thread_num, mode);
  if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) {
    LOG_ERROR("invalid thread num: %d", thread_num);
    return RET_TP_ERROR;
  }
  if (thread_pool == NULL) {
    LOG_ERROR("get thread pool instane failed");
    return RET_TP_ERROR;
  }
  if (thread_num <= thread_pool->thread_num) {
    LOG_INFO("no need to add thread");
    return RET_TP_OK;
  }
  int curr_thread_num = thread_pool->thread_num;
  thread_pool->thread_num = thread_num > MAX_THREAD_NUM ? MAX_THREAD_NUM : thread_num;
  thread_pool->mode = mode;
  if (thread_pool->thread_list == NULL) {
    thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList));
    if (thread_pool->thread_list == NULL) {
      LOG_ERROR("create thread list failed");
      DestroyThreadPool(thread_pool);
      return RET_TP_ERROR;
    }
    thread_pool->thread_list->head = NULL;
    thread_pool->thread_list->tail = NULL;
    thread_pool->thread_list->size = 0;
    pthread_mutex_init(&thread_pool->thread_list->lock, NULL);
  }
  int add_thread_num = thread_pool->thread_num - curr_thread_num;
  for (int i = curr_thread_num - 1, j = 0; j < add_thread_num; ++i, ++j) {
    int ret = CreateNewThread(thread_pool, i);
    if (ret != RET_TP_OK) {
      LOG_ERROR("create new thread failed");
      return RET_TP_ERROR;
    }
  }
  return BindThreads(thread_pool, true, mode);
 }

 ThreadPool *CreateThreadPool(int thread_num, int mode) {
  LOG_INFO("create thread pool, thread_num: %d, mode: %d", thread_num, mode);
  if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) {
@@ -873,18 +846,6 @@ ThreadPool *CreateThreadPool(int thread_num, int mode) {
  return thread_pool;
 }

 int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) {
  if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) {
    LOG_ERROR("invalid thread num: %d", thread_num);
    return RET_TP_ERROR;
  }
  int ret = ReConfigThreadPool(thread_pool, thread_num, mode);
  if (ret != RET_TP_OK) {
    LOG_ERROR("reconfig thread pool failed, thread_num: %d, mode: %d", thread_num, mode);
  }
  return ret;
 }

 void ActivateThreadPool(struct ThreadPool *thread_pool) {
  if (thread_pool == NULL) {
    LOG_ERROR("get thread pool instane failed");
--- a/mindspore/lite/src/runtime/thread_pool.h
+++ b/mindspore/lite/src/runtime/thread_pool.h
@@ -28,25 +28,10 @@ typedef enum {
  MID_MODE = 2      /**< bind middle cpu first */
 } BindMode;

 /// \brief ThreadPoolId defined for specifying which thread pool to use.
 typedef enum {
  /** default thread pool id */
  THREAD_POOL_DEFAULT = 0,
  /** the second thread pool id */
  THREAD_POOL_SECOND = 1,
  /** the third thread pool id */
  THREAD_POOL_THIRD = 2,
  /** the fourth thread pool id */
  THREAD_POOL_FOURTH = 3
 } ThreadPoolId;

 // Forward declaration: the functions declared below only pass ThreadPool by pointer.
 struct ThreadPool;

 // Creates a thread pool for the requested thread count and mode and returns a pointer to it.
 // NOTE(review): the result for an out-of-range thread_num is not documented here; confirm
 // against the implementation.
 struct ThreadPool *CreateThreadPool(int thread_num, int mode);

 /**
 * reconfigure an existing thread pool
 * @param thread_pool the pool to reconfigure
 * @param thread_num requested thread count, rejected unless it is in (0, MAX_THREAD_NUM]
 * @param mode passed through to the reconfiguration; presumably a BindMode value (TODO confirm)
 * @return RET_TP_ERROR for a rejected thread_num, otherwise the result of the reconfiguration
 */
 int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode);

 /**
 *
 * @param session_index, support multi session
--- a/mindspore/lite/src/runtime/workspace_pool.cc
+++ b/mindspore/lite/src/runtime/workspace_pool.cc
@@ -1,154 +0,0 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "src/runtime/workspace_pool.h"
 #ifdef __APPLE__
 #include <stdlib.h>
 #else
 #include <malloc.h>
 #endif
 #include <algorithm>
 #include "src/common/log_adapter.h"

 namespace mindspore {
 namespace predict {
 static constexpr size_t kWorkspacePageSize = 4096;
 static constexpr int kTempAllocaAlignment = 64;
 // Returns the address of the single shared pool, a function-local static that is
 // constructed the first time this function runs.
 WorkspacePool *WorkspacePool::GetInstance() {
  static WorkspacePool shared_pool;
  WorkspacePool *const pool = &shared_pool;
  return pool;
 }

 void *WorkspacePool::AllocWorkSpaceMem(size_t size) {
  size_t nbytes = (size + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize;
  if (nbytes == 0) {
    nbytes = kWorkspacePageSize;
  }
  std::pair<size_t, void *> alloc;
  // fist alloc
  if (freeList.empty()) {
    alloc.first = nbytes;
 #ifdef __APPLE__
    int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes);
    if (err != 0) {
      MS_LOGE("posix_memalign failed, error code:%d", err);
      return alloc.second;
    }
 #else
 #ifdef _WIN32
    alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment);
 #else
    alloc.second = memalign(kTempAllocaAlignment, nbytes);
 #endif
 #endif
  } else if (freeList.size() == 1) {  // one element
    alloc = *(freeList.begin());
    freeList.erase(freeList.begin());
    if (alloc.first < nbytes) {
      free(alloc.second);
      alloc.first = nbytes;
 #ifdef __APPLE__
      int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes);
      if (err != 0) {
        MS_LOGE("posix_memalign failed, error code:%d", err);
        return alloc.second;
      }
 #else
 #ifdef _WIN32
      alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment);
 #else
      alloc.second = memalign(kTempAllocaAlignment, nbytes);
 #endif
 #endif
    }
  } else {
    if ((*(freeList.begin())).first >= nbytes) {
      auto iter = freeList.begin();
      for (; iter != freeList.end(); ++iter) {
        if ((*iter).first < size) {
          alloc = *(--iter);
          freeList.erase(iter);
          break;
        }
      }
      if (iter == freeList.end()) {
        alloc = *(freeList.rbegin());
        freeList.erase(--freeList.end());
      }
    } else {
      alloc = *(freeList.begin());
      freeList.erase(freeList.begin());
      free(alloc.second);
      alloc.first = nbytes;
 #ifdef __APPLE__
      int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes);
      if (err != 0) {
        MS_LOGE("posix_memalign failed, error code:%d", err);
        return alloc.second;
      }
 #else
 #ifdef _WIN32
      alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment);
 #else
      alloc.second = memalign(kTempAllocaAlignment, nbytes);
 #endif
 #endif
    }
  }
  allocList.emplace_back(alloc);
  return alloc.second != nullptr ? alloc.second : nullptr;
 }

 // Gives a buffer obtained from AllocWorkSpaceMem back to the pool: its entry is moved from
 // allocList to freeList so a later allocation can reuse it. The memory itself is not
 // released here.
 //
 // A null ptr is ignored. An error is logged and nothing changes when no buffer is
 // currently handed out or when ptr does not match any handed-out buffer.
 void WorkspacePool::FreeWorkSpaceMem(const void *ptr) {
  if (ptr == nullptr) {
    return;
  }
  if (allocList.empty()) {
    MS_LOG(ERROR) << "no mem have been alloc";
    return;
  }
  // Check the most recently handed-out buffer first, then fall back to a full search.
  auto iter = allocList.end() - 1;
  if (iter->second != ptr) {
    iter = std::find_if(allocList.begin(), allocList.end(),
                        [ptr](const std::pair<size_t, void *> &block) { return block.second == ptr; });
    if (iter == allocList.end()) {
      MS_LOG(ERROR) << "no value ptr have been alloc";
      return;
    }
  }
  // Copy the entry before erasing: erase invalidates iter, so it must not be used afterwards.
  freeList.insert(*iter);
  allocList.erase(iter);
 }

 // Releases every buffer the pool still tracks, both the handed-out ones in allocList and
 // the cached ones in freeList, and empties both containers.
 WorkspacePool::~WorkspacePool() {
  // On Windows the blocks come from _aligned_malloc and have to go back through
  // _aligned_free; on the other platforms the aligned allocators pair with free.
  auto release = [](void *block) {
 #ifdef _WIN32
    _aligned_free(block);
 #else
    free(block);
 #endif
  };
  for (auto &a : allocList) {
    release(a.second);
  }
  allocList.clear();
  for (auto &f : freeList) {
    release(f.second);
  }
  freeList.clear();
 }
 }  // namespace predict
 }  // namespace mindspore
--- a/mindspore/lite/src/runtime/workspace_pool.h
+++ b/mindspore/lite/src/runtime/workspace_pool.h
@@ -1,44 +0,0 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_
 #include <memory>
 #include <vector>
 #include <set>
 #include <utility>
 #include <functional>
 #include <mutex>

 namespace mindspore {
 namespace predict {
 // Pool of scratch buffers. Buffers are requested with AllocWorkSpaceMem and given back
 // with FreeWorkSpaceMem; the pool cannot be copied.
 class WorkspacePool {
 public:
  // Access to the shared pool instance.
  static WorkspacePool *GetInstance();

  WorkspacePool() = default;
  WorkspacePool(const WorkspacePool &) = delete;
  WorkspacePool &operator=(const WorkspacePool &) = delete;
  ~WorkspacePool();

  // Returns a buffer for a request of `size` bytes.
  void *AllocWorkSpaceMem(size_t size);
  // Gives back a buffer previously returned by AllocWorkSpaceMem.
  void FreeWorkSpaceMem(const void *ptr);

 private:
  // One tracked buffer: (size, address).
  using Block = std::pair<size_t, void *>;

  // Buffers currently handed out.
  std::vector<Block> allocList{};
  // Buffers available for reuse, ordered from the greatest (size, address) pair downwards.
  std::set<Block, std::greater<Block>> freeList{};
 };
 }  // namespace predict
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -171,6 +171,33 @@ int Scheduler::BuildKernels(const lite::Model *model, std::vector<Tensor *> *ten
  return RET_OK;
 }

 // Collects the kernels that go into the same sub graph as head_kernel, walking the
 // out_kernels() links breadth-first starting at head_kernel.
 //
 // Every visited kernel is marked true in *sinked_kernel_map and appended to the result in
 // visiting order. A successor is visited only when its sub graph type equals the type of
 // head_kernel and all of its in_kernels() are already marked in the map at that moment.
 std::vector<kernel::LiteKernel *> Scheduler::FindAllSubGraphKernels(
  kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map) {
  MS_ASSERT(head_kernel != nullptr);
  MS_ASSERT(sinked_kernel_map != nullptr);
  auto &sinked = *sinked_kernel_map;
  const auto head_sub_graph_type = GetKernelSubGraphType(head_kernel);

  std::vector<kernel::LiteKernel *> collected;
  std::queue<kernel::LiteKernel *> pending;
  pending.push(head_kernel);
  while (!pending.empty()) {
    auto *current = pending.front();
    pending.pop();
    sinked[current] = true;
    collected.push_back(current);
    for (auto *successor : current->out_kernels()) {
      if (GetKernelSubGraphType(successor) != head_sub_graph_type) {
        continue;
      }
      const auto &inputs = successor->in_kernels();
      const bool inputs_all_sinked = std::all_of(
        inputs.begin(), inputs.end(), [&sinked](kernel::LiteKernel *kernel) { return sinked[kernel]; });
      if (inputs_all_sinked) {
        pending.push(successor);
      }
    }
  }
  return collected;
 }

 int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
  auto old_kernels = *kernels;
  kernels->clear();
@@ -194,27 +221,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
      MS_LOG(ERROR) << "Not support NPU and APU now";
      return RET_NOT_SUPPORT;
    }

    std::vector<kernel::LiteKernel *> sub_kernels;
    std::queue<kernel::LiteKernel *> kernel_queue;
    kernel_queue.emplace(head_kernel);
    auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
    while (!kernel_queue.empty()) {
      auto cur_kernel = kernel_queue.front();
      kernel_queue.pop();
      is_kernel_sinked[cur_kernel] = true;
      sub_kernels.emplace_back(cur_kernel);
      auto post_kernels = cur_kernel->out_kernels();
      for (auto post_kernel : post_kernels) {
        if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) {
          auto post_kernel_inputs = post_kernel->in_kernels();
          if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(),
                          [&](kernel::LiteKernel *kernel) { return is_kernel_sinked[kernel]; })) {
            kernel_queue.emplace(post_kernel);
          }
        }
      }
    }
    auto sub_kernels = FindAllSubGraphKernels(head_kernel, &is_kernel_sinked);
    auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type);
    if (subgraph == nullptr) {
      MS_LOG(ERROR) << "Create SubGraphKernel failed";
--- a/mindspore/lite/src/scheduler.h
+++ b/mindspore/lite/src/scheduler.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_LITE_SRC_SCHEDULER_H_

 #include <vector>
 #include <map>
 #include "src/sub_graph_kernel.h"
 #include "src/inner_context.h"
 #include "include/model.h"
@@ -47,6 +48,9 @@ class Scheduler {
  kernel::SubGraphKernel *CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
                                               kernel::SubGraphType type);

  std::vector<kernel::LiteKernel *> FindAllSubGraphKernels(
    kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);

  static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors);

  static void SetKernelTensorDataType(kernel::LiteKernel *kernel);
--- a/mindspore/lite/src/tensor.cc
+++ b/mindspore/lite/src/tensor.cc
@@ -256,54 +256,19 @@ std::string Tensor::ToString() const {
  oss << std::endl << "Data:";
  switch (this->data_type_) {
    case kNumberTypeFloat32: {
      auto data = static_cast<float *>(this->data_);
      if (data == nullptr) {
        return "Data of tensor is nullptr";
      } else {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
      oss << DataToString<float>(this->data_c(), this->ElementsNum());
    } break;
    case kNumberTypeFloat16: {
      auto data = static_cast<int16_t *>(this->data_);
      if (data == nullptr) {
        oss << " Data of tensor is nullptr";
      } else {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
      oss << DataToString<int16_t>(this->data_c(), this->ElementsNum());
    } break;
    case kNumberTypeInt32: {
      auto data = static_cast<int32_t *>(this->data_);
      if (data == nullptr) {
        oss << " Data of tensor is nullptr";
      } else {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
      oss << DataToString<int32_t>(this->data_c(), this->ElementsNum());
    } break;
    case kNumberTypeInt16: {
      auto data = static_cast<int16_t *>(this->data_);
      if (data == nullptr) {
        oss << " Data of tensor is nullptr";
      } else {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
      oss << DataToString<int16_t>(this->data_c(), this->ElementsNum());
    } break;
    case kNumberTypeInt8: {
      auto data = static_cast<int8_t *>(this->data_);
      if (data == nullptr) {
        oss << " Data of tensor is nullptr";
      } else {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << static_cast<int32_t>(data[i]);
        }
      }
      oss << DataToString<int8_t>(this->data_c(), this->ElementsNum());
    } break;
    default:
      oss << "Unsupported data type to print";
--- a/mindspore/lite/src/tensor.h
+++ b/mindspore/lite/src/tensor.h
@@ -134,6 +134,20 @@ class Tensor : public mindspore::tensor::MSTensor {
    }
  }

 private:
  // Formats at most the first 40 elements of `data`, read as an array of T holding
  // `data_number` elements, as a list in which every value is preceded by one space.
  // Returns the text "Data of tensor is nullptr" when `data` is null.
  // Static because it reads no member state.
  template <typename T>
  static std::string DataToString(void *data, size_t data_number) {
    if (data == nullptr) {
      return "Data of tensor is nullptr";
    }
    std::ostringstream oss;
    auto casted_data = static_cast<T *>(data);
    for (size_t i = 0; i < 40 && i < data_number; i++) {
      // Unary plus promotes char-sized element types (int8_t) to int, so they are printed as
      // numbers instead of raw characters; wider types print the same as before.
      oss << " " << +casted_data[i];
    }
    return oss.str();
  }

 protected:
  void *data_ = nullptr;
  void *device_data_ = nullptr;
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -119,7 +119,6 @@ set(TEST_LITE_SRC
        ${LITE_DIR}/src/runtime/allocator.cc
        ${LITE_DIR}/src/runtime/runtime_api.cc
        ${LITE_DIR}/src/runtime/thread_pool.c
        ${LITE_DIR}/src/runtime/workspace_pool.cc
        ${LITE_DIR}/src/runtime/parallel_executor.cc
        ${LITE_DIR}/src/tensor.cc
        ${LITE_DIR}/src/executor.cc
--- a/mindspore/lite/tools/converter/CMakeLists.txt
+++ b/mindspore/lite/tools/converter/CMakeLists.txt
@@ -72,7 +72,6 @@ set(LITE_SRC
        ${SRC_DIR}/runtime/allocator.cc
        ${SRC_DIR}/runtime/runtime_api.cc
        ${SRC_DIR}/runtime/thread_pool.c
        ${SRC_DIR}/runtime/workspace_pool.cc
        ${SRC_DIR}/inner_context.cc
        ${SRC_DIR}/tensor.cc
        ${SRC_DIR}/kernel_registry.cc