Merge pull request !6569 from chenjianping/lite_dev2 (tags/v1.1.0)
@@ -114,7 +114,7 @@ class MS_API LiteSession {
  /// \brief Resize inputs shape.
  ///
  /// \param[in] inputs Define the inputs of the model.
  /// \param[in] inputs Define the inputs new shape.
  /// \param[in] dims Define the new shape of the inputs.
  ///
  /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs, const std::vector<std::vector<int>> &dims) = 0;
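For context, a rough usage sketch of this Resize API (the shapes and the session variable are illustrative, not taken from the change):

  // Hedged sketch: resize the model's first input to a new NHWC shape.
  std::vector<mindspore::tensor::MSTensor *> inputs = session->GetInputs();
  std::vector<std::vector<int>> dims = {{2, 224, 224, 3}};  // hypothetical new shape
  int ret = session->Resize(inputs, dims);
  if (ret != mindspore::lite::RET_OK) {
    // handle failure; STATUS codes are defined in errorcode.h
  }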
@@ -48,7 +48,7 @@ struct Model {
  /// \brief Free meta graph temporary buffer.
  virtual void Free();
  /// \brief Free all temporary buffer
  /// \brief Free all temporary buffers, e.g. nodes in the model.
  void Destroy();
  /// \brief Model destructor, frees all memory.
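A hedged sketch of where Free() and Destroy() sit in the usual model lifecycle (the Import/CompileGraph flow and the buffer names are assumptions, not part of this diff):

  // Sketch only: Free() releases the serialized meta graph buffer once the
  // graph is compiled; Destroy() additionally releases model-owned structures
  // such as nodes.
  Model *model = Model::Import(model_buf, model_size);  // model_buf/model_size assumed
  session->CompileGraph(model);
  model->Free();     // meta graph buffer no longer needed after compilation
  // ... run inference ...
  model->Destroy();  // free remaining temporary buffers, e.g. nodes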
@@ -1,8 +1,10 @@
cmake_minimum_required(VERSION 3.14)
project (Lite_Internal)
set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../)
set(CMAKE_CXX_COMPILER ${CMAKE_C_COMPILER})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
include_directories(${TOP_DIR})
add_compile_definitions(ENABLE_NNACL_INFER_SHAPE)
file(GLOB KERNEL_SRC
  ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/arithmetic_common.c
@@ -26,13 +28,11 @@ endif ()
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/opt_op_handler.c)
set(CCSRC
  ${CMAKE_CURRENT_SOURCE_DIR}/src/common/vector.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/common/string.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/lite_session.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/allocator.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/ms_tensor.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/common/string.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/src/common/vector.cc
  ${TOP_DIR}/src/common/log_adapter.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/../../core/gvar/logging_level.cc
  ${TOP_DIR}/src/runtime/allocator.cc
)
if (PLATFORM_ARM64)
@@ -43,6 +43,4 @@ if (PLATFORM_ARM64)
  set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
  set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
  add_library(mslite_internal SHARED ${CCSRC} ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
  target_link_libraries(mslite_internal log)
endif()
@@ -82,6 +82,7 @@ typedef struct LiteSession {
  /// \brief Resize inputs shape.
  ///
  /// \param[in] inputs Define the new inputs shape.
  /// \param[in] dims Define the new shape of the inputs.
  ///
  /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
  int Resize(const TensorPtrVector &inputs, const Int32VectorVector &dims);
@@ -28,161 +28,161 @@ enum NodeType {
};
enum KernelType : int {
  Concat = 0,
  SoftMax,
  Activation,
  Conv2D,
  FusedBatchNorm,
  BatchNorm,
  BiasAdd,
  Pooling,
  ROIPooling,
  DepthwiseConv2D,
  DeDepthwiseConv2D,
  Resize,
  DetectionPostProcess,
  FullConnection,
  Mean,
  DeConv2D,
  Scale,
  Reshape,
  Eltwise,
  NetOutput,
  Add,
  Sub,
  MatMul,
  StridedSlice,
  Power,
  Slice,
  Stack,
  Mul,
  RealDiv,
  Pad,
  Maximum,
  Minimum,
  PReLU,
  LeakyReLU,
  ArgMax,
  ArgMin,
  Exp,
  Crop,
  Range,
  Rsqrt,
  ExpandDims,
  Tile,
  Cast,
  Shape,
  Nchw2Nhwc,
  Nhwc2Nchw,
  QuantDTypeCast,
  Split,
  Permute,
  FakeQuantWithMinMaxVars,
  Equal,
  Less,
  Greater,
  NotEqual,
  LessEqual,
  GreaterEqual,
  Min,
  Floor,
  Abs,
  Neg,
  Cos,
  Sin,
  Sqrt,
  Square,
  Constant,
  Log,
  Tan,
  Atan,
  Asin,
  Clip,
  Transpose,
  Squeeze,
  Unsqueeze,
  Upsample,
  Dropout,
  Broadcast,
  BroadcastTo,
  Lrn,
  ZerosLike,
  TopK,
  SpaceToDepth,
  SpaceToBatch,
  SparseToDense,
  ReverseSequence,
  Rank,
  Gather,
  GatherNd,
  Fill,
  Elu,
  DepthToSpace,
  BatchToSpace,
  AddN,
  Ceil,
  EmbeddingLookup,
  EmbeddingLookupSparse,
  FloorDiv,
  FloorMod,
  L2Norm,
  LocalResponseNormalization,
  MatrixDiag,
  Reduce,
  Reverse,
  Round,
  Select,
  Scatter,
  ScatterND,
  ConstantOfShape,
  Unique,
  Unstack,
  LogicalAnd,
  LogicalOr,
  LogicalXor,
  LogicalNot,
  OnnxInt8Quantize,
  OnnxInt8Dequantize,
  FakeQuantWithMinMax,
  FakeQuantWithMinMaxPerChannel,
  BatchNormFold,
  MulFold,
  AddFold,
  SquaredDifference,
  Flatten,
  FlattenGrad,
  TupleGetItem,
  Div,
  Where,
  OneHot,
  Lstm,
  Conv2DGradFilter,
  Conv2DGradInput,
  PoolingGrad,
  BNGrad,
  BNGradInput,
  ApplyMomentum,
  BiasGrad,
  SoftmaxCrossEntropy,
  AddGrad,
  SubGrad,
  MulGrad,
  DivGrad,
  PowerGrad,
  ActivationGrad,
  PriorBox,
  SpaceToBatchND,
  Depend,
  Return,
  MakeTuple,
  ToFormat,
  Proposal,
  Custom,
  BlackBox,
  NegGrad,
  LogGrad,
  BatchToSpaceND,
  END,
  KernelType_Concat = 0,
  KernelType_SoftMax,
  KernelType_Activation,
  KernelType_Conv2D,
  KernelType_FusedBatchNorm,
  KernelType_BatchNorm,
  KernelType_BiasAdd,
  KernelType_Pooling,
  KernelType_ROIPooling,
  KernelType_DepthwiseConv2D,
  KernelType_DeDepthwiseConv2D,
  KernelType_Resize,
  KernelType_DetectionPostProcess,
  KernelType_FullConnection,
  KernelType_Mean,
  KernelType_DeConv2D,
  KernelType_Scale,
  KernelType_Reshape,
  KernelType_Eltwise,
  KernelType_NetOutput,
  KernelType_Add,
  KernelType_Sub,
  KernelType_MatMul,
  KernelType_StridedSlice,
  KernelType_Power,
  KernelType_Slice,
  KernelType_Stack,
  KernelType_Mul,
  KernelType_RealDiv,
  KernelType_Pad,
  KernelType_Maximum,
  KernelType_Minimum,
  KernelType_PReLU,
  KernelType_LeakyReLU,
  KernelType_ArgMax,
  KernelType_ArgMin,
  KernelType_Exp,
  KernelType_Crop,
  KernelType_Range,
  KernelType_Rsqrt,
  KernelType_ExpandDims,
  KernelType_Tile,
  KernelType_Cast,
  KernelType_Shape,
  KernelType_Nchw2Nhwc,
  KernelType_Nhwc2Nchw,
  KernelType_QuantDTypeCast,
  KernelType_Split,
  KernelType_Permute,
  KernelType_FakeQuantWithMinMaxVars,
  KernelType_Equal,
  KernelType_Less,
  KernelType_Greater,
  KernelType_NotEqual,
  KernelType_LessEqual,
  KernelType_GreaterEqual,
  KernelType_Min,
  KernelType_Floor,
  KernelType_Abs,
  KernelType_Neg,
  KernelType_Cos,
  KernelType_Sin,
  KernelType_Sqrt,
  KernelType_Square,
  KernelType_Constant,
  KernelType_Log,
  KernelType_Tan,
  KernelType_Atan,
  KernelType_Asin,
  KernelType_Clip,
  KernelType_Transpose,
  KernelType_Squeeze,
  KernelType_Unsqueeze,
  KernelType_Upsample,
  KernelType_Dropout,
  KernelType_Broadcast,
  KernelType_BroadcastTo,
  KernelType_Lrn,
  KernelType_ZerosLike,
  KernelType_TopK,
  KernelType_SpaceToDepth,
  KernelType_SpaceToBatch,
  KernelType_SparseToDense,
  KernelType_ReverseSequence,
  KernelType_Rank,
  KernelType_Gather,
  KernelType_GatherNd,
  KernelType_Fill,
  KernelType_Elu,
  KernelType_DepthToSpace,
  KernelType_BatchToSpace,
  KernelType_AddN,
  KernelType_Ceil,
  KernelType_EmbeddingLookup,
  KernelType_EmbeddingLookupSparse,
  KernelType_FloorDiv,
  KernelType_FloorMod,
  KernelType_L2Norm,
  KernelType_LocalResponseNormalization,
  KernelType_MatrixDiag,
  KernelType_Reduce,
  KernelType_Reverse,
  KernelType_Round,
  KernelType_Select,
  KernelType_Scatter,
  KernelType_ScatterND,
  KernelType_ConstantOfShape,
  KernelType_Unique,
  KernelType_Unstack,
  KernelType_LogicalAnd,
  KernelType_LogicalOr,
  KernelType_LogicalXor,
  KernelType_LogicalNot,
  KernelType_OnnxInt8Quantize,
  KernelType_OnnxInt8Dequantize,
  KernelType_FakeQuantWithMinMax,
  KernelType_FakeQuantWithMinMaxPerChannel,
  KernelType_BatchNormFold,
  KernelType_MulFold,
  KernelType_AddFold,
  KernelType_SquaredDifference,
  KernelType_Flatten,
  KernelType_FlattenGrad,
  KernelType_TupleGetItem,
  KernelType_Div,
  KernelType_Where,
  KernelType_OneHot,
  KernelType_Lstm,
  KernelType_Conv2DGradFilter,
  KernelType_Conv2DGradInput,
  KernelType_PoolingGrad,
  KernelType_BNGrad,
  KernelType_BNGradInput,
  KernelType_ApplyMomentum,
  KernelType_BiasGrad,
  KernelType_SoftmaxCrossEntropy,
  KernelType_AddGrad,
  KernelType_SubGrad,
  KernelType_MulGrad,
  KernelType_DivGrad,
  KernelType_PowerGrad,
  KernelType_ActivationGrad,
  KernelType_PriorBox,
  KernelType_SpaceToBatchND,
  KernelType_Depend,
  KernelType_Return,
  KernelType_MakeTuple,
  KernelType_ToFormat,
  KernelType_Proposal,
  KernelType_Custom,
  KernelType_BlackBox,
  KernelType_NegGrad,
  KernelType_LogGrad,
  KernelType_BatchToSpaceND,
  KernelType_END,
};
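// Editor's note (illustration, not part of the diff): the blanket KernelType_ prefix
// is a C-style namespacing fix. Unscoped enumerators live in the enclosing scope, so
// the old names collided with ordinary identifiers, e.g.:
//   enum KernelType : int { Resize = 0 };
//   int Resize(const TensorPtrVector &inputs, const Int32VectorVector &dims);
//   // error: 'Resize' redeclared as a different kind of entity
// Prefixing every enumerator (KernelType_Resize, ...) keeps the plain names free for
// functions such as LiteSession::Resize above.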
enum ActivationType {
@@ -107,7 +107,7 @@ typedef struct MSTensor {
  TypeId data_type_;
  Format format_ = Format_NHWC;
  Category category_ = VAR;
  ShapeVector shape_ = {};
  ShapeVector shape_;
  size_t refCount = 0;
  int32_t Batch() const;
@@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INTERNAL_SRC_STRING_H_
#define INTERNAL_SRC_STRING_H_
#ifndef MINDSPORE_LITE_INTERNAL_SRC_STRING_H_
#define MINDSPORE_LITE_INTERNAL_SRC_STRING_H_
#include <string.h>
#include <stdint.h>
@@ -34,8 +34,8 @@ typedef struct String {
  char &at(size_t pos);
  const char &at(size_t pos) const;
  char &operator[](size_t pos);
  const char &operator[](size_t pos) const;
  inline char &operator[](size_t pos);
  inline const char &operator[](size_t pos) const;
  char *data() noexcept;
  const char *data() const noexcept;
  const char *c_str() const noexcept;
@@ -97,4 +97,4 @@ String to_String(float value);
String to_String(double value);
String to_String(long double value);
#endif  // INTERNAL_SRC_STRING_H_
#endif  // MINDSPORE_LITE_INTERNAL_SRC_STRING_H_
@@ -17,6 +17,7 @@
#define MINDSPORE_LITE_INTERNAL_INCLUDE_VECTOR_H
#include <stdint.h>
#include <initializer_list>
#include "internal/include/string.h"
#define DEFAULT_CAPACITY 1
@@ -44,6 +45,8 @@ class Vector {
  void push_back(const T &elem);
  void push_back(T &&);
  void pop_back();
  void insert(const T &elem, size_t index);
@@ -87,6 +90,8 @@ class Vector {
  void resize(size_t size);
  void reserve(size_t capacity);
  Vector<T> &operator=(const Vector<T> &v);
};
template <typename T>
@@ -0,0 +1,220 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "internal/src/allocator.h"
#include <stdlib.h>
#include "internal/src/lite_log.h"

namespace mindspore::lite {
namespace {
constexpr size_t kMaxMallocSize = 2000 * 1024 * 1024;
constexpr int kBlockSize = 1024;
constexpr size_t kBlockLimit = (kBlockSize << (kBlockRange - 1));

// Map an allocation size to the smallest pooled block class (kBlockSize << index)
// that can hold it; return -1 when the size exceeds the largest class.
int SizeToIndex(size_t size) {
  if (size > kBlockLimit) {
    return -1;
  }
  int index = 0;
  for (int i = 0; i < kBlockRange; ++i) {
    if ((size & (kBlockSize << i))) {
      index = i;
    }
  }
  if (size > (size_t)(kBlockSize << index)) {
    index += 1;
  }
  return index;
}
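// Worked examples (computed by hand, not part of the source), with kBlockSize = 1024
// and kBlockRange = 9, i.e. block classes 1 KB, 2 KB, ..., 256 KB:
//   SizeToIndex(1024)   == 0  -> served from the 1 KB class
//   SizeToIndex(3000)   == 2  -> highest block bit set is 2048 (index 1), but
//                                3000 > 2048, so it is bumped to the 4 KB class
//   SizeToIndex(300000) == -1 -> above kBlockLimit (256 KB); Malloc() below falls
//                                back to a plain malloc tracked on large_mem_list_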
// Doubly linked list helpers for the per-class block lists.
void PopMemNode(MemNode **head) {
  if (*head == nullptr) {
    return;
  }
  MemNode *next = (*head)->next_;
  (*head) = next;
  if (*head != nullptr) {
    (*head)->pre_ = nullptr;
  }
}

void PushMemNode(MemNode **head, MemNode *node) {
  if (node == nullptr) {
    return;
  }
  if (*head == nullptr) {
    *head = node;
    return;
  }
  (*head)->pre_ = node;
  node->next_ = *head;
  node->pre_ = nullptr;
  *head = node;
}

void RemoveMemNode(MemNode **head, MemNode *node) {
  if (node == nullptr) {
    return;
  }
  if ((*head) == node) {
    *head = node->next_;
    if (*head != nullptr) {
      (*head)->pre_ = nullptr;
    }
  } else {
    MemNode *node_pre = node->pre_;
    node_pre->next_ = node->next_;
    node->next_ = nullptr;
    node->pre_ = nullptr;
  }
}

void FreeNodesList(MemNode *head) {
  MemNode *node = head;
  while (node != nullptr) {
    MemNode *next = node->next_;
    free(node);
    node = next;
  }
}
}  // namespace

Allocator::Allocator() {
  for (int i = 0; i < kBlockRange; ++i) {
    allocated_list_[i] = nullptr;
    free_list_[i] = nullptr;
  }
}

Allocator::~Allocator() { Clear(); }

void Allocator::SetContext(const AllocatorContext &ctx) {
  lock_flag_ = ctx.lock_flag_;
}

void Allocator::Lock() {
  if (lock_flag_) {
    pthread_mutex_lock(&lock_);
  }
}

void Allocator::UnLock() {
  if (lock_flag_) {
    pthread_mutex_unlock(&lock_);
  }
}

void *Allocator::Malloc(size_t size) {
  if (size > kMaxMallocSize) {
    LITE_ERROR_LOG("MallocData out of max_size, size: %zu", size);
    return nullptr;
  }
  void *result = nullptr;
  int index = SizeToIndex(size);
  if (index < 0) {
    // Oversized request: allocate directly and track it on large_mem_list_.
    MemNode *node = (MemNode *)malloc(sizeof(MemNode) + size);
    if (node == nullptr) {
      LITE_ERROR_LOG("MallocData out of max_size, size: %zu", (size + sizeof(MemNode)));
      return result;
    }
    node->size_ = size;
    result = (char *)node + sizeof(MemNode);
    Lock();
    PushMemNode(&large_mem_list_, node);
    UnLock();
    return result;
  }
  Lock();
  size_t size_apply = (kBlockSize << index);
  if (free_list_[index] != nullptr) {
    // Reuse a cached block of the matching class.
    MemNode *free_node = free_list_[index];
    PopMemNode(&free_list_[index]);
    PushMemNode(&allocated_list_[index], free_node);
    result = (char *)free_node + sizeof(MemNode);
    UnLock();
    return result;
  } else {
    MemNode *new_node = (MemNode *)malloc(sizeof(MemNode) + size_apply);
    if (new_node == nullptr) {
      UnLock();
      LITE_LOG_ERROR("malloc MemNode fail!");
      return nullptr;
    }
    new_node->size_ = size;
    PushMemNode(&allocated_list_[index], new_node);
    result = (char *)new_node + sizeof(MemNode);
    UnLock();
    return result;
  }
}

void Allocator::Free(void *buf) {
  if (buf == nullptr) {
    return;
  }
  MemNode *node = (MemNode *)((char *)buf - sizeof(MemNode));
  size_t buf_size = node->size_;
  Lock();
  if (buf_size > kBlockLimit) {
    RemoveMemNode(&large_mem_list_, node);
    free(node);
  } else {
    // Pooled block: move it back to the free list for reuse instead of freeing.
    int index = SizeToIndex(buf_size);
    RemoveMemNode(&allocated_list_[index], node);
    PushMemNode(&free_list_[index], node);
  }
  UnLock();
}

size_t Allocator::GetTotalSize() {
  Lock();
  size_t total_size = 0;
  for (int i = 0; i < kBlockRange; ++i) {
    MemNode *node = allocated_list_[i];
    while (node != nullptr) {
      total_size += node->size_;
      node = node->next_;
    }
    node = free_list_[i];
    while (node != nullptr) {
      total_size += node->size_;
      node = node->next_;
    }
  }
  MemNode *node = large_mem_list_;
  while (node != nullptr) {
    total_size += node->size_;
    node = node->next_;
  }
  UnLock();
  return total_size;
}

void Allocator::Clear() {
  Lock();
  for (int i = 0; i < kBlockRange; ++i) {
    FreeNodesList(allocated_list_[i]);
    allocated_list_[i] = nullptr;
    FreeNodesList(free_list_[i]);
    free_list_[i] = nullptr;
  }
  FreeNodesList(large_mem_list_);
  // Editor's fix: reset the head so a later Free()/Clear() cannot touch freed nodes.
  large_mem_list_ = nullptr;
  UnLock();
}
}  // namespace mindspore::lite
@@ -0,0 +1,60 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_INTERNAL_SRC_ALLOCATOR_H_
#define MINDSPORE_LITE_INTERNAL_SRC_ALLOCATOR_H_

#include <stddef.h>
#include <pthread.h>
#include "internal/include/string.h"

namespace mindspore::lite {
constexpr int kBlockRange = 9;

typedef struct AllocatorContext {
  bool lock_flag_;
} AllocatorContext;

typedef struct MemNode {
  MemNode *pre_ = nullptr;
  MemNode *next_ = nullptr;
  size_t size_ = 0;
} MemNode;

class Allocator {
 public:
  Allocator();
  ~Allocator();
  void SetContext(const AllocatorContext &ctx);
  void *Malloc(size_t size);
  void Free(void *ptr);
  void Clear();
  size_t GetTotalSize();

 private:
  void Lock();
  void UnLock();
  bool lock_flag_ = false;
  pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;
  MemNode *large_mem_list_ = nullptr;
  MemNode *allocated_list_[kBlockRange];
  MemNode *free_list_[kBlockRange];
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_INTERNAL_SRC_ALLOCATOR_H_
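A hedged usage sketch of the allocator's pooling behaviour (sizes are arbitrary; error handling trimmed):

  // Blocks up to 256 KB are pooled per size class and reused; larger requests
  // fall back to plain malloc/free.
  mindspore::lite::AllocatorContext ctx = {true};  // enable the pthread mutex
  mindspore::lite::Allocator alloc;
  alloc.SetContext(ctx);
  void *buf = alloc.Malloc(3000 * sizeof(float));   // 12000 B -> 16 KB class
  alloc.Free(buf);                                  // returned to free_list_, not the OS
  void *buf2 = alloc.Malloc(3000 * sizeof(float));  // reuses the cached block
  alloc.Free(buf2);
  alloc.Clear();                                    // drops every cached block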
@@ -52,9 +52,25 @@ Vector<T>::Vector(const Vector<T> &vec) {
  memcpy(data_, vec.data_, size_ * elem_size_);
}

template<typename T>
Vector<T> &Vector<T>::operator=(const Vector<T> &vec) {
  if (this == &vec) {
    return *this;
  }
  // Editor's fix: release the previously owned buffer before reallocating so
  // assignment does not leak it.
  if (data_ != nullptr) {
    free(data_);
  }
  size_ = vec.size_;
  elem_size_ = sizeof(T);
  capacity_ = vec.capacity_;
  data_ = reinterpret_cast<T *>(malloc(capacity_ * elem_size_));
  if (data_ == nullptr) {
    MS_C_EXCEPTION("malloc data failed");
  }
  memcpy(data_, vec.data_, size_ * elem_size_);
  return *this;
}
template<typename T>
Vector<T>::~Vector() {
  if (data_) {
  if (data_ != nullptr) {
    free(data_);
  }
}
@@ -62,7 +78,7 @@ Vector<T>::~Vector() {
template<typename T>
void Vector<T>::clear() {
  size_ = 0;
  if (data_) {
  if (data_ != nullptr) {
    free(data_);
    data_ = nullptr;
  }
@@ -83,6 +99,21 @@ void Vector<T>::push_back(const T &elem) {
  ++size_;
}

template<typename T>
void Vector<T>::push_back(T &&elem) {
  if (data_ == nullptr) {
    data_ = reinterpret_cast<T *>(malloc(capacity_ * elem_size_));
    if (data_ == nullptr) {
      MS_C_EXCEPTION("malloc data failed");
    }
  } else if (size_ == capacity_) {
    resize(size_ + 1);
    --size_;
  }
  memcpy(data_ + size_, &elem, elem_size_);
  ++size_;
}
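// Editor's note: this rvalue overload byte-copies the element exactly like the
// const-ref overload above; that is only safe because this Vector is used with
// trivially copyable payloads (pointers, PODs). A type with an owning destructor
// or a real move constructor would be broken by the memcpy here.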
template<typename T>
void Vector<T>::pop_back() {
  if (size_ > 0) {
@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_ACTIVATION_H_

#include "internal/include/model.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoActivationInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoActivation(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
@@ -19,7 +19,6 @@
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "nnacl/arithmetic_common.h"
#include "nnacl/fp32/arithmetic.h"
@@ -47,14 +46,14 @@ int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_c
int CalBroadCasting(const TensorPtrVector &in_tensors, int *outside, int *break_pos, ArithmeticParameter *params) {
  params->broadcasting_ = false;
  for (int i = 0; i < params->ndim_; i++) {
  for (size_t i = 0; i < params->ndim_; ++i) {
    if (params->in_shape0_[i] != params->in_shape1_[i]) {
      if (params->in_shape0_[i] == 1) {
        params->out_shape_[i] = params->in_shape1_[i];
      } else if (params->in_shape1_[i] == 1) {
        params->out_shape_[i] = params->in_shape0_[i];
      } else {
        LITE_ERROR_LOG("shapes of input tensors cannot be broadcasted");
        LITE_LOG_ERROR("shapes of input tensors cannot be broadcasted");
        return RET_INPUT_TENSOR_ERROR;
      }
      params->broadcasting_ = true;
@@ -100,11 +99,11 @@ int RunArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_
int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    LITE_LOG_ERROR("input tensors num not correct or input data is NULL!");
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1) {
    LITE_ERROR_LOG("output tensors num not correct!")
    LITE_LOG_ERROR("output tensors num not correct!");
    return RET_ERROR;
  }
  ShapeVector in_shape0 = in_tensors[0]->shape_;
@@ -116,7 +115,7 @@ int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVec
    arithmeticParameter->ndim_ = ndim1;
    int fill_dim_num = ndim1 - ndim0;
    int j = 0;
    for (size_t i = 0; i < ndim1; i++) {
    for (int i = 0; i < ndim1; ++i) {
      if (i < fill_dim_num) {
        arithmeticParameter->in_shape0_[i] = 1;
      } else {
@@ -128,7 +127,7 @@ int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVec
    arithmeticParameter->ndim_ = ndim0;
    int fill_dim_num = ndim0 - ndim1;
    int j = 0;
    for (size_t i = 0; i < ndim0; i++) {
    for (int i = 0; i < ndim0; ++i) {
      if (i < fill_dim_num) {
        arithmeticParameter->in_shape1_[i] = 1;
      } else {
@@ -138,20 +137,20 @@ int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVec
    }
  } else {
    arithmeticParameter->ndim_ = ndim0;
    for (size_t i = 0; i < ndim0; i++) {
    for (int i = 0; i < ndim0; ++i) {
      arithmeticParameter->in_shape0_[i] = in_shape0[i];
      arithmeticParameter->in_shape1_[i] = in_shape1[i];
    }
  }
  ShapeVector out_shape;
  for (int i = 0; i < arithmeticParameter->ndim_; i++) {
  for (size_t i = 0; i < arithmeticParameter->ndim_; ++i) {
    if (arithmeticParameter->in_shape0_[i] != arithmeticParameter->in_shape1_[i]) {
      if (arithmeticParameter->in_shape0_[i] == 1) {
        out_shape.push_back(arithmeticParameter->in_shape1_[i]);
      } else if (arithmeticParameter->in_shape1_[i] == 1) {
        out_shape.push_back(arithmeticParameter->in_shape0_[i]);
      } else {
        LITE_ERROR_LOG("shapes of input tensors cannot be broadcasted!")
        LITE_LOG_ERROR("shapes of input tensors cannot be broadcasted!");
        return RET_INPUT_TENSOR_ERROR;
      }
    } else {
@@ -165,7 +164,7 @@ int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVec
}

int ChooseKernel(const int kernel_type, ArithmeticRun *arithmetic_run, ArithmeticParameter *params) {
  if (kernel_type == KernelType::Mul) {
  if (kernel_type == KernelType::KernelType_Mul) {
    if (params->activation_type_ == ActivationType::RELU) {
      *arithmetic_run = ElementMulRelu;
    } else if (params->activation_type_ == ActivationType::RELU6) {
@@ -174,14 +173,14 @@ int ChooseKernel(const int kernel_type, ArithmeticRun *arithmetic_run, Arithmeti
      *arithmetic_run = ElementMul;
    }
  } else {
    LITE_ERROR_LOG("unsupported operator type");
    LITE_LOG_INFO("unsupported operator type");
    return RET_ERROR;
  }
  return RET_OK;
}

int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run, ArithmeticParameter *params) {
  if (kernel_type == KernelType::Mul) {
  if (kernel_type == KernelType::KernelType_Mul) {
    if (params->activation_type_ == ActivationType::RELU) {
      *arithmetic_opt_run = ElementOptMulRelu;
    } else if (params->activation_type_ == ActivationType::RELU6) {
@@ -190,7 +189,7 @@ int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run,
      *arithmetic_opt_run = ElementOptMul;
    }
  } else {
    LITE_INFO_LOG("kernel does not have an opt version");
    LITE_LOG_INFO("kernel does not have an opt version");
  }
  return RET_OK;
}
| @@ -198,15 +197,15 @@ int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run, | |||
| int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node, | |||
| mindspore::lite::Allocator *allocator) { | |||
| if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) { | |||
| LITE_ERROR_LOG("input tensors num not correct or input data is NULL!") | |||
| LITE_LOG_ERROR("input tensors num not correct or input data is NULL!"); | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) { | |||
| LITE_ERROR_LOG("output tensors num not correct or output data is NULL!") | |||
| LITE_LOG_ERROR("output tensors num not correct or output data is NULL!"); | |||
| return RET_ERROR; | |||
| } | |||
| if (allocator == NULL) { | |||
| LITE_ERROR_LOG("allocator is NULL!") | |||
| LITE_LOG_ERROR("allocator is NULL!"); | |||
| return RET_ERROR; | |||
| } | |||
| ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_); | |||
| @@ -18,7 +18,7 @@ | |||
| #include "internal/include/model.h" | |||
| #include "internal/include/lite_utils.h" | |||
| #include "src/runtime/allocator.h" | |||
| #include "internal/src/allocator.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param); | |||
| @@ -31,9 +31,9 @@ int DoArithmeticSelf(const TensorPtrVector &in_tensors, const TensorPtrVector &o | |||
| size_t data_size = in_tensors[0]->ElementsNum(); | |||
| OpParameter *param = node->primitive_; | |||
| int ret; | |||
| if (param->type_ == KernelType::Log) { | |||
| if (param->type_ == KernelType::KernelType_Log) { | |||
| ret = ElementLog((float *)in_tensors[0]->data_, (float *)out_tensors[0]->data_, data_size); | |||
| } else if (param->type_ == KernelType::Neg) { | |||
| } else if (param->type_ == KernelType::KernelType_Neg) { | |||
| ret = ElementNegative((float *)in_tensors[0]->data_, (float *)out_tensors[0]->data_, data_size); | |||
| } else { | |||
| LITE_ERROR_LOG("Unsupport kernel type: %d", param->type_); | |||
@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_SELF_H_

#include "internal/include/model.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoArithmeticSelfInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param);
@@ -17,7 +17,6 @@
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/arithmetic_common.h"
@@ -25,11 +24,11 @@
int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    LITE_LOG_ERROR("input tensors num not correct or input data is NULL!");
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1) {
    LITE_ERROR_LOG("output tensors num not correct!")
    LITE_LOG_ERROR("output tensors num not correct!");
    return RET_ERROR;
  }
  out_tensors[0]->shape_ = in_tensors[0]->shape_;
@@ -41,15 +40,15 @@ int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector
int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
              mindspore::lite::Allocator *allocator) {
  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
    LITE_LOG_ERROR("input tensors num not correct or input data is NULL!");
    return RET_INPUT_TENSOR_ERROR;
  }
  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
    LITE_LOG_ERROR("output tensors num not correct or output data is NULL!");
    return RET_ERROR;
  }
  if (allocator == NULL) {
    LITE_ERROR_LOG("allocator is NULL!")
    LITE_LOG_ERROR("allocator is NULL!");
    return RET_ERROR;
  }
  ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
@@ -70,7 +69,7 @@ int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tens
  float *tile_in = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
  float *tile_bias = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
  if (tile_in == NULL || tile_bias == NULL) {
    LITE_ERROR_LOG("Memory allocation failed!")
    LITE_LOG_ERROR("Memory allocation failed!");
    allocator->Free(tile_in);
    allocator->Free(tile_bias);
    return RET_ERROR;
@@ -18,7 +18,7 @@
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
@@ -71,14 +71,7 @@ void FreeMatMulKernelData(MatMulCPUKernelData *kernel_data, mindspore::lite::All
  free(kernel_data);
}

static void SwapDims(Int32Vector *dims, int index1, int index2) {
  int tmp = dims->at(index1);
  dims->at(index1) = dims->at(index2);
  dims->at(index2) = tmp;
}

int DoMatMulInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  MS_ASSERT(this->primitive_ != nullptr);
  TensorPtr input0 = in_tensors.at(0);
  MS_ASSERT(input0 != nullptr);
  TensorPtr input1 = in_tensors.at(1);
@@ -86,31 +79,20 @@ int DoMatMulInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector
  TensorPtr output = out_tensors.at(0);
  MS_ASSERT(output != nullptr);
  output->data_type_ = input0->data_type_;
  output->format_ = input0->format_;
  Int32Vector a_shape = input0->shape_;
  Int32Vector b_shape = input1->shape_;
  if (a_shape.size() < 2 || b_shape.size() < 2) {
    LITE_ERROR_LOG("inputs shape is invalid");
    return RET_INPUT_TENSOR_ERROR;
  }
  for (size_t i = 0; i < a_shape.size() - 2; ++i) {
    if (a_shape[i] != b_shape[i]) {
      LITE_ERROR_LOG("Op MatMul's dimensions must be equal");
      return RET_INPUT_TENSOR_ERROR;
    }
  }
  MatMulParameter *matmul_param = (MatMulParameter *)param;
  if (matmul_param->a_transpose_) {
    SwapDims(&a_shape, a_shape.size() - 1, a_shape.size() - 2);
  }
  if (matmul_param->b_transpose_) {
    SwapDims(&b_shape, b_shape.size() - 1, b_shape.size() - 2);
  }
  output->shape_ = a_shape;
  output->shape_.at(a_shape.size() - 1) = b_shape.at(b_shape.size() - 1);
  int in_datatype[2] = {input0->data_type_, input1->data_type_};
  int in_format[2] = {static_cast<int>(input0->format_), static_cast<int>(input1->format_)};
  size_t dim_size[2] = {input0->shape_.size(), input1->shape_.size()};
  int *in_shape[2] = {input0->shape_.data(), input1->shape_.data()};
  int out_format;
  int out_datatype;
  int ret = MatMulInferShape(in_shape, 2, dim_size, output->shape_.data(), in_format, &out_format, in_datatype,
                             &out_datatype, param);
  if (ret != NNACL_OK) {
    LITE_ERROR_LOG("matmul infershape fail! ret: %d", ret);
    return RET_ERROR;
  }
  output->format_ = static_cast<Format>(out_format);
  output->data_type_ = static_cast<TypeId>(out_datatype);
  return RET_OK;
}
@@ -149,7 +131,7 @@ int DoMatMul(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tenso
  MatMulCPUKernelData *kernel_data = (MatMulCPUKernelData *)malloc(sizeof(MatMulCPUKernelData));
  if (kernel_data == NULL) {
    LITE_ERROR_LOG("Malloc MatMulCPUKernelData failed");
    LITE_LOG_ERROR("Malloc MatMulCPUKernelData failed");
    return RET_MEMORY_FAILED;
  }
  kernel_data->a_c12_ptr_
@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_MATMUL_H_

#include "internal/include/model.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoMatMulInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoMatMul(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
@@ -15,10 +15,8 @@
 */
#include "internal/src/kernel/fp32/reduce.h"
#include <vector>
#include "internal/include/model.h"
#include "internal/include/lite_utils.h"
#include "src/runtime/allocator.h"
#include "internal/src/lite_log.h"
#include "internal/include/errorcode.h"
#include "nnacl/reduce_parameter.h"
@@ -27,16 +25,8 @@
typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                       float *dst_data, const int tid, const int thread_num);

int MallocTmpBuffer(std::vector<float *> *data_buffers, const ShapeVector &shape, const int *axes, const int num_axes,
int MallocTmpBuffer(float *data_buffers[], const ShapeVector &shape, const int *axes, const int num_axes,
                    mindspore::lite::Allocator *allocator) {
  for (int i = 0; i < data_buffers->size(); ++i) {
    if (data_buffers->at(i) != NULL) {
      free(data_buffers->at(i));
      data_buffers->at(i) = NULL;
    }
  }
  data_buffers->clear();
  ShapeVector input_shape = shape;
  const int rank = input_shape.size();
  for (auto i = 0; i < num_axes - 1; i++) {
@@ -48,39 +38,39 @@ int MallocTmpBuffer(std::vector<float *> *data_buffers, const ShapeVector &shape
    }
    float *buffer = reinterpret_cast<float *>(allocator->Malloc(size * sizeof(float)));
    if (buffer == NULL) {
      LITE_ERROR_LOG("Memory allocation failed!")
    if (buffer == nullptr) {
      LITE_LOG_ERROR("Memory allocation failed!");
      return RET_ERROR;
    }
    data_buffers->emplace_back(buffer);
    data_buffers[i] = buffer;
    input_shape[axis] = 1;
  }
  return RET_OK;
}

void FreeTmpBuffer(std::vector<float *> *data_buffers, mindspore::lite::Allocator *allocator) {
void FreeTmpBuffer(float *data_buffers[], int size, mindspore::lite::Allocator *allocator) {
  if (data_buffers == nullptr) {
    return;
  }
  for (int i = 0; i < data_buffers->size(); ++i) {
    allocator->Free(data_buffers->at(i));
  for (int i = 0; i < size; ++i) {
    allocator->Free(data_buffers[i]);
    data_buffers[i] = nullptr;
  }
  data_buffers->clear();
}

int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data, float *out_data, Int32Vector axes,
int RunReduce(Reducer reducer, float *data_buffers[], float *in_data, float *out_data, ReduceParameter *params,
              ShapeVector shape) {
  int rank = shape.size();
  float *dst_data = NULL;
  float *src_data = in_data;
  ShapeVector tmp_shape = shape;
  for (size_t i = 0; i < axes.size(); ++i) {
    if (i != axes.size() - 1) {
  for (int i = 0; i < params->num_axes_; ++i) {
    if (i != params->num_axes_ - 1) {
      dst_data = data_buffers[i];
    } else {
      dst_data = out_data;
    }
    int axis = axes[i];
    int axis = params->axes_[i];
    int outer_size = 1;
    for (int j = 0; j < axis; j++) {
      outer_size *= tmp_shape[j];
@@ -92,7 +82,7 @@ int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data
    int axis_size = tmp_shape[axis];
    int error_code = reducer(outer_size, inner_size, axis_size, src_data, dst_data, 0, 1);
    if (error_code != RET_OK) {
      LITE_ERROR_LOG("Reduce run error!")
      LITE_LOG_ERROR("Reduce run error!");
      return RET_ERROR;
    }
    tmp_shape[axis] = 1;
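To make the buffer chaining concrete, an illustrative trace (shapes invented for the example):

  // shape = {2, 3, 4}, params->axes_ = {1, 2}, params->num_axes_ = 2
  //   pass 0: axis 1, outer_size = 2, inner_size = 4, axis_size = 3 -> data_buffers[0]
  //   pass 1: axis 2, outer_size = 2, inner_size = 1, axis_size = 4 -> out_data
  // The final pass writes straight into out_data, which is why MallocTmpBuffer
  // only allocates num_axes_ - 1 temporaries.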
| @@ -103,11 +93,11 @@ int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data | |||
| int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) { | |||
| if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) { | |||
| LITE_ERROR_LOG("input tensors num not correct or input data is NULL!") | |||
| LITE_LOG_ERROR("input tensors num not correct or input data is NULL!"); | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (out_tensors.size() != 1) { | |||
| LITE_ERROR_LOG("output tensors num not correct!") | |||
| LITE_LOG_ERROR("output tensors num not correct!"); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -121,7 +111,7 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| int actual_axes_num = num_axes; | |||
| for (int i = 0; i < num_axes; ++i) { | |||
| if (reduceParameter->axes_[i] < -rank || reduceParameter->axes_[i] >= rank) { | |||
| LITE_ERROR_LOG("reduce_sum got invalid axis!") | |||
| LITE_LOG_ERROR("reduce_sum got invalid axis!"); | |||
| return RET_ERROR; | |||
| } | |||
| if (reduceParameter->axes_[i] < 0) { | |||
| @@ -132,7 +122,7 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| } | |||
| if (reduceParameter->reduce_to_end_) { | |||
| if (num_axes != 1) { | |||
| LITE_ERROR_LOG("Reduce when reduce_to_end, num of axis should be 1!") | |||
| LITE_LOG_ERROR("Reduce when reduce_to_end, num of axis should be 1!"); | |||
| return RET_ERROR; | |||
| } | |||
| int begin_axis = axes[0]; | |||
| @@ -144,14 +134,14 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| if (num_axes == 0) { | |||
| axes.resize(rank); | |||
| for (size_t i = 0; i < rank; i++) { | |||
| for (auto i = 0; i < rank; ++i) { | |||
| axes[i] = i; | |||
| if (keep_dims) { | |||
| out_shape.push_back(1); | |||
| } | |||
| } | |||
| reduceParameter->num_axes_ = axes.size(); | |||
| for (int i = 0; i < axes.size(); ++i) { | |||
| for (size_t i = 0; i < axes.size(); ++i) { | |||
| reduceParameter->axes_[i] = axes[i]; | |||
| } | |||
| out_tensors[0]->shape_ = out_shape; | |||
| @@ -160,9 +150,9 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| return RET_OK; | |||
| } | |||
| // reduce on selected axes | |||
| for (size_t i = 0; i < rank; i++) { | |||
| for (auto i = 0; i < rank; ++i) { | |||
| bool reduce_axis = false; | |||
| for (size_t idx = 0; idx < num_axes; ++idx) { | |||
| for (auto idx = 0; idx < num_axes; ++idx) { | |||
| if (axes[idx] == i) { | |||
| reduce_axis = true; | |||
| break; | |||
| @@ -177,7 +167,7 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| } | |||
| } | |||
| reduceParameter->num_axes_ = axes.size(); | |||
| for (int i = 0; i < axes.size(); ++i) { | |||
| for (size_t i = 0; i < axes.size(); ++i) { | |||
| reduceParameter->axes_[i] = axes[i]; | |||
| } | |||
| out_tensors[0]->shape_ = out_shape; | |||
| @@ -189,15 +179,15 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector | |||
| int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node, | |||
| mindspore::lite::Allocator *allocator) { | |||
| if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) { | |||
| LITE_ERROR_LOG("input tensors num not correct or input data is NULL!") | |||
| LITE_LOG_ERROR("input tensors num not correct or input data is NULL!"); | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) { | |||
| LITE_ERROR_LOG("output tensors num not correct or output data is NULL!") | |||
| LITE_LOG_ERROR("output tensors num not correct or output data is NULL!"); | |||
| return RET_ERROR; | |||
| } | |||
| if (allocator == NULL) { | |||
| LITE_ERROR_LOG("allocator is NULL!") | |||
| LITE_LOG_ERROR("allocator is NULL!"); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -209,21 +199,18 @@ int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tenso | |||
| reducer = ReduceMean; | |||
| } | |||
| std::vector<float *> data_buffers; | |||
| int status = MallocTmpBuffer(&data_buffers, in_tensors[0]->shape_, params->axes_, params->num_axes_, allocator); | |||
| int buf_num = params->num_axes_ - 1; | |||
| float *data_buffers[buf_num]; | |||
| int status = MallocTmpBuffer(data_buffers, in_tensors[0]->shape_, params->axes_, params->num_axes_, allocator); | |||
| if (status != RET_OK) { | |||
| FreeTmpBuffer(&data_buffers, allocator); | |||
| FreeTmpBuffer(data_buffers, buf_num, allocator); | |||
| return status; | |||
| } | |||
| Int32Vector axes; | |||
| for (int i = 0; i < params->num_axes_; ++i) { | |||
| axes.push_back(params->axes_[i]); | |||
| } | |||
| status = RunReduce(reducer, data_buffers, reinterpret_cast<float *>(in_tensors[0]->data_), | |||
| reinterpret_cast<float *>(out_tensors[0]->data_), axes, in_tensors[0]->shape_); | |||
| reinterpret_cast<float *>(out_tensors[0]->data_), params, in_tensors[0]->shape_); | |||
| FreeTmpBuffer(&data_buffers, allocator); | |||
| FreeTmpBuffer(data_buffers, buf_num, allocator); | |||
| if (status != RET_OK) { | |||
| return RET_ERROR; | |||
| @@ -19,7 +19,7 @@ | |||
| #include "internal/include/model.h" | |||
| #include "internal/include/ms_tensor.h" | |||
| #include "internal/include/lite_utils.h" | |||
| #include "src/runtime/allocator.h" | |||
| #include "internal/src/allocator.h" | |||
| int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param); | |||
@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_GRAD_ACTIVATION_GRAD_H_

#include "internal/include/model.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoActivationGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param);
@@ -35,9 +35,9 @@ int DoArithmeticSelfGrad(const TensorPtrVector &in_tensors, const TensorPtrVecto
  float *x_data = reinterpret_cast<float *>(in_tensors[1]->data_);
  float *dx_data = reinterpret_cast<float *>(out_tensors[0]->data_);
  int ret;
  if (param->type_ == KernelType::LogGrad) {
  if (param->type_ == KernelType::KernelType_LogGrad) {
    ret = ElementDiv(dy_data, x_data, dx_data, data_size);
  } else if (param->type_ == KernelType::NegGrad) {
  } else if (param->type_ == KernelType::KernelType_NegGrad) {
    ret = ElementNegative(dy_data, dx_data, data_size);
  } else {
    LITE_ERROR_LOG("Unsupported kernel type: %d", param->type_);
@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_GRAD_ARITHMETIC_SELF_GRAD_H_

#include "internal/include/model.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"

int DoArithmeticSelfGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                                   OpParameter *param);
@@ -18,15 +18,18 @@
#define MINDSPORE_LITE_INTERNAL_SRC_LITE_LOG_H_

#include <stdlib.h>
#ifdef DEBUG
#include <stdio.h>
#ifndef Release
#include <assert.h>
#endif

#ifdef DEBUG
#ifndef Release
#define LITE_DEBUG_LOG(format, ...) \
  printf("[DEBUG] [%s %s] [%s] [%d] " format "\n", __DATE__, __TIME__, __FILE__, __LINE__, __VA_ARGS__)
#define LITE_INFO_LOG(format, ...) \
  printf("[INFO] [%s %s] [%s] [%d] " format "\n", __DATE__, __TIME__, __FILE__, __LINE__, __VA_ARGS__)
#define LITE_LOG_INFO(...) \
  printf("[INFO] [%s %s] [%s] [%d] %s\n", __DATE__, __TIME__, __FILE__, __LINE__, __VA_ARGS__)
#define LITE_WARNING_LOG(format, ...) \
  printf("[WARNING] [%s %s] [%s] [%d] " format "\n", __DATE__, __TIME__, __FILE__, __LINE__, __VA_ARGS__)
#define LITE_ERROR_LOG(format, ...) \
@@ -40,6 +43,7 @@
#else
#define LITE_DEBUG_LOG(...)
#define LITE_INFO_LOG(...)
#define LITE_LOG_INFO(...)
#define LITE_WARNING_LOG(...)
#define LITE_ERROR_LOG(...)
#define LITE_LOG_ERROR(...)
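The two macro families behave differently at the call site, which is what most of the LITE_ERROR_LOG -> LITE_LOG_ERROR churn in this change is about: the *_LOG macros are printf-style and need at least one argument after the format string, while the LOG_* variants take a single plain message. A hedged illustration (ret is a stand-in variable):

  LITE_ERROR_LOG("run kernel fail! ret: %d", ret);  // format string + arguments
  LITE_LOG_ERROR("allocator is NULL!");             // bare message, no varargs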
@@ -16,26 +16,30 @@
#include "internal/include/lite_session.h"
#include "internal/include/model.h"
#include "internal/include/ms_tensor.h"
#include "src/runtime/allocator.h"
#include "internal/src/allocator.h"
#include "internal/include/errorcode.h"
#include "internal/src/lite_log.h"
#include "internal/src/kernel/fp32/activation.h"
#include "internal/src/kernel/fp32/arithmetic_self.h"
#include "internal/src/kernel/fp32/matmul.h"
#include "internal/src/kernel/fp32/arithmetic.h"
#include "internal/src/kernel/fp32/bias_add.h"
#ifdef SUPPORT_TRAIN
#include "internal/src/kernel/fp32_grad/arithmetic_self_grad.h"
#include "internal/src/kernel/fp32_grad/activation_grad.h"
#endif

static Context *g_ctx;
static Model *g_model;
static LiteSession g_session;
static mindspore::lite::DefaultAllocator g_allocator;
static mindspore::lite::Allocator g_allocator;
static bool g_infershape_interrupt = false;
static bool g_first_load = true;

typedef int (*InferShape)(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
typedef int (*RunKernel)(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                         mindspore::lite::Allocator *allocator);
static InferShape g_infershape_funcs[KernelType::END];
static RunKernel g_runkernel_funcs[KernelType::END];
static InferShape g_infershape_funcs[KernelType::KernelType_END];
static RunKernel g_runkernel_funcs[KernelType::KernelType_END];

static int ModelInferShape() {
  NodePtrVector nodes = g_model->nodes_;
@@ -43,7 +47,7 @@ static int ModelInferShape() {
  for (size_t i = 0; i < nodes_size; ++i) {
    auto node = nodes[i];
    if (node->primitive_ == NULL) {
      LITE_ERROR_LOG("node's primitive is NULL!");
      LITE_LOG_ERROR("node's primitive is NULL!");
      return RET_ERROR;
    }
    TensorPtrVector in_tensors;
@@ -75,22 +79,27 @@ static int ModelInferShape() {
static void InitFuncs() {
  if (g_first_load) {
    g_infershape_funcs[KernelType::MatMul] = DoMatMulInferShape;
    g_infershape_funcs[KernelType::Activation] = DoActivationInferShape;
    g_infershape_funcs[KernelType::Log] = DoArithmeticSelfInferShape;
    g_infershape_funcs[KernelType::Neg] = DoArithmeticSelfInferShape;
    g_runkernel_funcs[KernelType::MatMul] = DoMatMul;
    g_runkernel_funcs[KernelType::Activation] = DoActivation;
    g_runkernel_funcs[KernelType::Log] = DoArithmeticSelf;
    g_runkernel_funcs[KernelType::Neg] = DoArithmeticSelf;
    g_infershape_funcs[KernelType::KernelType_MatMul] = DoMatMulInferShape;
    g_infershape_funcs[KernelType::KernelType_Activation] = DoActivationInferShape;
    g_infershape_funcs[KernelType::KernelType_Log] = DoArithmeticSelfInferShape;
    g_infershape_funcs[KernelType::KernelType_Neg] = DoArithmeticSelfInferShape;
    g_infershape_funcs[KernelType::KernelType_Mul] = DoArithmeticInferShape;
    g_infershape_funcs[KernelType::KernelType_BiasAdd] = DoBiasAddInferShape;
    g_runkernel_funcs[KernelType::KernelType_MatMul] = DoMatMul;
    g_runkernel_funcs[KernelType::KernelType_Activation] = DoActivation;
    g_runkernel_funcs[KernelType::KernelType_Log] = DoArithmeticSelf;
    g_runkernel_funcs[KernelType::KernelType_Neg] = DoArithmeticSelf;
    g_runkernel_funcs[KernelType::KernelType_Mul] = DoArithmetic;
    g_runkernel_funcs[KernelType::KernelType_BiasAdd] = DoBiasAdd;
#ifdef SUPPORT_TRAIN
    g_infershape_funcs[KernelType::ActivationGrad] = DoActivationGradInferShape;
    g_infershape_funcs[KernelType::KernelType_ActivationGrad] = DoActivationGradInferShape;
    g_infershape_funcs[KernelType::KernelType_NegGrad] = DoArithmeticSelfGradInferShape;
    g_infershape_funcs[KernelType::KernelType_LogGrad] = DoArithmeticSelfGradInferShape;
    g_runkernel_funcs[KernelType::NegGrad] = DoArithmeticSelfGrad;
    g_runkernel_funcs[KernelType::ActivationGrad] = DoActivationGrad;
    g_runkernel_funcs[KernelType::LogGrad] = DoArithmeticSelfGrad;
    g_runkernel_funcs[KernelType::KernelType_NegGrad] = DoArithmeticSelfGrad;
    g_runkernel_funcs[KernelType::KernelType_ActivationGrad] = DoActivationGrad;
    g_runkernel_funcs[KernelType::KernelType_LogGrad] = DoArithmeticSelfGrad;
#endif
    g_first_load = false;
  }
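InitFuncs() fills two flat tables indexed by the kernel's type tag; a hedged sketch of how a node is then dispatched (field names follow the surrounding code, the snippet itself is illustrative):

  // Look up the infer-shape and run functions registered for this node's type.
  int type = node->primitive_->type_;
  InferShape infer_shape = g_infershape_funcs[type];
  RunKernel run_kernel = g_runkernel_funcs[type];
  if (infer_shape == NULL || run_kernel == NULL) {
    LITE_ERROR_LOG("unsupported kernel type: %d", type);
    return RET_ERROR;
  }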
@@ -155,7 +164,7 @@ int LiteSession::RunGraph() {
  for (size_t i = 0; i < nodes_size; ++i) {
    auto node = nodes[i];
    if (node->primitive_ == nullptr) {
      LITE_ERROR_LOG("node's primitive is NULL!");
      LITE_LOG_ERROR("node's primitive is NULL!");
      return RET_ERROR;
    }
    TensorPtrVector in_tensors;
@@ -182,7 +191,7 @@ int LiteSession::RunGraph() {
    for (size_t j = 0; j < out_tensors.size(); ++j) {
      out_tensors[j]->data_ = g_allocator.Malloc(out_tensors[j]->Size());
      if (out_tensors[j]->data_ == NULL) {
        LITE_ERROR_LOG("Malloc data for out tensor fail!");
        LITE_LOG_ERROR("Malloc data for out tensor fail!");
        return RET_NULL_PTR;
      }
    }
@@ -194,7 +203,7 @@ int LiteSession::RunGraph() {
    int ret = (*run_kernel)(in_tensors, out_tensors, node, &g_allocator);
    if (ret != RET_OK) {
      LITE_ERROR_LOG("run kernel fail! ret: ", ret);
      LITE_ERROR_LOG("run kernel fail! ret: %d", ret);
      return ret;
    }
  }
| @@ -13,7 +13,6 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include "internal/include/string.h" | |||
| #include "internal/include/vector.h" | |||
| #include "internal/include/ms_tensor.h" | |||
| @@ -85,7 +84,7 @@ size_t MSTensor::Size() const { | |||
| size = sizeof(bool); | |||
| break; | |||
| default: | |||
| std::cout << "Not support the type: " << this->data_type_; | |||
| LITE_ERROR_LOG("Not support the type: %d", this->data_type_); | |||
| return 0; | |||
| } | |||
| size *= (format_ == Format::Format_NC4HW4 || format_ == Format::Format_NHWC4) ? ElementsC4Num() : ElementsNum(); | |||
| @@ -94,7 +93,7 @@ size_t MSTensor::Size() const { | |||
| } | |||
| int32_t MSTensor::Batch() const { | |||
| if (this->shape_.size() != 4 && this->shape_.size() != 2) { | |||
| std::cout << "Unsupported tensor shape: " << this->shape_.size(); | |||
| LITE_ERROR_LOG("Unsupported tensor shape: %zu", this->shape_.size()); | |||
| return -1; | |||
| } | |||
| switch (this->format_) { | |||
| @@ -115,14 +114,14 @@ int32_t MSTensor::Batch() const { | |||
| case Format::Format_CKHW: | |||
| return this->shape_[1]; | |||
| default: | |||
| // std::cout << "Unsupported format: " << EnumNameFormat(this->format_); | |||
| LITE_ERROR_LOG("Unsupported format: %d", this->format_); | |||
| return -1; | |||
| } | |||
| } | |||
| int32_t MSTensor::Channel() const { | |||
| if (this->shape_.size() != 4 && this->shape_.size() != 2) { | |||
| std::cout << "Unsupported tensor shape: " << this->shape_.size(); | |||
| LITE_ERROR_LOG("Unsupported tensor shape: %zu", this->shape_.size()); | |||
| return -1; | |||
| } | |||
| switch (this->format_) { | |||
| @@ -149,7 +148,7 @@ int32_t MSTensor::Channel() const { | |||
| int32_t MSTensor::Height() const { | |||
| if (this->shape_.size() != 4 && this->shape_.size() != 2) { | |||
| std::cout << "Unsupported tensor shape: " << this->shape_.size(); | |||
| LITE_ERROR_LOG("Unsupported tensor shape: %zu", this->shape_.size()); | |||
| return -1; | |||
| } | |||
| switch (this->format_) { | |||
| @@ -169,14 +168,14 @@ int32_t MSTensor::Height() const { | |||
| case Format::Format_HW4: | |||
| return this->shape_[0]; | |||
| default: | |||
| // std::cout << "Unsupported format: " << EnumNameFormat(this->format_); | |||
| LITE_ERROR_LOG("Unsupported format: %d", this->format_); | |||
| return -1; | |||
| } | |||
| } | |||
| int32_t MSTensor::Width() const { | |||
| if (this->shape_.size() != 4 && this->shape_.size() != 2) { | |||
| std::cout << "Unsupported tensor shape: " << this->shape_.size(); | |||
| LITE_ERROR_LOG("Unsupported tensor shape: %zu", this->shape_.size()); | |||
| return -1; | |||
| } | |||
| switch (this->format_) { | |||
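The accessors above all follow the same recipe: validate the rank (only 2-D and 4-D tensors are supported), map the requested dimension to a shape index based on `format_`, and return -1 for anything unsupported. A reduced sketch of that mapping for two common layouts — the real switch covers many more formats:

```cpp
#include <cstdio>
#include <vector>

enum class Layout { NHWC, NCHW };

// Returns -1 for unsupported ranks, matching the error convention above.
int Channel(const std::vector<int> &shape, Layout fmt) {
  if (shape.size() != 4) return -1;
  return fmt == Layout::NHWC ? shape[3] : shape[1];
}

int main() {
  std::vector<int> shape = {1, 224, 224, 3};
  std::printf("NHWC channel: %d\n", Channel(shape, Layout::NHWC));  // 3
  std::printf("NCHW channel: %d\n", Channel(shape, Layout::NCHW));  // 224
}
```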
| @@ -483,3 +483,38 @@ void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActT | |||
| MatMul12x8(a, b, c, bias, act_type, deep, row, col, stride, out_type); | |||
| #endif | |||
| } | |||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||
| static void SwapDims(int *dims, int index1, int index2) { | |||
| int tmp = dims[index1]; | |||
| dims[index1] = dims[index2]; | |||
| dims[index2] = tmp; | |||
| } | |||
| int MatMulInferShape(int **in_shape, int in_num, size_t *dim_size, int *out_shape, int *in_format, | |||
| int *out_format, int *in_datatype, int *out_datatype, OpParameter *param) { | |||
| *out_datatype = in_datatype[0]; | |||
| *out_format = in_format[0]; | |||
| if (dim_size[0] < 2 || dim_size[1] < 2) { | |||
| return NNACL_PARAM_INVALID; | |||
| } | |||
| for (int i = 0; i < dim_size[0] - 2; ++i) { | |||
| if (in_shape[0][i] != in_shape[1][i]) { | |||
| return NNACL_PARAM_INVALID; | |||
| } | |||
| } | |||
| MatMulParameter *matmul_param = (MatMulParameter *)param; | |||
| if (matmul_param->a_transpose_) { | |||
| SwapDims(in_shape[0], dim_size[0] - 1, dim_size[0] - 2); | |||
| } | |||
| if (matmul_param->b_transpose_) { | |||
| SwapDims(in_shape[1], dim_size[1] - 1, dim_size[1] - 2); | |||
| } | |||
| for (int i = 0; i < dim_size[0] - 1; ++i) { | |||
| out_shape[i] = in_shape[0][i]; | |||
| } | |||
| out_shape[dim_size[0] - 1] = in_shape[1][dim_size[1] - 1]; | |||
| return NNACL_OK; | |||
| } | |||
| #endif | |||
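In words: the leading (batch) dimensions of the two inputs must match exactly — there is no broadcasting — the trailing two dims are swapped in place when the corresponding transpose flag is set, and the output takes A's leading dims plus B's last dim. A tiny worked example of that rule, assuming no transposes:

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> a = {2, 3, 4};  // A: batch=2, rows=3, deep=4
  std::vector<int> b = {2, 4, 5};  // B: batch=2, deep=4, cols=5
  std::vector<int> out(a.begin(), a.end() - 1);  // keep A's leading dims: {2, 3}
  out.push_back(b.back());                       // append B's last dim:   {2, 3, 5}
  std::printf("out: %d x %d x %d\n", out[0], out[1], out[2]);
}
```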
| @@ -44,6 +44,11 @@ void MatmulFloatNeon64OptRemain(const float *a, const float *b, float *c, int de | |||
| void MatmulFloatNeon32Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row, | |||
| int col, size_t stride, size_t write_nhwc, size_t write_c4); | |||
| #endif | |||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||
| int MatMulInferShape(int **in_shape, int in_num, size_t *dim_size, int *out_shape, int *in_format, | |||
| int *out_format, int *in_datatype, int *out_datatype, OpParameter *param); | |||
| #endif | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
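Note that this declaration sits behind the same `ENABLE_NNACL_INFER_SHAPE` guard as the definition above, so builds that never define the flag see neither the symbol nor its implementation.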
| @@ -37,10 +37,12 @@ endif() | |||
| ### runtime framework | |||
| set(TEST_LITE_SRC | |||
| ${LITE_DIR}/internal/src/common/string.cc | |||
| ${LITE_DIR}/internal/src/lite_session.cc | |||
| ${LITE_DIR}/src/runtime/allocator.cc | |||
| ${LITE_DIR}/internal/src/allocator.cc | |||
| ${LITE_DIR}/internal/src/ms_tensor.cc | |||
| ${LITE_DIR}/internal/src/common/string.cc | |||
| ${LITE_DIR}/internal/src/common/vector.cc | |||
| ${TOP_DIR}/mindspore/core/utils/log_adapter.cc | |||
| ${TOP_DIR}/mindspore/core/gvar/logging_level.cc | |||
| ) | |||
| @@ -65,10 +67,3 @@ set(TEST_SRC | |||
| add_executable(lite-test-internal ${TEST_SRC}) | |||
| target_link_libraries(lite-test-internal dl ${GTEST_LIBRARY}) | |||
| if (PLATFORM_ARM64) | |||
| target_link_libraries(lite-test-internal mslite_internal) | |||
| endif() | |||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||
| target_link_libraries(lite-test-internal log) | |||
| endif() | |||
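With the internal allocator, tensor, string, and vector sources now compiled directly into `lite-test-internal`, the ARM64-only link against `mslite_internal` and the conditional Android `log` link become redundant, which is why both blocks are removed here.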
| @@ -0,0 +1,99 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common_test.h" | |||
| #include "internal/include/model.h" | |||
| #include "internal/include/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| #undef private | |||
| #define private public | |||
| #include "internal/src/allocator.h" | |||
| #undef private | |||
| namespace mindspore { | |||
| class AllocatorTest : public mindspore::CommonTest { | |||
| public: | |||
| AllocatorTest() {} | |||
| }; | |||
| TEST_F(AllocatorTest, AllocatorTest1) { | |||
| lite::DefaultAllocator allocator; | |||
| constexpr int data1_size = 10 * sizeof(float); | |||
| ASSERT_EQ(allocator.allocated_list_[0], nullptr); | |||
| float *data1 = reinterpret_cast<float *>(allocator.Malloc(data1_size)); | |||
| ASSERT_NE(data1, nullptr); | |||
| ASSERT_NE(allocator.allocated_list_[0], nullptr); | |||
| ASSERT_EQ(allocator.free_list_[0], nullptr); | |||
| allocator.Free(data1); | |||
| ASSERT_EQ(allocator.allocated_list_[0], nullptr); | |||
| ASSERT_NE(allocator.free_list_[0], nullptr); | |||
| } | |||
| TEST_F(AllocatorTest, AllocatorTest2) { | |||
| lite::DefaultAllocator allocator; | |||
| constexpr int data1_size = 10 * sizeof(float); | |||
| ASSERT_EQ(allocator.allocated_list_[0], nullptr); | |||
| float *data1 = reinterpret_cast<float *>(allocator.Malloc(data1_size)); | |||
| ASSERT_NE(data1, nullptr); | |||
| ASSERT_NE(allocator.allocated_list_[0], nullptr); | |||
| constexpr int data2_size = (1024 << lite::kBlockRange); | |||
| ASSERT_EQ(allocator.large_mem_list_, nullptr); | |||
| float *data2 = reinterpret_cast<float *>(allocator.Malloc(data2_size)); | |||
| ASSERT_NE(data2, nullptr); | |||
| ASSERT_NE(allocator.large_mem_list_, nullptr); | |||
| constexpr int data3_size = (1024 << 3); | |||
| ASSERT_EQ(allocator.allocated_list_[3], nullptr); | |||
| float *data3 = reinterpret_cast<float *>(allocator.Malloc(data3_size)); | |||
| ASSERT_NE(data3, nullptr); | |||
| ASSERT_NE(allocator.allocated_list_[3], nullptr); | |||
| int expect_total_size = data1_size + data2_size + data3_size; | |||
| size_t total_size = allocator.GetTotalSize(); | |||
| ASSERT_EQ(total_size, expect_total_size); | |||
| allocator.Clear(); | |||
| total_size = allocator.GetTotalSize(); | |||
| ASSERT_EQ(total_size, 0); | |||
| } | |||
| TEST_F(AllocatorTest, AllocatorTest3) { | |||
| lite::DefaultAllocator allocator; | |||
| constexpr int data1_size = 10 * sizeof(float); | |||
| ASSERT_EQ(allocator.allocated_list_[0], nullptr); | |||
| float *data1 = reinterpret_cast<float *>(allocator.Malloc(data1_size)); | |||
| ASSERT_NE(data1, nullptr); | |||
| ASSERT_NE(allocator.allocated_list_[0], nullptr); | |||
| constexpr int data2_size = 11 * sizeof(float); | |||
| float *data2 = reinterpret_cast<float *>(allocator.Malloc(data2_size)); | |||
| ASSERT_NE(data2, nullptr); | |||
| constexpr int data3_size = 12 * sizeof(float); | |||
| float *data3 = reinterpret_cast<float *>(allocator.Malloc(data3_size)); | |||
| ASSERT_NE(data3, nullptr); | |||
| int expect_total_size = data1_size + data2_size + data3_size; | |||
| size_t total_size = allocator.GetTotalSize(); | |||
| ASSERT_EQ(total_size, expect_total_size); | |||
| allocator.Free(data2); | |||
| total_size = allocator.GetTotalSize(); | |||
| ASSERT_EQ(total_size, expect_total_size); | |||
| } | |||
| } // namespace mindspore | |||
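These three tests pin down the allocator's bucketing behavior: small blocks land in `allocated_list_` buckets by size class, anything at or above `1024 << kBlockRange` goes to `large_mem_list_`, `Free()` moves a block to `free_list_` without shrinking `GetTotalSize()`, and only `Clear()` releases everything. A sketch of the size-class computation the tests imply — the actual value of `kBlockRange` lives in internal/src/allocator.h, so the 9 below is an assumption:

```cpp
#include <cstddef>
#include <cstdio>

constexpr int kBlockRange = 9;  // assumed bucket count; the real constant is in allocator.h

int BucketIndex(size_t size) {
  size_t block = 1024;
  for (int i = 0; i < kBlockRange; ++i) {
    if (size <= block) return i;  // fits the (1024 << i)-byte size class
    block <<= 1;
  }
  return -1;  // at or above 1024 << kBlockRange: tracked in large_mem_list_
}

int main() {
  std::printf("40 B  -> bucket %d\n", BucketIndex(40));                   // 0
  std::printf("8 KiB -> bucket %d\n", BucketIndex(1024 << 3));            // 3
  std::printf("large -> bucket %d\n", BucketIndex(1024 << kBlockRange));  // -1
}
```

Under this scheme the 40-byte block falls in bucket 0 and the `1024 << 3` block in bucket 3, matching the `allocated_list_` indices the tests assert.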
| @@ -42,13 +42,17 @@ TEST_F(InferTest, TestSession) { | |||
| node.primitive_ = &prim; | |||
| node.input_indices_.push_back(0); | |||
| node.output_indices_.push_back(1); | |||
| ShapeVector shape = {1, 1, 1, 10}; | |||
| ShapeVector shape(4); | |||
| shape[0] = 1; | |||
| shape[1] = 1; | |||
| shape[2] = 1; | |||
| shape[3] = 10; | |||
| MSTensor *in = CreateTensor(kNumberTypeFloat32, shape); | |||
| model.all_tensors_.push_back(in); | |||
| model.input_indices_.push_back(0); | |||
| MSTensor *out = CreateTensor(kNumberTypeFloat32, shape); | |||
| model.all_tensors_.emplace_back(out); | |||
| model.all_tensors_.push_back(out); | |||
| model.output_indices_.push_back(1); | |||
| LiteSession session; | |||
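Replacing the brace initializer with a sized constructor plus element-wise stores (and `emplace_back` with `push_back`) is consistent with the internal `Vector`, exercised below, deliberately exposing a minimal interface without `std::initializer_list` support — though that reading is inferred from the diff rather than stated in it.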
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cmath> | |||
| #include <memory> | |||
| #include "common/common_test.h" | |||
| #include "internal/include/vector.h" | |||
| #include "nnacl/op_base.h" | |||
| namespace mindspore { | |||
| class VectorTest : public mindspore::CommonTest { | |||
| public: | |||
| VectorTest() {} | |||
| }; | |||
| void CheckArrValue(Vector<int> arr) { | |||
| for (size_t i = 0; i < arr.size(); ++i) { | |||
| ASSERT_EQ(arr[i], i); | |||
| } | |||
| } | |||
| TEST_F(VectorTest, VectorTest1) { | |||
| constexpr int kLen1 = 10; | |||
| Vector<int> arr1(kLen1); | |||
| for (int i = 0; i < kLen1; ++i) { | |||
| arr1[i] = i; | |||
| } | |||
| Vector<int> arr2 = arr1; | |||
| ASSERT_EQ(arr2.size(), kLen1); | |||
| for (int i = 0; i < kLen1; ++i) { | |||
| ASSERT_EQ(arr2[i], i); | |||
| } | |||
| Vector<int> arr3; | |||
| for (int i = 0; i < kLen1; ++i) { | |||
| arr3.push_back(std::move(arr1[i])); | |||
| } | |||
| CheckArrValue(arr3); | |||
| } | |||
| } // namespace mindspore | |||
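The test covers sized construction, copy construction (both via `arr2 = arr1` and via `CheckArrValue` taking its argument by value), indexing, and `push_back`. A minimal stand-in for the container surface being tested — the real implementation is internal/include/vector.h; this sketch assumes trivially copyable `T` and uses malloc/free rather than exceptions, matching the `-fno-exceptions` build flag set for the internal library:

```cpp
#include <cstdlib>
#include <cstring>

template <typename T>
class MiniVector {
 public:
  MiniVector() : MiniVector(0) {}
  explicit MiniVector(size_t n) : size_(n), cap_(n ? n : 4) {
    data_ = static_cast<T *>(std::calloc(cap_, sizeof(T)));
  }
  MiniVector(const MiniVector &other) : size_(other.size_), cap_(other.cap_) {
    data_ = static_cast<T *>(std::malloc(cap_ * sizeof(T)));
    std::memcpy(data_, other.data_, size_ * sizeof(T));
  }
  ~MiniVector() { std::free(data_); }
  MiniVector &operator=(const MiniVector &) = delete;  // not needed by the tests

  void push_back(const T &v) {
    if (size_ == cap_) {  // grow geometrically when full
      cap_ *= 2;
      data_ = static_cast<T *>(std::realloc(data_, cap_ * sizeof(T)));
    }
    data_[size_++] = v;
  }
  T &operator[](size_t i) { return data_[i]; }
  size_t size() const { return size_; }

 private:
  T *data_;
  size_t size_;
  size_t cap_;
};

int main() {
  MiniVector<int> v(3);
  v[0] = 0; v[1] = 1; v[2] = 2;
  v.push_back(3);            // size == cap, so the buffer grows before the store
  MiniVector<int> copy = v;  // copy constructor, as in `arr2 = arr1`
  return copy.size() == 4 ? 0 : 1;
}
```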