From fb9dddb77adb069e5bb2e44fc78264ecfdf18f08 Mon Sep 17 00:00:00 2001
From: wangzhe
Date: Wed, 23 Sep 2020 17:34:20 +0800
Subject: [PATCH] move infershape from internal to nnacl

---
 .../internal/src/kernel/fp32/arithmetic.cc    |  67 +----
 .../lite/internal/src/kernel/fp32/bias_add.cc |  14 +-
 .../lite/internal/src/kernel/fp32/reduce.cc   |  81 +-----
 mindspore/lite/nnacl/fp32/arithmetic.c        |  59 +++++
 mindspore/lite/nnacl/fp32/arithmetic.h        |   5 +
 mindspore/lite/nnacl/fp32/reduce.c            |  79 ++++++
 mindspore/lite/nnacl/fp32/reduce.h            |   5 +
 mindspore/lite/nnacl/reduce_parameter.h       |   4 +-
 .../lite/test/ut/internal/CMakeLists.txt      |   1 +
 .../src/kernel/fp32/arithmetic_fp32_test.cc   |  99 +++++++
 .../src/kernel/fp32/bias_add_fp32_test.cc     |  91 +++++++
 .../src/kernel/fp32/reduce_fp32_test.cc       | 241 ++++++++++++++++++
 12 files changed, 610 insertions(+), 136 deletions(-)
 create mode 100644 mindspore/lite/test/ut/internal/src/kernel/fp32/arithmetic_fp32_test.cc
 create mode 100644 mindspore/lite/test/ut/internal/src/kernel/fp32/bias_add_fp32_test.cc
 create mode 100644 mindspore/lite/test/ut/internal/src/kernel/fp32/reduce_fp32_test.cc

diff --git a/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc b/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc
index af0d1c9953..db0ae0f795 100644
--- a/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc
+++ b/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc
@@ -106,60 +106,21 @@ int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVec
     LITE_LOG_ERROR("output tensors num not correct!");
     return RET_ERROR;
   }
-  ShapeVector in_shape0 = in_tensors[0]->shape_;
-  ShapeVector in_shape1 = in_tensors[1]->shape_;
-  int ndim0 = in_shape0.size();
-  int ndim1 = in_shape1.size();
-  ArithmeticParameter *arithmeticParameter = (ArithmeticParameter *)param;
-  if (ndim0 < ndim1) {
-    arithmeticParameter->ndim_ = ndim1;
-    int fill_dim_num = ndim1 - ndim0;
-    int j = 0;
-    for (int i = 0; i < ndim1; ++i) {
-      if (i < fill_dim_num) {
-        arithmeticParameter->in_shape0_[i] = 1;
-      } else {
-        arithmeticParameter->in_shape0_[i] = in_shape0[j++];
-      }
-      arithmeticParameter->in_shape1_[i] = in_shape1[i];
-    }
-  } else if (ndim0 > ndim1) {
-    arithmeticParameter->ndim_ = ndim0;
-    int fill_dim_num = ndim0 - ndim1;
-    int j = 0;
-    for (int i = 0; i < ndim0; ++i) {
-      if (i < fill_dim_num) {
-        arithmeticParameter->in_shape1_[i] = 1;
-      } else {
-        arithmeticParameter->in_shape1_[i] = in_shape1[j++];
-      }
-      arithmeticParameter->in_shape0_[i] = in_shape0[i];
-    }
-  } else {
-    arithmeticParameter->ndim_ = ndim0;
-    for (int i = 0; i < ndim0; ++i) {
-      arithmeticParameter->in_shape0_[i] = in_shape0[i];
-      arithmeticParameter->in_shape1_[i] = in_shape1[i];
-    }
-  }
-  ShapeVector out_shape;
-  for (size_t i = 0; i < arithmeticParameter->ndim_; ++i) {
-    if (arithmeticParameter->in_shape0_[i] != arithmeticParameter->in_shape1_[i]) {
-      if (arithmeticParameter->in_shape0_[i] == 1) {
-        out_shape.push_back(arithmeticParameter->in_shape1_[i]);
-      } else if (arithmeticParameter->in_shape1_[i] == 1) {
-        out_shape.push_back(arithmeticParameter->in_shape0_[i]);
-      } else {
-        LITE_LOG_ERROR("shapes of input tensors can not be broadcasted!");
-        return RET_INPUT_TENSOR_ERROR;
-      }
-    } else {
-      out_shape.push_back(arithmeticParameter->in_shape0_[i]);
-    }
+
+  int in_datatype[2] = {in_tensors[0]->data_type_, in_tensors[1]->data_type_};
+  int in_format[2] = {static_cast<int>(in_tensors[0]->format_), static_cast<int>(in_tensors[1]->format_)};
+  size_t dim_size[2] = {in_tensors[0]->shape_.size(), in_tensors[1]->shape_.size()};
+  int *in_shape[2] = {in_tensors[0]->shape_.data(), in_tensors[1]->shape_.data()};
+  int out_format;
+  int out_datatype;
+  int ret = ArithmeticInferShape(in_shape, dim_size, out_tensors[0]->shape_.data(), in_format, &out_format, in_datatype,
+                                 &out_datatype, param);
+  if (ret != NNACL_OK) {
+    LITE_ERROR_LOG("arithmetic infershape failed! ret: %d", ret);
+    return RET_ERROR;
   }
-  out_tensors[0]->shape_ = out_shape;
-  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
-  out_tensors[0]->format_ = in_tensors[0]->format_;
+  out_tensors[0]->format_ = static_cast<Format>(out_format);
+  out_tensors[0]->data_type_ = static_cast<TypeId>(out_datatype);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/internal/src/kernel/fp32/bias_add.cc b/mindspore/lite/internal/src/kernel/fp32/bias_add.cc
index e8d507533b..9c2372eab2 100644
--- a/mindspore/lite/internal/src/kernel/fp32/bias_add.cc
+++ b/mindspore/lite/internal/src/kernel/fp32/bias_add.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "internal/src/kernel/fp32/bias_add.h"
+#include "internal/src/kernel/common/common_infershape.h"
 #include "internal/include/model.h"
 #include "internal/include/ms_tensor.h"
 #include "internal/include/lite_utils.h"
@@ -23,18 +24,7 @@
 #include "nnacl/fp32/arithmetic.h"
 
 int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
-  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
-    LITE_LOG_ERROR("input tensors num not correct or input data is NULL!");
-    return RET_INPUT_TENSOR_ERROR;
-  }
-  if (out_tensors.size() != 1) {
-    LITE_LOG_ERROR("output tensors num not correct!");
-    return RET_ERROR;
-  }
-  out_tensors[0]->shape_ = in_tensors[0]->shape_;
-  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
-  out_tensors[0]->format_ = in_tensors[0]->format_;
-  return RET_OK;
+  return DoCommonInferShape(in_tensors, out_tensors);
 }
 
 int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
diff --git a/mindspore/lite/internal/src/kernel/fp32/reduce.cc b/mindspore/lite/internal/src/kernel/fp32/reduce.cc
index 6a37674a4e..f7ac3f73c8 100644
--- a/mindspore/lite/internal/src/kernel/fp32/reduce.cc
+++ b/mindspore/lite/internal/src/kernel/fp32/reduce.cc
@@ -21,6 +21,7 @@
 #include "internal/include/errorcode.h"
 #include "nnacl/reduce_parameter.h"
 #include "nnacl/fp32/reduce.h"
+#include "nnacl/errorcode.h"
 
 typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                        float *dst_data, const int tid, const int thread_num);
@@ -101,76 +102,18 @@ int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector
     return RET_ERROR;
   }
 
-  ReduceParameter *reduceParameter = reinterpret_cast<ReduceParameter *>(param);
-  bool keep_dims = reduceParameter->keep_dims_;
-  int num_axes = reduceParameter->num_axes_;
-  ShapeVector in_shape = in_tensors[0]->shape_;
-  int rank = in_shape.size();
-  Int32Vector out_shape;
-  Int32Vector axes;
-  int actual_axes_num = num_axes;
-  for (int i = 0; i < num_axes; ++i) {
-    if (reduceParameter->axes_[i] < -rank || reduceParameter->axes_[i] >= rank) {
-      LITE_LOG_ERROR("reduce_sum got invalid axis!");
-      return RET_ERROR;
-    }
-    if (reduceParameter->axes_[i] < 0) {
-      axes.push_back(reduceParameter->axes_[i] + rank);
-    } else {
-      axes.push_back(reduceParameter->axes_[i]);
-    }
-  }
-  if (reduceParameter->reduce_to_end_) {
-    if (num_axes != 1) {
-      LITE_LOG_ERROR("Reduce when reduce_to_end, num of axis should be 1!");
-      return RET_ERROR;
-    }
-    int begin_axis = axes[0];
-    num_axes = rank - begin_axis;
-    for (auto i = begin_axis + 1; i < rank; ++i) {
-      axes[actual_axes_num++] = i;
-    }
-  }
-
-  if (num_axes == 0) {
-    axes.resize(rank);
-    for (auto i = 0; i < rank; ++i) {
-      axes[i] = i;
-      if (keep_dims) {
-        out_shape.push_back(1);
-      }
-    }
-    reduceParameter->num_axes_ = axes.size();
-    for (size_t i = 0; i < axes.size(); ++i) {
-      reduceParameter->axes_[i] = axes[i];
-    }
-    out_tensors[0]->shape_ = out_shape;
-    out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
-    out_tensors[0]->format_ = in_tensors[0]->format_;
-    return RET_OK;
-  }
-  // reduce on selected axes
-  for (auto i = 0; i < rank; ++i) {
-    bool reduce_axis = false;
-    for (auto idx = 0; idx < num_axes; ++idx) {
-      if (axes[idx] == i) {
-        reduce_axis = true;
-        break;
-      }
-    }
-    if (reduce_axis) {
-      if (keep_dims) {
-        out_shape.push_back(1);
-      }
-    } else {
-      out_shape.push_back(in_shape[i]);
-    }
-  }
-  reduceParameter->num_axes_ = axes.size();
-  for (size_t i = 0; i < axes.size(); ++i) {
-    reduceParameter->axes_[i] = axes[i];
+  int in_datatype[1] = {in_tensors[0]->data_type_};
+  int in_format[1] = {static_cast<int>(in_tensors[0]->format_)};
+  size_t dim_size[1] = {in_tensors[0]->shape_.size()};
+  int *in_shape[1] = {in_tensors[0]->shape_.data()};
+  int out_format;
+  int out_datatype;
+  int ret = ReduceInferShape(in_shape, dim_size, out_tensors[0]->shape_.data(), in_format, &out_format, in_datatype,
+                             &out_datatype, param);
+  if (ret != NNACL_OK) {
+    LITE_ERROR_LOG("reduce infershape failed! ret: %d", ret);
+    return RET_ERROR;
   }
-  out_tensors[0]->shape_ = out_shape;
   out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
   out_tensors[0]->format_ = in_tensors[0]->format_;
   return RET_OK;
diff --git a/mindspore/lite/nnacl/fp32/arithmetic.c b/mindspore/lite/nnacl/fp32/arithmetic.c
index 65a733897c..d5bdb75673 100644
--- a/mindspore/lite/nnacl/fp32/arithmetic.c
+++ b/mindspore/lite/nnacl/fp32/arithmetic.c
@@ -1253,3 +1253,62 @@ int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, floa
 }
 
 #undef ACCURACY_DATA
+
+#ifdef ENABLE_NNACL_INFER_SHAPE
+int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
+                         int *in_datatype, int *out_datatype, OpParameter *param) {
+  *out_format = in_format[0];
+  *out_datatype = in_datatype[0];
+  ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param;
+  int ndim0 = dim_size[0];
+  int ndim1 = dim_size[1];
+  int *in_shape0 = in_shape[0];
+  int *in_shape1 = in_shape[1];
+  if (ndim0 < ndim1) {
+    arithmetic_parameter->ndim_ = ndim1;
+    int fill_dim_num = ndim1 - ndim0;
+    int j = 0;
+    for (int i = 0; i < ndim1; ++i) {
+      if (i < fill_dim_num) {
+        arithmetic_parameter->in_shape0_[i] = 1;
+      } else {
+        arithmetic_parameter->in_shape0_[i] = in_shape0[j++];
+      }
+      arithmetic_parameter->in_shape1_[i] = in_shape1[i];
+    }
+  } else if (ndim0 > ndim1) {
+    arithmetic_parameter->ndim_ = ndim0;
+    int fill_dim_num = ndim0 - ndim1;
+    int j = 0;
+    for (int i = 0; i < ndim0; ++i) {
+      if (i < fill_dim_num) {
+        arithmetic_parameter->in_shape1_[i] = 1;
+      } else {
+        arithmetic_parameter->in_shape1_[i] = in_shape1[j++];
+      }
+      arithmetic_parameter->in_shape0_[i] = in_shape0[i];
+    }
+  } else {
+    arithmetic_parameter->ndim_ = ndim0;
+    for (int i = 0; i < ndim0; ++i) {
+      arithmetic_parameter->in_shape0_[i] = in_shape0[i];
+      arithmetic_parameter->in_shape1_[i] = in_shape1[i];
+    }
+  }
+  int j = 0;
+  for (size_t i = 0; i < arithmetic_parameter->ndim_; ++i) {
+    if (arithmetic_parameter->in_shape0_[i] != arithmetic_parameter->in_shape1_[i]) {
+      if (arithmetic_parameter->in_shape0_[i] == 1) {
+        out_shape[j++] = arithmetic_parameter->in_shape1_[i];
+      } else if (arithmetic_parameter->in_shape1_[i] == 1) {
+        out_shape[j++] = arithmetic_parameter->in_shape0_[i];
+      } else {
+        return NNACL_PARAM_INVALID;
+      }
+    } else {
+      out_shape[j++] = arithmetic_parameter->in_shape0_[i];
+    }
+  }
+  return NNACL_OK;
+}
+#endif
diff --git a/mindspore/lite/nnacl/fp32/arithmetic.h b/mindspore/lite/nnacl/fp32/arithmetic.h
index 22c5d36c02..8dc5115435 100644
--- a/mindspore/lite/nnacl/fp32/arithmetic.h
+++ b/mindspore/lite/nnacl/fp32/arithmetic.h
@@ -117,6 +117,11 @@ int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *ti
 int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size);
 int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
                           int element_size, ArithmeticParameter *param);
+
+#ifdef ENABLE_NNACL_INFER_SHAPE
+int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
+                         int *in_datatype, int *out_datatype, OpParameter *param);
+#endif
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/fp32/reduce.c b/mindspore/lite/nnacl/fp32/reduce.c
index d95d81a037..0d8b74f5b4 100644
--- a/mindspore/lite/nnacl/fp32/reduce.c
+++ b/mindspore/lite/nnacl/fp32/reduce.c
@@ -19,6 +19,10 @@
 #include "nnacl/errorcode.h"
 #include "nnacl/common_func.h"
 
+#ifdef ENABLE_NNACL_INFER_SHAPE
+#include "nnacl/reduce_parameter.h"
+#endif
+
 int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
                const int tid, const int thread_num) {
   if (src_data == NULL || dst_data == NULL) {
@@ -186,3 +190,78 @@ int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_s
   }
   return NNACL_OK;
 }
+
+#ifdef ENABLE_NNACL_INFER_SHAPE
+int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
+                     int *in_datatype, int *out_datatype, OpParameter *param) {
+  *out_format = in_format[0];
+  *out_datatype = in_datatype[0];
+  ReduceParameter *reduce_parameter = (ReduceParameter *)param;
+  bool keep_dims = reduce_parameter->keep_dims_;
+  int num_axes = reduce_parameter->num_axes_;
+  int *in_shape0 = in_shape[0];
+  int rank = dim_size[0];
+  if (rank <= 0 || rank > REDUCE_MAX_AXES_NUM) {
+    return NNACL_PARAM_INVALID;
+  }
+  int axes[REDUCE_MAX_AXES_NUM];
+  int actual_axes_num = num_axes;
+  for (int i = 0; i < num_axes; ++i) {
+    if (reduce_parameter->axes_[i] < -rank || reduce_parameter->axes_[i] >= rank) {
+      return NNACL_PARAM_INVALID;
+    }
+    if (reduce_parameter->axes_[i] < 0) {
+      axes[i] = reduce_parameter->axes_[i] + rank;
+    } else {
+      axes[i] = reduce_parameter->axes_[i];
+    }
+  }
+  if (reduce_parameter->reduce_to_end_) {
+    if (num_axes != 1) {
+      return NNACL_PARAM_INVALID;
+    }
+    int begin_axis = axes[0];
+    num_axes = rank - begin_axis;
+    for (int i = begin_axis + 1; i < rank; ++i) {
+      axes[actual_axes_num++] = i;
+    }
+  }
+  if (num_axes == 0) {
+    int j = 0;
+    for (int i = 0; i < rank; ++i) {
+      axes[i] = i;
+      if (keep_dims) {
+        out_shape[j++] = 1;
+      }
+    }
+    reduce_parameter->num_axes_ = rank;
+    for (int i = 0; i < rank; ++i) {
+      reduce_parameter->axes_[i] = axes[i];
+    }
+    return NNACL_OK;
+  }
+  // reduce on selected axes
+  int j = 0;
+  for (int i = 0; i < rank; ++i) {
+    bool reduce_axis = false;
+    for (int idx = 0; idx < num_axes; ++idx) {
+      if (axes[idx] == i) {
+        reduce_axis = true;
+        break;
+      }
+    }
+    if (reduce_axis) {
+      if (keep_dims) {
+        out_shape[j++] = 1;
+      }
+    } else {
+      out_shape[j++] = in_shape0[i];
+    }
+  }
+  reduce_parameter->num_axes_ = num_axes;
+  for (int i = 0; i < num_axes; ++i) {
+    reduce_parameter->axes_[i] = axes[i];
+  }
+  return NNACL_OK;
+}
+#endif
diff --git a/mindspore/lite/nnacl/fp32/reduce.h b/mindspore/lite/nnacl/fp32/reduce.h
index 9c87f6392d..a7f7e730e1 100644
--- a/mindspore/lite/nnacl/fp32/reduce.h
+++ b/mindspore/lite/nnacl/fp32/reduce.h
@@ -36,6 +36,11 @@ int IntReduceProd(const int outer_size, const int inner_size, const int axis_siz
                   const int tid, const int thread_num);
 int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                     float *dst_data, const int tid, const int thread_num);
+
+#ifdef ENABLE_NNACL_INFER_SHAPE
+int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
+                     int *in_datatype, int *out_datatype, OpParameter *param);
+#endif
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/reduce_parameter.h b/mindspore/lite/nnacl/reduce_parameter.h
index e28f6f625f..d4eb2e7056 100644
--- a/mindspore/lite/nnacl/reduce_parameter.h
+++ b/mindspore/lite/nnacl/reduce_parameter.h
@@ -19,7 +19,7 @@
 #include "nnacl/op_base.h"
 
 #define REDUCE_MAX_AXES_NUM 8
-struct ReduceParameter {
+typedef struct ReduceParameter {
   OpParameter op_parameter_;
   bool keep_dims_;
   bool reduce_to_end_;
@@ -27,6 +27,6 @@
   int axes_[REDUCE_MAX_AXES_NUM];
   int num_axes_;
   int mode_;
-};
+} ReduceParameter;
 
 #endif  // MINDSPORE_LITE_NNACL_REDUCE_PARAMETER_H_
diff --git a/mindspore/lite/test/ut/internal/CMakeLists.txt b/mindspore/lite/test/ut/internal/CMakeLists.txt
index d70413d56b..fdee819c54 100644
--- a/mindspore/lite/test/ut/internal/CMakeLists.txt
+++ b/mindspore/lite/test/ut/internal/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LITE_DIR ${TOP_DIR}/mindspore/lite)
 include_directories(${TOP_DIR})
 include_directories(${TEST_DIR})
 
+add_compile_definitions(ENABLE_NNACL_INFER_SHAPE)
 string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
diff --git a/mindspore/lite/test/ut/internal/src/kernel/fp32/arithmetic_fp32_test.cc b/mindspore/lite/test/ut/internal/src/kernel/fp32/arithmetic_fp32_test.cc
new file mode 100644
index 0000000000..2030933ff6
--- /dev/null
+++ b/mindspore/lite/test/ut/internal/src/kernel/fp32/arithmetic_fp32_test.cc
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "test/common/common_test.h"
+#include "src/common/file_utils.h"
+#include "schema/ops_generated.h"
+#include "mindspore/lite/nnacl/fp32/arithmetic.h"
+#include "internal/src/allocator.h"
+#include "internal/include/model.h"
+#include "internal/include/ms_tensor.h"
+#include "internal/include/lite_utils.h"
+#include "internal/src/kernel/fp32/arithmetic.h"
+#include "gtest/gtest.h"
+
+class TestInternalArithmeticFp32 : public mindspore::CommonTest {
+ public:
+  TestInternalArithmeticFp32() {}
+};
+
+TEST_F(TestInternalArithmeticFp32, MulTest) {
+  auto mul_param = new ArithmeticParameter();
+  mul_param->activation_type_ = mindspore::schema::ActivationType_NO_ACTIVATION;
+  mul_param->op_parameter_.type_ = KernelType_Mul;
+  mul_param->ndim_ = 4;
+  Node *node = new Node();
+  node->name_ = "Mul";
+  node->node_type_ = NodeType::NodeType_CNode;
+  node->primitive_ = reinterpret_cast<PrimitiveC *>(mul_param);
+  mindspore::lite::Allocator allocator;
+  /* 1x2x3x4 NHWC */
+  std::vector<float> data0 = {12.216284, 3.3466918, 15.327419, 5.234958,  0.804376,   9.952188,
+                              14.727955, -8.080715, 13.71383,  8.055829,  6.5845337,  -9.25232,
+                              -4.24519,  11.550042, 9.262012,  1.2780352, 6.7263746,  -3.9301445,
+                              3.764492,  -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505};
+  std::vector<float> data1 = {0.16771512, 0.7336843, 0.6768286, 0.4453379};
+  std::vector<float> correct_out = {2.0488555,   2.4554152,  10.374036,   2.3313253, 0.13490601, 7.3017635,
+                                    9.968302,    -3.5986485, 2.3000166,   5.910435,  4.4566007,  -4.120409,
+                                    -0.71198255, 8.474085,   6.2687945,   0.5691575, 1.1281147,  -2.8834853,
+                                    2.547916,    -3.8308315, -0.56281954, 9.992072,  -1.8067529, 1.42546};
+
+  TensorPtrVector in_tensors;
+  ShapeVector shape0(4);
+  shape0[0] = 1;
+  shape0[1] = 2;
+  shape0[2] = 3;
+  shape0[3] = 4;
+  MSTensor in0;
+  in0.data_ = data0.data();
+  in0.shape_ = shape0;
+  in0.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in0);
+
+  ShapeVector shape1(4);
+  shape1[0] = 1;
+  shape1[1] = 1;
+  shape1[2] = 1;
+  shape1[3] = 4;
+  MSTensor in1;
+  in1.data_ = data1.data();
+  in1.shape_ = shape1;
+  in1.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in1);
+
+  TensorPtrVector out_tensors;
+  MSTensor out0;
+  out0.shape_.resize(4);
+  out_tensors.push_back(&out0);
+
+  DoArithmeticInferShape(in_tensors, out_tensors, reinterpret_cast<OpParameter *>(mul_param));
+
+  ShapeVector out_shape0(4);
+  out_shape0[0] = 1;
+  out_shape0[1] = 2;
+  out_shape0[2] = 3;
+  out_shape0[3] = 4;
+  ASSERT_EQ(out_tensors.front()->shape_, out_shape0);
+
+  out_tensors[0]->data_ = new float[correct_out.size()];
+  DoArithmetic(in_tensors, out_tensors, node, &allocator);
+
+  CompareOutputData(reinterpret_cast<float *>(out_tensors.front()->data_), correct_out.data(), correct_out.size(),
+                    0.00001);
+
+  delete[] out_tensors[0]->data_;
+  delete node;
+  delete mul_param;
+}
diff --git a/mindspore/lite/test/ut/internal/src/kernel/fp32/bias_add_fp32_test.cc b/mindspore/lite/test/ut/internal/src/kernel/fp32/bias_add_fp32_test.cc
new file mode 100644
index 0000000000..d7766688de
--- /dev/null
+++ b/mindspore/lite/test/ut/internal/src/kernel/fp32/bias_add_fp32_test.cc
@@ -0,0 +1,91 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "test/common/common_test.h"
+#include "src/common/file_utils.h"
+#include "schema/ops_generated.h"
+#include "mindspore/lite/nnacl/fp32/arithmetic.h"
+#include "internal/src/allocator.h"
+#include "internal/include/model.h"
+#include "internal/include/ms_tensor.h"
+#include "internal/include/lite_utils.h"
+#include "internal/src/kernel/fp32/bias_add.h"
+#include "gtest/gtest.h"
+
+class TestInternalBiasAddFp32 : public mindspore::CommonTest {
+ public:
+  TestInternalBiasAddFp32() {}
+};
+
+TEST_F(TestInternalBiasAddFp32, BiasAddTest) {
+  auto bias_add_param = new ArithmeticParameter();
+  bias_add_param->activation_type_ = mindspore::schema::ActivationType_NO_ACTIVATION;
+  bias_add_param->op_parameter_.type_ = KernelType_BiasAdd;
+  Node *node = new Node();
+  node->name_ = "BiasAdd";
+  node->node_type_ = NodeType::NodeType_CNode;
+  node->primitive_ = reinterpret_cast<PrimitiveC *>(bias_add_param);
+  mindspore::lite::Allocator allocator;
+  std::vector<float> data0 = {12.216284, 3.3466918, 15.327419, 5.234958,  0.804376,   9.952188,
+                              14.727955, -8.080715, 13.71383,  8.055829,  6.5845337,  -9.25232,
+                              -4.24519,  11.550042, 9.262012,  1.2780352, 6.7263746,  -3.9301445,
+                              3.764492,  -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505};
+  std::vector<float> data1 = {0.16771512, 0.7336843, 0.6768286, 0.4453379};
+  std::vector<float> correct_out = {12.3839989, 4.0803761,  16.0042477, 5.6802959, 0.9720911,  10.6858721,
+                                    15.4047832, -7.6353774, 13.8815451, 8.7895136, 7.2613621,  -8.8069820,
+                                    -4.0774751, 12.2837267, 9.9388399,  1.7233731, 6.8940897,  -3.1964602,
+                                    4.4413204,  -8.1567402, -3.1880918, 14.3527193, -1.9926107, 3.6461883};
+  TensorPtrVector in_tensors;
+  ShapeVector shape0(4);
+  shape0[0] = 1;
+  shape0[1] = 2;
+  shape0[2] = 3;
+  shape0[3] = 4;
+  MSTensor in0;
+  in0.data_ = data0.data();
+  in0.shape_ = shape0;
+  in0.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in0);
+
+  ShapeVector shape1{4};
+  MSTensor in1;
+  in1.data_ = data1.data();
+  in1.shape_ = shape1;
+  in1.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in1);
+
+  TensorPtrVector out_tensors;
+  MSTensor out0;
+  out_tensors.push_back(&out0);
+
+  DoBiasAddInferShape(in_tensors, out_tensors, reinterpret_cast<OpParameter *>(bias_add_param));
+
+  ShapeVector out_shape0(4);
+  out_shape0[0] = 1;
+  out_shape0[1] = 2;
+  out_shape0[2] = 3;
+  out_shape0[3] = 4;
+  ASSERT_EQ(out_tensors.front()->shape_, out_shape0);
+
+  out_tensors[0]->data_ = new float[correct_out.size()];
+  DoBiasAdd(in_tensors, out_tensors, node, &allocator);
+
+  CompareOutputData(reinterpret_cast<float *>(out_tensors.front()->data_), correct_out.data(), correct_out.size(),
+                    0.00001);
+
+  delete[] out_tensors[0]->data_;
+  delete node;
+  delete bias_add_param;
+}
diff --git a/mindspore/lite/test/ut/internal/src/kernel/fp32/reduce_fp32_test.cc b/mindspore/lite/test/ut/internal/src/kernel/fp32/reduce_fp32_test.cc
new file mode 100644
index 0000000000..1a4b0c7421
--- /dev/null
+++ b/mindspore/lite/test/ut/internal/src/kernel/fp32/reduce_fp32_test.cc
@@ -0,0 +1,241 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "test/common/common_test.h" +#include "mindspore/lite/nnacl/reduce_parameter.h" +#include "schema/ops_generated.h" +#include "internal/src/allocator.h" +#include "internal/include/model.h" +#include "internal/include/ms_tensor.h" +#include "internal/include/lite_utils.h" +#include "internal/src/kernel/fp32/reduce.h" +#include "gtest/gtest.h" + +class TestInternalReduceFp32 : public mindspore::CommonTest { + public: + TestInternalReduceFp32() {} +}; + +TEST_F(TestInternalReduceFp32, ReduceSumOneAxisTest) { + Node *node = reinterpret_cast(new Node()); + node->name_ = "ReduceSum"; + node->node_type_ = NodeType::NodeType_CNode; + + auto params = new ReduceParameter(); + params->mode_ = mindspore::schema::ReduceMode_ReduceSum; + params->num_axes_ = 1; + params->axes_[0] = 1; + params->keep_dims_ = false; + node->primitive_ = reinterpret_cast(params); + mindspore::lite::Allocator allocator; + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, + 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, + 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, + 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0}; + float correct[24] = {72.0, 76.0, 80.0, 84.0, 88.0, 92.0, 96.0, 100.0, 104.0, 108.0, 112.0, 116.0, + 264.0, 268.0, 272.0, 276.0, 280.0, 284.0, 288.0, 292.0, 296.0, 300.0, 304.0, 308.0}; + + TensorPtrVector in_tensors; + ShapeVector shape0(4); + shape0[0] = 2; + shape0[1] = 4; + shape0[2] = 4; + shape0[3] = 3; + MSTensor in0; + in0.data_ = in; + in0.shape_ = shape0; + in0.data_type_ = TypeId::kNumberTypeFloat32; + in_tensors.push_back(&in0); + + TensorPtrVector out_tensors; + MSTensor out0; + out0.shape_.resize(3); + out_tensors.push_back(&out0); + + DoReduceInferShape(in_tensors, out_tensors, reinterpret_cast(params)); + + ShapeVector out_shape0(3); + out_shape0[0] = 2; + out_shape0[1] = 4; + out_shape0[2] = 3; + ASSERT_EQ(out_tensors.front()->shape_, out_shape0); + out_tensors[0]->data_ = new float[24]; + + DoReduce(in_tensors, out_tensors, node, &allocator); + + CompareOutputData(reinterpret_cast(out_tensors.front()->data_), correct, 24, 0.00001); + delete out_tensors[0]->data_; + delete node; + delete params; +} + +TEST_F(TestInternalReduceFp32, ReduceSumAllAxisTest) { + Node *node = reinterpret_cast(new Node()); + node->name_ = "ReduceSum"; + node->node_type_ = NodeType::NodeType_CNode; + + auto params = new ReduceParameter(); + params->mode_ = mindspore::schema::ReduceMode_ReduceSum; + params->num_axes_ = 0; + params->keep_dims_ = false; + node->primitive_ = reinterpret_cast(params); + mindspore::lite::Allocator allocator; + float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 
+  float in[96] = {0.0,  1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                  16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+                  32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
+                  48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
+                  64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0,
+                  80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
+  float correct[1] = {4560.0};
+
+  TensorPtrVector in_tensors;
+  ShapeVector shape0(4);
+  shape0[0] = 2;
+  shape0[1] = 4;
+  shape0[2] = 4;
+  shape0[3] = 3;
+  MSTensor in0;
+  in0.data_ = in;
+  in0.shape_ = shape0;
+  in0.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in0);
+
+  TensorPtrVector out_tensors;
+  MSTensor out0;
+  out_tensors.push_back(&out0);
+
+  DoReduceInferShape(in_tensors, out_tensors, reinterpret_cast<OpParameter *>(params));
+
+  ShapeVector out_shape0{};
+  ASSERT_EQ(out_tensors.front()->shape_, out_shape0);
+  out_tensors[0]->data_ = new float[1];
+
+  DoReduce(in_tensors, out_tensors, node, &allocator);
+
+  CompareOutputData(reinterpret_cast<float *>(out_tensors.front()->data_), correct, 1, 0.00001);
+  delete[] out_tensors[0]->data_;
+  delete node;
+  delete params;
+}
+
+TEST_F(TestInternalReduceFp32, ReduceMeanOneAxisTest) {
+  Node *node = reinterpret_cast<Node *>(new Node());
+  node->name_ = "ReduceMean";
+  node->node_type_ = NodeType::NodeType_CNode;
+
+  auto params = new ReduceParameter();
+  params->mode_ = mindspore::schema::ReduceMode_ReduceMean;
+  params->num_axes_ = 1;
+  params->axes_[0] = 1;
+  params->keep_dims_ = false;
+  node->primitive_ = reinterpret_cast<PrimitiveC *>(params);
+  mindspore::lite::Allocator allocator;
+  float in[96] = {0.0,  1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                  16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+                  32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
+                  48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
+                  64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0,
+                  80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
+  float correct[24] = {18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0,
+                       66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0};
+  TensorPtrVector in_tensors;
+  ShapeVector shape0(4);
+  shape0[0] = 2;
+  shape0[1] = 4;
+  shape0[2] = 4;
+  shape0[3] = 3;
+  MSTensor in0;
+  in0.data_ = in;
+  in0.shape_ = shape0;
+  in0.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in0);
+
+  TensorPtrVector out_tensors;
+  MSTensor out0;
+  out0.shape_.resize(3);
+  out_tensors.push_back(&out0);
+
+  DoReduceInferShape(in_tensors, out_tensors, reinterpret_cast<OpParameter *>(params));
+
+  ShapeVector out_shape0(3);
+  out_shape0[0] = 2;
+  out_shape0[1] = 4;
+  out_shape0[2] = 3;
+  ASSERT_EQ(out_tensors.front()->shape_, out_shape0);
+  out_tensors[0]->data_ = new float[24];
+
+  DoReduce(in_tensors, out_tensors, node, &allocator);
+
+  CompareOutputData(reinterpret_cast<float *>(out_tensors.front()->data_), correct, 24, 0.00001);
+  delete[] out_tensors[0]->data_;
+  delete node;
+  delete params;
+}
+
+TEST_F(TestInternalReduceFp32, ReduceMeanAllAxisTest) {
+  Node *node = reinterpret_cast<Node *>(new Node());
+  node->name_ = "ReduceMean";
+  node->node_type_ = NodeType::NodeType_CNode;
+
+  auto params = new ReduceParameter();
+  params->mode_ = mindspore::schema::ReduceMode_ReduceMean;
+  params->num_axes_ = 0;
+  params->keep_dims_ = true;
+  node->primitive_ = reinterpret_cast<PrimitiveC *>(params);
+  mindspore::lite::Allocator allocator;
+  float in[96] = {0.0,  1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                  16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+                  32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
+                  48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
+                  64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0,
+                  80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0};
+  float correct[1] = {47.5};
+
+  TensorPtrVector in_tensors;
+  ShapeVector shape0(4);
+  shape0[0] = 2;
+  shape0[1] = 4;
+  shape0[2] = 4;
+  shape0[3] = 3;
+  MSTensor in0;
+  in0.data_ = in;
+  in0.shape_ = shape0;
+  in0.data_type_ = TypeId::kNumberTypeFloat32;
+  in_tensors.push_back(&in0);
+
+  TensorPtrVector out_tensors;
+  MSTensor out0;
+  out0.shape_.resize(4);
+  out_tensors.push_back(&out0);
+
+  DoReduceInferShape(in_tensors, out_tensors, reinterpret_cast<OpParameter *>(params));
+
+  ShapeVector out_shape0(4);
+  out_shape0[0] = 1;
+  out_shape0[1] = 1;
+  out_shape0[2] = 1;
+  out_shape0[3] = 1;
+  ASSERT_EQ(out_tensors.front()->shape_, out_shape0);
+  out_tensors[0]->data_ = new float[1];
+
+  DoReduce(in_tensors, out_tensors, node, &allocator);
+
+  CompareOutputData(reinterpret_cast<float *>(out_tensors.front()->data_), correct, 1, 0.00001);
+  delete[] out_tensors[0]->data_;
+  delete node;
+  delete params;
+}
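
Usage sketch (illustrative only, not part of the patch): with ENABLE_NNACL_INFER_SHAPE defined, the relocated nnacl entry points can be called standalone, which is what the unit tests above do indirectly through DoArithmeticInferShape/DoReduceInferShape. A minimal caller might look like the following; the format and datatype values (0 and 43) are placeholders standing in for the real Format/TypeId enum values, which this patch does not define.

#include <stdio.h>
#include "nnacl/errorcode.h"
#include "nnacl/fp32/arithmetic.h"

int main(void) {
  /* Broadcast a 1x2x3x4 shape against a 1x1x1x4 shape, mirroring MulTest above. */
  int shape0[4] = {1, 2, 3, 4};
  int shape1[4] = {1, 1, 1, 4};
  int *in_shape[2] = {shape0, shape1};
  size_t dim_size[2] = {4, 4};
  int in_format[2] = {0, 0};     /* placeholder format ids */
  int in_datatype[2] = {43, 43}; /* placeholder: float32 type id as an int */
  int out_shape[4] = {0};
  int out_format = 0;
  int out_datatype = 0;
  ArithmeticParameter param = {0}; /* broadcast bookkeeping is filled in by the call */
  int ret = ArithmeticInferShape(in_shape, dim_size, out_shape, in_format, &out_format, in_datatype, &out_datatype,
                                 (OpParameter *)&param);
  if (ret != NNACL_OK) {
    return ret;
  }
  printf("%d %d %d %d\n", out_shape[0], out_shape[1], out_shape[2], out_shape[3]); /* prints "1 2 3 4" */
  return 0;
}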