diff --git a/mindspore/lite/nnacl/fp32/cast_fp32.c b/mindspore/lite/nnacl/base/cast_base.h similarity index 57% rename from mindspore/lite/nnacl/fp32/cast_fp32.c rename to mindspore/lite/nnacl/base/cast_base.h index 0f563c89f0..ee62db13df 100644 --- a/mindspore/lite/nnacl/fp32/cast_fp32.c +++ b/mindspore/lite/nnacl/base/cast_base.h @@ -13,66 +13,66 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef MINDSPORE_LITE_NNACL_CAST_BASE_H_ +#define MINDSPORE_LITE_NNACL_CAST_BASE_H_ -#include "nnacl/fp32/cast_fp32.h" -#include "nnacl/fp32/common_func_fp32.h" +#include "nnacl/op_base.h" +#include "nnacl/nnacl_common.h" -void BoolToFloat32(const bool *input, float *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (float)input[i]; - } -} +#ifdef __cplusplus +extern "C" { +#endif -void Uint8ToFloat32(const uint8_t *input, float *output, int number) { +inline void BoolToFloat32(const bool *input, float *output, int number) { for (int i = 0; i < number; ++i) { output[i] = (float)input[i]; } } -void Uint8ToInt8(const uint8_t *input, int8_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (int8_t)(input[i] - 128); - } -} - -void Int8ToUint8(const int8_t *input, uint8_t *output, int number) { +inline void Uint8ToFloat32(const uint8_t *input, float *output, int number) { for (int i = 0; i < number; ++i) { - output[i] = (uint8_t)(input[i] + 128); + output[i] = (float)input[i]; } } -void Int32ToFloat32(const int32_t *input, float *output, int number) { +inline void Int32ToFloat32(const int32_t *input, float *output, int number) { for (int i = 0; i < number; ++i) { output[i] = (float)input[i]; } } -void Fp16ToFloat32(const uint16_t *input, float *output, int number) { +inline void Fp16ToFloat32(const uint16_t *input, float *output, int number) { for (int i = 0; i < number; ++i) { output[i] = ShortToFloat32(input[i]); } } -void Float32ToFp16(const float *input, uint16_t *output, int number) { +inline void Float32ToFp16(const float *input, uint16_t *output, int number) { for (int i = 0; i < number; ++i) { output[i] = Float32ToShort(input[i]); } } -void Float32ToInt32(const float *input, int32_t *output, int number) { +inline void Float32ToInt32(const float *input, int32_t *output, int number) { for (int i = 0; i < number; ++i) { output[i] = (int32_t)input[i]; } } -void Float32ToInt64(const float *input, int64_t *output, int number) { +inline void Float32ToInt64(const float *input, int64_t *output, int number) { for (int i = 0; i < number; ++i) { output[i] = (int64_t)input[i]; } } -void Int32ToInt64(const int32_t *input, int64_t *output, int number) { +inline void Int32ToInt64(const int32_t *input, int64_t *output, int number) { for (int i = 0; i < number; ++i) { output[i] = (int64_t)input[i]; } } + +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_LITE_NNACL_CAST_BASE_H_ diff --git a/mindspore/lite/nnacl/fp32/concat_fp32.c b/mindspore/lite/nnacl/base/concat_base.c similarity index 85% rename from mindspore/lite/nnacl/fp32/concat_fp32.c rename to mindspore/lite/nnacl/base/concat_base.c index 4f2568f341..0f8d287ade 100644 --- a/mindspore/lite/nnacl/fp32/concat_fp32.c +++ b/mindspore/lite/nnacl/base/concat_base.c @@ -14,17 +14,16 @@ * limitations under the License. 
*/ -#include "nnacl/fp32/concat_fp32.h" -#include +#include "nnacl/base/concat_base.h" -void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, - void *output, int task_id, int thread_num) { +void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, + int task_id, int thread_num, int data_size) { int before_axis_size = 1; for (int i = 0; i < axis; ++i) { before_axis_size *= inputs_output_shape[0][i]; } - // sizeof float/int32 - int after_axis_size = 4; + + int after_axis_size = data_size; for (size_t i = axis + 1; i < shape_size; ++i) { after_axis_size *= inputs_output_shape[0][i]; } diff --git a/mindspore/lite/nnacl/fp32/expandDims_fp32.h b/mindspore/lite/nnacl/base/concat_base.h similarity index 67% rename from mindspore/lite/nnacl/fp32/expandDims_fp32.h rename to mindspore/lite/nnacl/base/concat_base.h index 37586820d2..ae6bc5da02 100644 --- a/mindspore/lite/nnacl/fp32/expandDims_fp32.h +++ b/mindspore/lite/nnacl/base/concat_base.h @@ -14,23 +14,19 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ -#define MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ +#ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ +#define MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ +#include #include "nnacl/op_base.h" -typedef struct ExpandDimsParameter { - // Primitive parameter - OpParameter op_parameter_; - int dim_; -} ExpandDimsParameter; - #ifdef __cplusplus extern "C" { #endif -int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size); +void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, + int task_id, int thread_num, int data_size); #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ +#endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ diff --git a/mindspore/lite/nnacl/fp32/concat_fp32.h b/mindspore/lite/nnacl/base/expand_dims_base.h similarity index 68% rename from mindspore/lite/nnacl/fp32/concat_fp32.h rename to mindspore/lite/nnacl/base/expand_dims_base.h index 73d0534979..542216ef9a 100644 --- a/mindspore/lite/nnacl/fp32/concat_fp32.h +++ b/mindspore/lite/nnacl/base/expand_dims_base.h @@ -14,18 +14,23 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ -#define MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ +#ifndef MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ +#define MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ #include "nnacl/op_base.h" +#include "nnacl/errorcode.h" #ifdef __cplusplus extern "C" { #endif -void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, - void *output, int task_id, int thread_num); + +inline int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) { + memcpy(output_ptr, input_ptr, data_size); + return NNACL_OK; +} + #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ +#endif // MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ diff --git a/mindspore/lite/nnacl/fp32/fill_fp32.c b/mindspore/lite/nnacl/base/fill_base.c similarity index 90% rename from mindspore/lite/nnacl/fp32/fill_fp32.c rename to mindspore/lite/nnacl/base/fill_base.c index be915092bc..87c15222ae 100644 --- a/mindspore/lite/nnacl/fp32/fill_fp32.c +++ b/mindspore/lite/nnacl/base/fill_base.c @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "nnacl/fp32/fill_fp32.h" +#include "nnacl/base/fill_base.h" -int Fill(float *output, int size, float data) { +int FillFp32(float *output, int size, float data) { for (int i = 0; i < size; ++i) { output[i] = data; } diff --git a/mindspore/lite/nnacl/reshape.h b/mindspore/lite/nnacl/base/fill_base.h similarity index 70% rename from mindspore/lite/nnacl/reshape.h rename to mindspore/lite/nnacl/base/fill_base.h index a14901bfcf..79a100d83e 100644 --- a/mindspore/lite/nnacl/reshape.h +++ b/mindspore/lite/nnacl/base/fill_base.h @@ -13,17 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef MINDSPORE_LITE_NNACL_FILL_BASE_H_ +#define MINDSPORE_LITE_NNACL_FILL_BASE_H_ -#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_H_ -#define MINDSPORE_LITE_NNACL_RESHAHPE_H_ #include "nnacl/op_base.h" +#include "nnacl/errorcode.h" +#include "nnacl/fill_parameter.h" #ifdef __cplusplus extern "C" { #endif -void Reshape(const void *input_ptr, void *output_ptr, size_t data_size); +int FillFp32(float *output, int size, float data); +int FillInt32(int *output, int size, int data); #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_RESHAHPE_H_ +#endif // MINDSPORE_LITE_NNACL_FILL_BASE_H_ diff --git a/mindspore/lite/nnacl/fp32/gather_fp32.c b/mindspore/lite/nnacl/base/gather_base.c similarity index 80% rename from mindspore/lite/nnacl/fp32/gather_fp32.c rename to mindspore/lite/nnacl/base/gather_base.c index a652f36397..6791623755 100644 --- a/mindspore/lite/nnacl/fp32/gather_fp32.c +++ b/mindspore/lite/nnacl/base/gather_base.c @@ -14,20 +14,9 @@ * limitations under the License. */ -#include "nnacl/fp32/gather_fp32.h" -#include -#include "nnacl/errorcode.h" - -inline int Stride(const int *shape, int rank, int index) { - int i, stride = 1; - for (i = index + 1; i < rank; ++i) { - stride *= shape[i]; - } - return stride; -} - -int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, - float *output) { +#include "nnacl/base/gather_base.h" +int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices, + int indices_element_size, float *output) { for (int m = 0; m < outer_size; ++m) { const float *inputm = input + inner_size * m * limit; float *outputm = output + inner_size * m * indices_element_size; diff --git a/mindspore/lite/nnacl/fp32/gather_fp32.h b/mindspore/lite/nnacl/base/gather_base.h similarity index 71% rename from mindspore/lite/nnacl/fp32/gather_fp32.h rename to mindspore/lite/nnacl/base/gather_base.h index 28b7eb59d7..f38ed951c0 100644 --- a/mindspore/lite/nnacl/fp32/gather_fp32.h +++ b/mindspore/lite/nnacl/base/gather_base.h @@ -14,20 +14,22 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_LITE_NNACL_GATHER_H_ -#define MINDSPORE_LITE_NNACL_GATHER_H_ +#ifndef MINDSPORE_LITE_NNACL_GATHER_BASE_H_ +#define MINDSPORE_LITE_NNACL_GATHER_BASE_H_ +#include #include "nnacl/op_base.h" +#include "nnacl/errorcode.h" #ifdef __cplusplus extern "C" { #endif -int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, - float *output); +int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices, + int indices_element_size, float *output); int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, int32_t *output); #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_GATHER_H_ +#endif // MINDSPORE_LITE_NNACL_GATHER_BASE_H_ diff --git a/mindspore/lite/nnacl/fp16/concat_fp16.h b/mindspore/lite/nnacl/base/reshape_base.h similarity index 70% rename from mindspore/lite/nnacl/fp16/concat_fp16.h rename to mindspore/lite/nnacl/base/reshape_base.h index ae9e1bf618..d2b12302c8 100644 --- a/mindspore/lite/nnacl/fp16/concat_fp16.h +++ b/mindspore/lite/nnacl/base/reshape_base.h @@ -14,18 +14,22 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ -#define MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ +#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ +#define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ +#include #include "nnacl/op_base.h" #ifdef __cplusplus extern "C" { #endif -void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, - int dtype_len); + +inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { + memcpy(output_ptr, input_ptr, data_size); +} + #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ +#endif // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ diff --git a/mindspore/lite/nnacl/base/squeeze_base.h b/mindspore/lite/nnacl/base/squeeze_base.h index 4932bb5609..5f3ea2da4e 100644 --- a/mindspore/lite/nnacl/base/squeeze_base.h +++ b/mindspore/lite/nnacl/base/squeeze_base.h @@ -23,7 +23,7 @@ extern "C" { #endif -inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) { +static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) { if (input_ptr == NULL || output_ptr == NULL) { return NNACL_ERR; } diff --git a/mindspore/lite/nnacl/broadcast_to_parameter.h b/mindspore/lite/nnacl/broadcast_to_parameter.h new file mode 100644 index 0000000000..8cdce592e7 --- /dev/null +++ b/mindspore/lite/nnacl/broadcast_to_parameter.h @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ +#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ + +#include "nnacl/op_base.h" + +#define BROADCAST_TO_SHAPE_MAX_SIZE 4 + +typedef struct BroadcastToParameter { + OpParameter op_parameter_; + int shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; + size_t shape_size_; +} BroadcastToParameter; + +typedef struct BroadcastShapeInfo { + int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; + int input_shape_size_; + int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; + int output_shape_size_; +} BroadcastShapeInfo; + +#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ diff --git a/mindspore/lite/nnacl/reshape.c b/mindspore/lite/nnacl/cast_parameter.h similarity index 68% rename from mindspore/lite/nnacl/reshape.c rename to mindspore/lite/nnacl/cast_parameter.h index 21ebbe95fd..4a56bc3a98 100644 --- a/mindspore/lite/nnacl/reshape.c +++ b/mindspore/lite/nnacl/cast_parameter.h @@ -13,8 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ +#define MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ -#include "nnacl/reshape.h" -#include +#include "nnacl/op_base.h" -void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } +typedef struct CastParameter { + OpParameter op_parameter_; + int dst_type_; + int src_type_; +} CastParameter; + +#endif // MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ diff --git a/mindspore/lite/nnacl/fp32/fill_fp32.h b/mindspore/lite/nnacl/fill_parameter.h similarity index 69% rename from mindspore/lite/nnacl/fp32/fill_fp32.h rename to mindspore/lite/nnacl/fill_parameter.h index 7678a61436..dc87516d7a 100644 --- a/mindspore/lite/nnacl/fp32/fill_fp32.h +++ b/mindspore/lite/nnacl/fill_parameter.h @@ -13,14 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_LITE_NNACL_FILL_H_ -#define MINDSPORE_LITE_NNACL_FILL_H_ +#ifndef MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ +#define MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ -#ifdef ENABLE_NEON -#include -#endif #include "nnacl/op_base.h" -#include "nnacl/errorcode.h" #define FILL_DIMS_MAX_SIZE 4 @@ -31,14 +27,4 @@ typedef struct FillParameter { int num_dims_; } FillParameter; -#ifdef __cplusplus -extern "C" { -#endif -int Fill(float *output, int size, float data); - -int FillInt32(int *output, int size, int data); -#ifdef __cplusplus -} -#endif - -#endif // MINDSPORE_LITE_NNACL_FILL_H_ +#endif // MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ diff --git a/mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c b/mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c index 9a061ae95e..e3d475ac5d 100644 --- a/mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c +++ b/mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include #include "nnacl/fp16/arithmetic_self_fp16.h" diff --git a/mindspore/lite/nnacl/fp16/cast_fp16.c b/mindspore/lite/nnacl/fp16/cast_fp16.c deleted file mode 100644 index d973b2268a..0000000000 --- a/mindspore/lite/nnacl/fp16/cast_fp16.c +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "nnacl/fp16/cast_fp16.h" - -void BoolToFloat16(const bool *input, float16_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (float16_t)input[i]; - } -} - -void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (float16_t)input[i]; - } -} - -void Float16ToInt32(const float16_t *input, int32_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (int32_t)input[i]; - } -} - -void Float16ToInt64(const float16_t *input, int64_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (int64_t)input[i]; - } -} - -#ifndef ENABLE_ARM64 -void Float32ToFloat16(const float *input, float16_t *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (float16_t)input[i]; - } -} - -void Float16ToFloat32(const float16_t *input, float *output, int number) { - for (int i = 0; i < number; ++i) { - output[i] = (float)input[i]; - } -} -#endif diff --git a/mindspore/lite/nnacl/fp16/cast_fp16.h b/mindspore/lite/nnacl/fp16/cast_fp16.h index 301985482c..7493196fd1 100644 --- a/mindspore/lite/nnacl/fp16/cast_fp16.h +++ b/mindspore/lite/nnacl/fp16/cast_fp16.h @@ -18,16 +18,47 @@ #include #include "nnacl/op_base.h" -#include "nnacl/fp32/cast_fp32.h" + #ifdef __cplusplus extern "C" { #endif -void BoolToFloat16(const bool *input, float16_t *output, int number); -void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number); -void Float16ToInt32(const float16_t *input, int32_t *output, int number); -void Float16ToInt64(const float16_t *input, int64_t *output, int number); -void Float32ToFloat16(const float *input, float16_t *output, int number); -void Float16ToFloat32(const float16_t *input, float *output, int number); + +inline void BoolToFloat16(const bool *input, float16_t *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (float16_t)input[i]; + } +} + +inline void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (float16_t)input[i]; + } +} + +inline void Float16ToInt32(const float16_t *input, int32_t *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (int32_t)input[i]; + } +} + +inline void Float16ToInt64(const float16_t *input, int64_t *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (int64_t)input[i]; + } +} + +inline void Float32ToFloat16(const float *input, float16_t *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (float16_t)input[i]; + } +} + +inline void Float16ToFloat32(const float16_t *input, float *output, int number) { + for (int i = 0; i < number; ++i) { + output[i] = (float)input[i]; + } +} + #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp16/concat_fp16.c b/mindspore/lite/nnacl/fp16/concat_fp16.c deleted file mode 100644 index de47da48d0..0000000000 --- a/mindspore/lite/nnacl/fp16/concat_fp16.c +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 
(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "nnacl/fp16/concat_fp16.h" -#include - -void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, - int dtype_len) { - int before_axis_size = 1; - for (int i = 0; i < axis; ++i) { - before_axis_size *= inputs_output_shape[0][i]; - } - // sizeof float16,int32 - int after_axis_size = dtype_len; - for (size_t i = axis + 1; i < shape_size; ++i) { - after_axis_size *= inputs_output_shape[0][i]; - } - int axis_offset = 0; - uint8_t *dst_base = (output); - size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; - for (int i = 0; i < input_num; ++i) { - uint8_t *src_base = (input[i]); - size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; - for (int j = 0; j < before_axis_size; ++j) { - uint8_t *src = src_base + j * input_stride; - uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size; - memcpy(dst, src, input_stride); - } - axis_offset += inputs_output_shape[i][axis]; - } -} diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c index cba65c2c3c..6bcdc9dcdd 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c @@ -1003,12 +1003,6 @@ int ElementMinimumInt(const int *input0, const int *input1, int *output, const i return NNACL_OK; } -int BroadcastMaximum(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, - ArithmeticParameter *param) { - TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); - return ElementMaximum(tile_in0, tile_in1, out, size); -} - int ElementMinimum(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON @@ -1027,65 +1021,6 @@ int ElementMinimum(const float *in0, const float *in1, float *out, int size) { #undef ACCURACY_DATA -#ifdef ENABLE_NNACL_INFER_SHAPE -int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, - int *in_datatype, int *out_datatype, OpParameter *param) { - *out_format = in_format[0]; - *out_datatype = in_datatype[0]; - const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param; - int ndim0 = dim_size[0]; - int ndim1 = dim_size[1]; - int *in_shape0 = in_shape[0]; - int *in_shape1 = in_shape[1]; - if (ndim0 < ndim1) { - arithmetic_parameter->ndim_ = ndim1; - int fill_dim_num = ndim1 - ndim0; - int j = 0; - for (int i = 0; i < ndim1; ++i) { - if (i < fill_dim_num) { - arithmetic_parameter->in_shape0_[i] = 1; - } else { - arithmetic_parameter->in_shape0_[i] = in_shape0[j++]; - } - arithmetic_parameter->in_shape1_[i] = in_shape1[i]; - } - } else if (ndim0 > ndim1) { - arithmetic_parameter->ndim_ = ndim0; - int fill_dim_num = ndim0 - ndim1; - int j = 0; - for (int i = 0; i < ndim0; ++i) { - if (i < fill_dim_num) { - arithmetic_parameter->in_shape1_[i] = 1; - } else { - arithmetic_parameter->in_shape1_[i] = in_shape1[j++]; - } - arithmetic_parameter->in_shape0_[i] = 
in_shape0[i]; - } - } else { - arithmetic_parameter->ndim_ = ndim0; - for (int i = 0; i < ndim0; ++i) { - arithmetic_parameter->in_shape0_[i] = in_shape0[i]; - arithmetic_parameter->in_shape1_[i] = in_shape1[i]; - } - } - int j = 0; - for (size_t i = 0; i < arithmetic_parameter->ndim_; ++i) { - if (arithmetic_parameter->in_shape0_[i] != arithmetic_parameter->in_shape1_[i]) { - if (arithmetic_parameter->in_shape0_[i] == 1) { - out_shape[j++] = arithmetic_parameter->in_shape1_[i]; - } else if (arithmetic_parameter->in_shape1_[i] == 1) { - out_shape[j++] = arithmetic_parameter->in_shape0_[i]; - } else { - return NNACL_PARAM_INVALID; - } - } else { - out_shape[j++] = arithmetic_parameter->in_shape0_[i]; - } - } - return NNACL_OK; -} -#endif - void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, const int *inStrides, const int *outStrides, const int *multiple) { int srcDimSize = inShape[dim]; diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h index 12e8eb59f7..f076e40459 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h @@ -96,8 +96,6 @@ int ElementMaximum(const float *in0, const float *in1, float *out, int size); int ElementMinimum(const float *in0, const float *in1, float *out, int size); int ElementMaximumInt(const int *in0, const int *in1, int *out, int size); int ElementMinimumInt(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastMaximum(const float *in0, const float *in1, float *tile_input0, float *tile_input1, float *out, int size, - ArithmeticParameter *param); /* floor div */ int ElementFloorDiv(const float *in0, const float *in1, float *out, int size); @@ -113,10 +111,6 @@ int ElementModInt(const int *in0, const int *in1, int *out, int size); int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); -#ifdef ENABLE_NNACL_INFER_SHAPE -int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, - int *in_datatype, int *out_datatype, OpParameter *param); -#endif #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c b/mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c index 3ed24b0f93..72888f05b8 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c +++ b/mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c @@ -16,7 +16,6 @@ #include #include -#include #include "nnacl/fp32/arithmetic_self_fp32.h" // abs: diff --git a/mindspore/lite/nnacl/fp32/broadcast_to_fp32.h b/mindspore/lite/nnacl/fp32/broadcast_to_fp32.h index d68477435d..5c354be667 100644 --- a/mindspore/lite/nnacl/fp32/broadcast_to_fp32.h +++ b/mindspore/lite/nnacl/fp32/broadcast_to_fp32.h @@ -13,28 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ -#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ +#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ +#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ -#ifdef ENABLE_NEON -#include -#endif #include "nnacl/op_base.h" - -#define BROADCAST_TO_SHAPE_MAX_SIZE 4 - -typedef struct BroadcastToParameter { - OpParameter op_parameter_; - int shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; - size_t shape_size_; -} BroadcastToParameter; - -typedef struct BroadcastShapeInfo { - int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; - int input_shape_size_; - int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; - int output_shape_size_; -} BroadcastShapeInfo; +#include "nnacl/broadcast_to_parameter.h" #ifdef __cplusplus extern "C" { @@ -44,4 +27,4 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu } #endif -#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ +#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ diff --git a/mindspore/lite/nnacl/fp32/cast_fp32.h b/mindspore/lite/nnacl/fp32/cast_fp32.h deleted file mode 100644 index 4923e2e78d..0000000000 --- a/mindspore/lite/nnacl/fp32/cast_fp32.h +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_LITE_NNACL_CAST_H_ -#define MINDSPORE_LITE_NNACL_CAST_H_ - -#ifdef ENABLE_NEON -#include -#endif -#include "nnacl/op_base.h" - -// For cast. 
-typedef struct CastParameter { - OpParameter op_parameter_; - int src_type_; - int dst_type_; -} CastParameter; - -#ifdef __cplusplus -extern "C" { -#endif -void BoolToFloat32(const bool *input, float *output, int number); -void Uint8ToFloat32(const uint8_t *input, float *output, int number); -void Uint8ToInt8(const uint8_t *input, int8_t *output, int number); -void Int8ToUint8(const int8_t *input, uint8_t *output, int number); -void Int32ToFloat32(const int32_t *input, float *output, int number); -void Fp16ToFloat32(const uint16_t *input, float *output, int number); -void Float32ToFp16(const float *input, uint16_t *output, int number); -void Float32ToInt32(const float *input, int32_t *output, int number); -void Float32ToInt64(const float *input, int64_t *output, int number); -void Int32ToInt64(const int32_t *input, int64_t *output, int number); -#ifdef __cplusplus -} -#endif - -#endif // MINDSPORE_LITE_NNACL_CAST_H_ diff --git a/mindspore/lite/nnacl/fp32/common_func_fp32.c b/mindspore/lite/nnacl/fp32/common_func_fp32.c index 01914b2a5c..c079ee3863 100644 --- a/mindspore/lite/nnacl/fp32/common_func_fp32.c +++ b/mindspore/lite/nnacl/fp32/common_func_fp32.c @@ -115,101 +115,3 @@ void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size } } #endif - -typedef union float32_bits { - unsigned int u; - float f; -} float32_bits; - -float ShortToFloat32(uint16_t src_value) { - const float32_bits magic = {113 << 23}; - const unsigned int shifted_exp = 0x7c00 << 13; - float32_bits o; - - o.u = (src_value & 0x7fff) << 13; - unsigned int exp = shifted_exp & o.u; - o.u += (127 - 15) << 23; - - if (exp == shifted_exp) { - o.u += (128 - 16) << 23; - } else if (exp == 0) { - o.u += 1 << 23; - o.f -= magic.f; - } - - o.u |= (src_value & 0x8000) << 16; - return o.f; -} - -static const unsigned int FP32_BIT_SIZE = 32; -static const unsigned int FP32_EXPONENT_BIAS = 127; -static const unsigned int FP32_SIGNIFICAND = 23; - -static const unsigned int FP32_EXPONENT_MAX = 255; - -static const unsigned int FP16_BIT_SIZE = 16; -static const unsigned int FP16_EXPONENT_BIAS = 15; -static const unsigned int FP16_SIGNIFICAND = 10; - -static const int FP16_EXPONENT_MAX = 30; -static const int FP16_EXPONENT_MIN = -10; - -uint16_t Float32ToShort(float src_value) { - float *psrcValue = NULL; - psrcValue = &src_value; - unsigned int srcValueBit = (unsigned int)(*psrcValue); - unsigned int sign = srcValueBit >> (FP32_BIT_SIZE - 1); - unsigned int mantissa = srcValueBit & 0x007FFFFF; - // exponent - int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS; - uint16_t res; - if (exp > 0 && exp < FP16_EXPONENT_MAX) { - // use rte rounding mode, round the significand, combine sign, exponent and significand into a short. - res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) | - ((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); - } else if (srcValueBit == 0) { - res = 0; - } else { - if (exp <= 0) { - if (exp < FP16_EXPONENT_MIN) { - // value is less than min half float point - res = 0; - } else { - // normalized single, magnitude is less than min normal half float point. 
- mantissa = (mantissa | 0x00800000) >> (1 - exp); - // round to nearest - if ((mantissa & 0x00001000) > 0) { - mantissa = mantissa + 0x00002000; - } - // combine sign & mantissa (exp is zero to get denormalized number) - res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); - } - } else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) { - if (mantissa == 0) { - // input float is infinity, return infinity half - res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; - } else { - // input float is NaN, return half NaN - res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); - } - } else { - // exp > 0, normalized single, round to nearest - if ((mantissa & 0x00001000) > 0) { - mantissa = mantissa + 0x00002000; - if ((mantissa & 0x00800000) > 0) { - mantissa = 0; - exp = exp + 1; - } - } - if (exp > FP16_EXPONENT_MAX) { - // exponent overflow - return infinity half - res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; - } else { - // combine sign, exp and mantissa into normalized half - res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) | - (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); - } - } - } - return res; -} diff --git a/mindspore/lite/nnacl/fp32/common_func_fp32.h b/mindspore/lite/nnacl/fp32/common_func_fp32.h index 81f73e1b8f..70a3f8a928 100644 --- a/mindspore/lite/nnacl/fp32/common_func_fp32.h +++ b/mindspore/lite/nnacl/fp32/common_func_fp32.h @@ -33,10 +33,6 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); -float ShortToFloat32(uint16_t src_value); - -uint16_t Float32ToShort(float src_value); - #if defined(ENABLE_ARM) || defined(ENABLE_SSE) void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, diff --git a/mindspore/lite/nnacl/fp32/expandDims_fp32.c b/mindspore/lite/nnacl/fp32/expandDims_fp32.c index 373199f1ce..ebb8decd08 100644 --- a/mindspore/lite/nnacl/fp32/expandDims_fp32.c +++ b/mindspore/lite/nnacl/fp32/expandDims_fp32.c @@ -13,12 +13,3 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include "nnacl/fp32/expandDims_fp32.h" -#include -#include "nnacl/errorcode.h" - -int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) { - memcpy(output_ptr, input_ptr, data_size); - return NNACL_OK; -} diff --git a/mindspore/lite/nnacl/fp32/range_fp32.c b/mindspore/lite/nnacl/fp32/range_fp32.c deleted file mode 100644 index e1097a2b8c..0000000000 --- a/mindspore/lite/nnacl/fp32/range_fp32.c +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "nnacl/fp32/range_fp32.h" - -void Range(float *output_ptr, float start, float delta, int nums) { - for (int i = 0; i < nums; ++i, start += delta) { - output_ptr[i] = start; - } -} - -void RangeInt(int *output_ptr, int start, int delta, int nums) { - for (int i = 0; i < nums; ++i, start += delta) { - output_ptr[i] = start; - } -} diff --git a/mindspore/lite/nnacl/fp32/range_fp32.h b/mindspore/lite/nnacl/fp32/range_fp32.h index 0b1e44c49f..dfa49af0be 100644 --- a/mindspore/lite/nnacl/fp32/range_fp32.h +++ b/mindspore/lite/nnacl/fp32/range_fp32.h @@ -31,8 +31,18 @@ typedef struct RangeParameter { #ifdef __cplusplus extern "C" { #endif -void Range(float *output_ptr, float start, float delta, int nums); -void RangeInt(int *output_ptr, int start, int delta, int nums); +inline void Range(float *output_ptr, float start, float delta, int nums) { + for (int i = 0; i < nums; ++i, start += delta) { + output_ptr[i] = start; + } +} + +inline void RangeInt(int *output_ptr, int start, int delta, int nums) { + for (int i = 0; i < nums; ++i, start += delta) { + output_ptr[i] = start; + } +} + #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32/rank_fp32.c b/mindspore/lite/nnacl/fp32/rank_fp32.c deleted file mode 100644 index e7c0de453f..0000000000 --- a/mindspore/lite/nnacl/fp32/rank_fp32.c +++ /dev/null @@ -1,19 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "nnacl/fp32/rank_fp32.h" - -void Rank(float *output, int rank) { output[0] = (float)(rank); } diff --git a/mindspore/lite/nnacl/fp32/rank_fp32.h b/mindspore/lite/nnacl/fp32/rank_fp32.h index 6e66efb816..7c132c4346 100644 --- a/mindspore/lite/nnacl/fp32/rank_fp32.h +++ b/mindspore/lite/nnacl/fp32/rank_fp32.h @@ -21,7 +21,10 @@ #ifdef __cplusplus extern "C" { #endif -void Rank(float *output, int rank); +inline void Rank(float *output, int rank) { + output[0] = (float)(rank); + return; +} #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/int8/arithmetic_self_int8.c b/mindspore/lite/nnacl/int8/arithmetic_self_int8.c index 5a737b760d..8a3b36968e 100644 --- a/mindspore/lite/nnacl/int8/arithmetic_self_int8.c +++ b/mindspore/lite/nnacl/int8/arithmetic_self_int8.c @@ -15,7 +15,6 @@ */ #include -#include #include "nnacl/int8/arithmetic_self_int8.h" #ifdef ENABLE_NEON #include diff --git a/mindspore/lite/nnacl/nnacl_common.c b/mindspore/lite/nnacl/nnacl_common.c index a07bdc8f90..b38cc54a4c 100644 --- a/mindspore/lite/nnacl/nnacl_common.c +++ b/mindspore/lite/nnacl/nnacl_common.c @@ -15,3 +15,88 @@ */ #include "nnacl/nnacl_common.h" + +typedef union float32_bits { + unsigned int u; + float f; +} float32_bits; + +float ShortToFloat32(uint16_t src_value) { + const float32_bits magic = {113 << 23}; + const unsigned int shifted_exp = 0x7c00 << 13; + float32_bits o; + + o.u = (src_value & 0x7fff) << 13; + unsigned int exp = shifted_exp & o.u; + o.u += (127 - 15) << 23; + + if (exp == shifted_exp) { + o.u += (128 - 16) << 23; + } else if (exp == 0) { + o.u += 1 << 23; + o.f -= magic.f; + } + + o.u |= (src_value & 0x8000) << 16; + return o.f; +} + +uint16_t Float32ToShort(float src_value) { + float *psrcValue = NULL; + psrcValue = &src_value; + unsigned int srcValueBit = (unsigned int)(*psrcValue); + unsigned int sign = srcValueBit >> (FP32_BIT_SIZE - 1); + unsigned int mantissa = srcValueBit & 0x007FFFFF; + // exponent + int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS; + uint16_t res; + if (exp > 0 && exp < FP16_EXPONENT_MAX) { + // use rte rounding mode, round the significand, combine sign, exponent and significand into a short. + res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) | + ((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); + } else if (srcValueBit == 0) { + res = 0; + } else { + if (exp <= 0) { + if (exp < FP16_EXPONENT_MIN) { + // value is less than min half float point + res = 0; + } else { + // normalized single, magnitude is less than min normal half float point. 
+ mantissa = (mantissa | 0x00800000) >> (1 - exp); + // round to nearest + if ((mantissa & 0x00001000) > 0) { + mantissa = mantissa + 0x00002000; + } + // combine sign & mantissa (exp is zero to get denormalized number) + res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); + } + } else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) { + if (mantissa == 0) { + // input float is infinity, return infinity half + res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; + } else { + // input float is NaN, return half NaN + res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); + } + } else { + // exp > 0, normalized single, round to nearest + if ((mantissa & 0x00001000) > 0) { + mantissa = mantissa + 0x00002000; + if ((mantissa & 0x00800000) > 0) { + mantissa = 0; + exp = exp + 1; + } + } + if (exp > FP16_EXPONENT_MAX) { + // exponent overflow - return infinity half + res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; + } else { + // combine sign, exp and mantissa into normalized half + res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) | + (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); + } + } + } + return res; +} diff --git a/mindspore/lite/nnacl/nnacl_common.h b/mindspore/lite/nnacl/nnacl_common.h index 65ae6de172..ae1adccf46 100644 --- a/mindspore/lite/nnacl/nnacl_common.h +++ b/mindspore/lite/nnacl/nnacl_common.h @@ -17,6 +17,8 @@ #ifndef MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ #define MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ +#include "nnacl/op_base.h" + #ifdef __cplusplus extern "C" { #endif @@ -29,6 +31,18 @@ inline void ComputeStrides(const int *shape, int *strides, const int ndim) { } } +static const unsigned int FP32_BIT_SIZE = 32; +static const unsigned int FP32_EXPONENT_BIAS = 127; +static const unsigned int FP32_SIGNIFICAND = 23; +static const unsigned int FP32_EXPONENT_MAX = 255; +static const unsigned int FP16_BIT_SIZE = 16; +static const unsigned int FP16_EXPONENT_BIAS = 15; +static const unsigned int FP16_SIGNIFICAND = 10; +static const int FP16_EXPONENT_MAX = 30; +static const int FP16_EXPONENT_MIN = -10; +float ShortToFloat32(uint16_t src_value); +uint16_t Float32ToShort(float src_value); + #ifdef __cplusplus } #endif diff --git a/mindspore/lite/src/ops/assert_op.cc b/mindspore/lite/src/ops/assert_op.cc index fce3cd8b43..83074c1b7f 100644 --- a/mindspore/lite/src/ops/assert_op.cc +++ b/mindspore/lite/src/ops/assert_op.cc @@ -22,7 +22,6 @@ namespace mindspore { namespace lite { #ifdef PRIMITIVE_WRITEABLE - int AssertOP::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { this->primitive_ = new (std::nothrow) schema::PrimitiveT; @@ -67,6 +66,5 @@ Registry AssertRegistry(schema::PrimitiveType_Assert, AssertCreator); #endif int AssertOP::InferShape(std::vector inputs_, std::vector outputs_) { return RET_OK; } - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/merge.cc b/mindspore/lite/src/ops/merge.cc index a959f45d6f..c93a9e0442 100644 --- a/mindspore/lite/src/ops/merge.cc +++ b/mindspore/lite/src/ops/merge.cc @@ -23,7 +23,6 @@ namespace mindspore { namespace lite { #ifdef PRIMITIVE_WRITEABLE - int Merge::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { this->primitive_ = new (std::nothrow) schema::PrimitiveT; @@ -99,6 +98,5 @@ int Merge::InferShape(std::vector inputs_, std::vector outpu } return RET_OK; } - } // namespace lite } // namespace mindspore 
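
Note for readers following the relocation above: ShortToFloat32 and Float32ToShort now live in nnacl/nnacl_common.c, with their declarations and the FP16/FP32 layout constants exported from nnacl/nnacl_common.h, so callers such as Fp16ToFloat32/Float32ToFp16 in base/cast_base.h pick them up through that single header. The short C sketch below is illustrative only and not part of the patch; it assumes the nnacl/ tree is on the compiler include path and simply round-trips a value through the two helpers.

/* Illustrative sketch, not part of the patch: round-trip a float through the
 * half-precision helpers centralized in nnacl/nnacl_common.h by this change.
 * Assumes the nnacl/ directory is on the compiler include path. */
#include <stdint.h>
#include <stdio.h>
#include "nnacl/nnacl_common.h"

int main(void) {
  float value = 1.5f;                           /* exactly representable in binary16 */
  uint16_t half_bits = Float32ToShort(value);   /* fp32 -> IEEE 754 binary16 bit pattern */
  float restored = ShortToFloat32(half_bits);   /* binary16 bit pattern -> fp32 */
  printf("%f -> 0x%04x -> %f\n", value, half_bits, restored);
  return 0;
}
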
diff --git a/mindspore/lite/src/ops/mod.cc b/mindspore/lite/src/ops/mod.cc index ebcaa6458d..c0024408ac 100644 --- a/mindspore/lite/src/ops/mod.cc +++ b/mindspore/lite/src/ops/mod.cc @@ -23,7 +23,6 @@ namespace mindspore { namespace lite { #ifdef PRIMITIVE_WRITEABLE - int Mod::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { this->primitive_ = new (std::nothrow) schema::PrimitiveT; diff --git a/mindspore/lite/src/ops/populate/assert_populate.cc b/mindspore/lite/src/ops/populate/assert_populate.cc index 02db20243d..ef0f4e0b02 100644 --- a/mindspore/lite/src/ops/populate/assert_populate.cc +++ b/mindspore/lite/src/ops/populate/assert_populate.cc @@ -20,7 +20,6 @@ namespace mindspore { namespace lite { - OpParameter *PopulateAssertParameter(const mindspore::lite::PrimitiveC *primitive) { OpParameter *assert_parameter = reinterpret_cast(malloc(sizeof(OpParameter))); if (assert_parameter == nullptr) { diff --git a/mindspore/lite/src/ops/populate/cast_populate.cc b/mindspore/lite/src/ops/populate/cast_populate.cc index 62aa39d292..3543a2a53c 100644 --- a/mindspore/lite/src/ops/populate/cast_populate.cc +++ b/mindspore/lite/src/ops/populate/cast_populate.cc @@ -17,7 +17,7 @@ #include "src/ops/cast.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/fp32/cast_fp32.h" +#include "nnacl/cast_parameter.h" namespace mindspore { namespace lite { @@ -29,9 +29,11 @@ OpParameter *PopulateCastParameter(const mindspore::lite::PrimitiveC *primitive) } memset(cast_param, 0, sizeof(CastParameter)); cast_param->op_parameter_.type_ = primitive->Type(); + auto param = reinterpret_cast(const_cast(primitive)); cast_param->src_type_ = param->GetSrcT(); cast_param->dst_type_ = param->GetDstT(); + return reinterpret_cast(cast_param); } diff --git a/mindspore/lite/src/ops/populate/expand_dims_populate.cc b/mindspore/lite/src/ops/populate/expand_dims_populate.cc index 23696d7575..63901a9993 100644 --- a/mindspore/lite/src/ops/populate/expand_dims_populate.cc +++ b/mindspore/lite/src/ops/populate/expand_dims_populate.cc @@ -14,24 +14,19 @@ * limitations under the License. 
*/ -#include "src/ops/expand_dims.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/fp32/expandDims_fp32.h" namespace mindspore { namespace lite { OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *primitive) { - auto param = reinterpret_cast(const_cast(primitive)); - ExpandDimsParameter *expand_dims_param = reinterpret_cast(malloc(sizeof(ExpandDimsParameter))); + OpParameter *expand_dims_param = reinterpret_cast(malloc(sizeof(OpParameter))); if (expand_dims_param == nullptr) { MS_LOG(ERROR) << "malloc ExpandDimsParameter failed."; return nullptr; } - memset(expand_dims_param, 0, sizeof(ExpandDimsParameter)); - expand_dims_param->op_parameter_.type_ = primitive->Type(); - expand_dims_param->dim_ = param->GetDim(); + memset(expand_dims_param, 0, sizeof(OpParameter)); return reinterpret_cast(expand_dims_param); } diff --git a/mindspore/lite/src/ops/populate/fill_populate.cc b/mindspore/lite/src/ops/populate/fill_populate.cc index b4ce010664..c68d457fce 100644 --- a/mindspore/lite/src/ops/populate/fill_populate.cc +++ b/mindspore/lite/src/ops/populate/fill_populate.cc @@ -17,7 +17,7 @@ #include "src/ops/fill.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/fp32/fill_fp32.h" +#include "nnacl/fill_parameter.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/src/ops/populate/layer_norm_populate.h b/mindspore/lite/src/ops/populate/layer_norm_populate.h index 4d16529f40..4b99e4737c 100644 --- a/mindspore/lite/src/ops/populate/layer_norm_populate.h +++ b/mindspore/lite/src/ops/populate/layer_norm_populate.h @@ -20,9 +20,7 @@ namespace mindspore { namespace lite { - OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive); - } // namespace lite } // namespace mindspore #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ diff --git a/mindspore/lite/src/ops/populate/merge_populate.cc b/mindspore/lite/src/ops/populate/merge_populate.cc index ec23291934..1945864f05 100644 --- a/mindspore/lite/src/ops/populate/merge_populate.cc +++ b/mindspore/lite/src/ops/populate/merge_populate.cc @@ -19,7 +19,6 @@ namespace mindspore { namespace lite { - OpParameter *PopulateMergeParameter(const mindspore::lite::PrimitiveC *primitive) { OpParameter *merge_parameter = reinterpret_cast(malloc(sizeof(OpParameter))); if (merge_parameter == nullptr) { diff --git a/mindspore/lite/src/ops/populate/strided_slice_populate.cc b/mindspore/lite/src/ops/populate/strided_slice_populate.cc index 441843518f..b98ef367b2 100644 --- a/mindspore/lite/src/ops/populate/strided_slice_populate.cc +++ b/mindspore/lite/src/ops/populate/strided_slice_populate.cc @@ -15,6 +15,7 @@ */ #include "src/ops/populate/strided_slice_populate.h" +#include #include "src/ops/strided_slice.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" diff --git a/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc b/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc index ab95a57d32..73b463e67b 100644 --- a/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc +++ b/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc @@ -36,6 +36,5 @@ OpParameter *PopulateTensorListSetItemParameter(const mindspore::lite::Primitive } Registry TensorListSetItemParameterRegistry(schema::PrimitiveType_TensorListSetItem, PopulateTensorListSetItemParameter); - } // namespace lite } // namespace mindspore diff --git 
a/mindspore/lite/src/ops/reciprocal.cc b/mindspore/lite/src/ops/reciprocal.cc index 86966a584c..9dba025aab 100644 --- a/mindspore/lite/src/ops/reciprocal.cc +++ b/mindspore/lite/src/ops/reciprocal.cc @@ -28,6 +28,5 @@ PrimitiveC *ReciprocalCreator(const schema::Primitive *primitive) { } Registry ReciprocalRegistry(schema::PrimitiveType_Reciprocal, ReciprocalCreator); #endif - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/reciprocal.h b/mindspore/lite/src/ops/reciprocal.h index 2af5b5d230..20677e0c33 100644 --- a/mindspore/lite/src/ops/reciprocal.h +++ b/mindspore/lite/src/ops/reciprocal.h @@ -39,7 +39,6 @@ class Reciprocal : public ArithmeticSelf { } #endif }; - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/space_to_batch_nd.cc b/mindspore/lite/src/ops/space_to_batch_nd.cc index 3d0cba2086..273f2d3555 100644 --- a/mindspore/lite/src/ops/space_to_batch_nd.cc +++ b/mindspore/lite/src/ops/space_to_batch_nd.cc @@ -15,6 +15,7 @@ */ #include "src/ops/space_to_batch_nd.h" +#include #include "src/common/common.h" #ifndef PRIMITIVE_WRITEABLE diff --git a/mindspore/lite/src/ops/space_to_depth.cc b/mindspore/lite/src/ops/space_to_depth.cc index 764a332308..f719bf59af 100644 --- a/mindspore/lite/src/ops/space_to_depth.cc +++ b/mindspore/lite/src/ops/space_to_depth.cc @@ -15,6 +15,7 @@ */ #include "src/ops/space_to_depth.h" +#include #include "src/common/common.h" #ifndef PRIMITIVE_WRITEABLE diff --git a/mindspore/lite/src/ops/tile.cc b/mindspore/lite/src/ops/tile.cc index 90e86752eb..aa142c8e24 100644 --- a/mindspore/lite/src/ops/tile.cc +++ b/mindspore/lite/src/ops/tile.cc @@ -15,6 +15,7 @@ */ #include "src/ops/tile.h" +#include #include #ifndef PRIMITIVE_WRITEABLE diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc index 26a08f9570..03488a7e5b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc @@ -14,11 +14,8 @@ * limitations under the License. */ #include "src/runtime/kernel/arm/fp16/cast_fp16.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "nnacl/fp16/cast_fp16.h" -#include "nnacl/op_base.h" #include "src/runtime/runtime_api.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h index 72f9dbade8..91b407c6f2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.h @@ -18,6 +18,9 @@ #include #include "src/lite_kernel.h" +#include "nnacl/op_base.h" +#include "nnacl/fp16/cast_fp16.h" +#include "nnacl/base/cast_base.h" namespace mindspore::kernel { class CastFp16CPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc index 5ff724aa61..ea540ab43e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc @@ -113,8 +113,9 @@ int ConcatFp16CPUKernel::Run() { fp16_output_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); } int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? 
sizeof(int32_t) : sizeof(float16_t); - ConcatFp16(reinterpret_cast(fp16_inputs_.data()), input_num, concat_param_->axis_, - inputs_output_shape.data(), output_shape.size(), reinterpret_cast(fp16_output_), dtype_len); + + Concat(reinterpret_cast(fp16_inputs_.data()), input_num, concat_param_->axis_, inputs_output_shape.data(), + output_shape.size(), reinterpret_cast(fp16_output_), 0, 1, dtype_len); if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) { Float16ToFloat32(fp16_output_, reinterpret_cast(output_addr), out_tensors_.at(0)->ElementsNum()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h index ee223041d7..368646276c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h @@ -20,7 +20,7 @@ #include #include "include/context.h" #include "include/errorcode.h" -#include "nnacl/fp16/concat_fp16.h" +#include "nnacl/base/concat_base.h" #include "nnacl/concat_parameter.h" #include "nnacl/fp16/cast_fp16.h" #include "src/lite_kernel.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc index 4c9f94042a..7d01aa4159 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc @@ -15,9 +15,6 @@ */ #include "src/runtime/kernel/arm/fp16/reshape_fp16.h" -#include -#include "nnacl/fp16/cast_fp16.h" -#include "nnacl/reshape.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h index b06f7ec4d4..58a93984ba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h @@ -18,8 +18,9 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ #include +#include "nnacl/fp16/cast_fp16.h" +#include "nnacl/base/reshape_base.h" #include "src/lite_kernel.h" - #include "include/context.h" #include "src/runtime/kernel/arm/fp32/reshape_fp32.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc index 4c99768df3..50783d362b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc @@ -14,14 +14,9 @@ * limitations under the License. 
*/ #include "src/runtime/kernel/arm/fp32/cast_fp32.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "src/tensor.h" -#include "nnacl/fp32/cast_fp32.h" -#include "nnacl/op_base.h" #include "src/runtime/runtime_api.h" -#include "include/errorcode.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.h index c320d3ddc1..afcf19053d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.h @@ -17,7 +17,11 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ #include +#include "include/errorcode.h" #include "src/lite_kernel.h" +#include "src/tensor.h" +#include "nnacl/op_base.h" +#include "nnacl/base/cast_base.h" namespace mindspore::kernel { class CastCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc index 9ab4e4b8e2..753733e9e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc @@ -40,8 +40,8 @@ int ConcatCPUKernel::ReSize() { int ConcatCPUKernel::DoConcat(int task_id) { auto input_num = in_tensors_.size(); - std::vector inputs_addr(input_num, nullptr); - std::vector inputs_output_shape(input_num + 1, nullptr); + std::vector inputs_addr(input_num, nullptr); + std::vector inputs_output_shape(input_num + 1, nullptr); std::vector> shapes; for (size_t i = 0; i < input_num; ++i) { @@ -54,7 +54,7 @@ int ConcatCPUKernel::DoConcat(int task_id) { auto output_addr = out_tensors_.at(0)->MutableData(); Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(), - output_addr, task_id, op_parameter_->thread_num_); + output_addr, task_id, op_parameter_->thread_num_, sizeof(float)); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.h index 6e4bb9175e..8a03d57059 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.h @@ -18,7 +18,7 @@ #include #include "src/lite_kernel.h" -#include "nnacl/fp32/concat_fp32.h" +#include "nnacl/base/concat_base.h" #include "nnacl/concat_parameter.h" #include "include/errorcode.h" #include "src/runtime/runtime_api.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc index fc8675176c..a77031ef2c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc @@ -15,10 +15,8 @@ */ #include "src/runtime/kernel/arm/fp32/expandDims_fp32.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "include/errorcode.h" #include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.h index 2598079261..5185fea0e4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.h @@ -18,8 +18,9 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXPANDDIMS_H_ #include +#include 
"include/errorcode.h" #include "src/lite_kernel.h" -#include "nnacl/fp32/expandDims_fp32.h" +#include "nnacl/base/expand_dims_base.h" #include "schema/model_generated.h" #include "include/context.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc index 25c7a42eb8..1526b10577 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc @@ -51,7 +51,7 @@ int FillCPUKernel::DoFill(int task_id) { auto input_tensor = in_tensors_.at(0); int ret = RET_OK; if (input_tensor->data_type() == kNumberTypeFloat32 || input_tensor->data_type() == kNumberTypeFloat) { - ret = Fill(out_ptr_ + offset, size, src_data_); + ret = FillFp32(out_ptr_ + offset, size, src_data_); } else if (input_tensor->data_type() == kNumberTypeInt32 || input_tensor->data_type() == kNumberTypeInt) { ret = FillInt32(int32_out_ptr_ + offset, size, int32_src_data_); } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.h index c6990e267c..09228635f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.h @@ -18,9 +18,8 @@ #include #include "src/lite_kernel.h" - #include "include/context.h" -#include "nnacl/fp32/fill_fp32.h" +#include "nnacl/base/fill_base.h" using mindspore::lite::InnerContext; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc index 37557852bb..588bae5cbe 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc @@ -16,7 +16,7 @@ #include "src/runtime/kernel/arm/fp32/gatherNd_fp32.h" #include -#include +#include #include "schema/model_generated.h" #include "include/errorcode.h" #include "src/kernel_registry.h" @@ -29,7 +29,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_GatherNd; namespace mindspore::kernel { - GatherNdCPUKernel::~GatherNdCPUKernel() { if (in_offset_ != nullptr) { free(in_offset_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h index 48ad065332..7f719ae29d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h @@ -17,10 +17,10 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ +#include #include #include "nnacl/fp32/gatherNd_fp32.h" #include "src/lite_kernel.h" - #include "include/context.h" #include "nnacl/op_base.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc index 188d00d86c..22a191bb31 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc @@ -13,14 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
index 37557852bb..588bae5cbe 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
@@ -16,7 +16,7 @@
 #include "src/runtime/kernel/arm/fp32/gatherNd_fp32.h"
 #include
-#include
+#include
 #include "schema/model_generated.h"
 #include "include/errorcode.h"
 #include "src/kernel_registry.h"
@@ -29,7 +29,6 @@
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_GatherNd;
 namespace mindspore::kernel {
-
 GatherNdCPUKernel::~GatherNdCPUKernel() {
   if (in_offset_ != nullptr) {
     free(in_offset_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h
index 48ad065332..7f719ae29d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.h
@@ -17,10 +17,10 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_
+#include
 #include
 #include "nnacl/fp32/gatherNd_fp32.h"
 #include "src/lite_kernel.h"
-
 #include "include/context.h"
 #include "nnacl/op_base.h"
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
index 188d00d86c..22a191bb31 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
@@ -13,14 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "src/runtime/kernel/arm/fp32/gather_fp32.h"
-#include
-#include "nnacl/gather_parameter.h"
-#include "nnacl/fp32/gather_fp32.h"
+#include
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/runtime_api.h"
-#include "include/errorcode.h"
 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
@@ -29,7 +27,6 @@
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Gather;
 namespace mindspore::kernel {
-
 int GatherCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
@@ -76,7 +73,7 @@ int GatherCPUKernel::DoGather(int task_id) {
   } else {
     input_ptr += thread_stride * limit;
     output_ptr += thread_stride * indices_element_size;
-    error_code = Gather(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr);
+    error_code = GatherFp32(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr);
   }
   return error_code;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.h
index 65eec3e8c4..212da0c7dc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.h
@@ -18,8 +18,10 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHER_H_
 #include
-#include "nnacl/gather_parameter.h"
+#include "include/errorcode.h"
 #include "src/lite_kernel.h"
+#include "nnacl/gather_parameter.h"
+#include "nnacl/base/gather_base.h"
 namespace mindspore::kernel {
 class GatherCPUKernel : public LiteKernel {
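GatherFp32 keeps the same argument list as the old Gather call above: an input block of `count` outer slices, each holding `limit` rows of `inner_size` floats, from which `indices_element_size` rows are copied per slice. The loop below is an illustrative reference only, with parameter names inferred from that call site; the real routine reached through nnacl/base/gather_base.h may be organized differently.

```cpp
// Reference gather loop (hypothetical body, not the nnacl source): copies the
// indexed rows of each outer slice into the output, bounds-checking indices.
#include <cstring>

int GatherFp32Ref(const float *input, int outer_count, int inner_size, int limit,
                  const int *indices, int indices_num, float *output) {
  for (int o = 0; o < outer_count; ++o) {
    const float *src_slice = input + o * limit * inner_size;
    float *dst_slice = output + o * indices_num * inner_size;
    for (int i = 0; i < indices_num; ++i) {
      int idx = indices[i];
      if (idx < 0 || idx >= limit) {
        return -1;  // out-of-range index
      }
      memcpy(dst_slice + i * inner_size, src_slice + idx * inner_size, inner_size * sizeof(float));
    }
  }
  return 0;
}
```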
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc
index 5e6e455176..d138775777 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc
@@ -15,8 +15,6 @@
  */
 #include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
-#include
-#include "nnacl/reshape.h"
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h
index 4a1bc0f7a0..400dfe1f3e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h
@@ -20,6 +20,7 @@
 #include
 #include "src/lite_kernel.h"
 #include "include/context.h"
+#include "nnacl/base/reshape_base.h"
 using mindspore::lite::InnerContext;
@@ -35,8 +36,6 @@ class ReshapeCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-
- private:
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
index 64ee0ee81b..bd7f82ded7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
@@ -15,6 +15,7 @@
  */
 #include "src/runtime/kernel/arm/fp32/space_to_depth_fp32.h"
+#include
 #include
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
index 34d3095f16..4f15af5593 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
@@ -16,7 +16,7 @@
 #include "src/runtime/kernel/arm/int8/gatherNd_int8.h"
 #include
-#include
+#include
 #include "schema/model_generated.h"
 #include "include/errorcode.h"
 #include "src/kernel_registry.h"
@@ -30,7 +30,6 @@
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_GatherNd;
 namespace mindspore::kernel {
-
 GatherNdInt8CPUKernel::~GatherNdInt8CPUKernel() {
   if (in_offset_ != nullptr) {
     free(in_offset_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
index ef17396656..eb74e99803 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
@@ -16,7 +16,6 @@
 #include "src/runtime/kernel/arm/int8/pooling_int8.h"
 #include "nnacl/int8/pooling_int8.h"
-#include "nnacl/fp32/cast_fp32.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"
 #include "src/kernel_registry.h"
diff --git a/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc b/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
index c39b4c2790..1f9e7e3dad 100644
--- a/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
@@ -35,7 +35,7 @@ int CastNPUKernel::SetNPUInputs(const std::vector &inputs, const
     return RET_ERROR;
   }
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast(cast_parameter_->dst_type_)));
+  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast(outputs[0]->data_type())));
   op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast(inputs[0]->data_type())));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/npu/cast_npu.h b/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
index 9da4e714f8..7dd30cb2c0 100644
--- a/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
@@ -19,16 +19,14 @@
 #include
 #include "src/runtime/kernel/npu/npu_kernel.h"
 #include "include/graph/op/all_ops.h"
-#include "nnacl/fp32/cast_fp32.h"
+
 namespace mindspore::kernel {
 class CastNPUKernel : public NPUKernel {
  public:
  CastNPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs,
               const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive)
-      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    cast_parameter_ = reinterpret_cast(parameter);
-  }
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~CastNPUKernel() override;
   int IsSupport(const std::vector &inputs, const std::vector &outputs,
@@ -39,7 +37,6 @@ class CastNPUKernel : public NPUKernel {
  private:
   hiai::op::CastT *op_ = nullptr;
-  CastParameter *cast_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_
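For the NPU cast kernel, the destination dtype attribute is now read from the output tensor instead of a cached CastParameter, so the attribute can no longer drift from what the graph actually produces. The mock below condenses that idea into a self-contained sketch; none of the types here are the real HiAI or MindSpore Lite classes.

```cpp
// Self-contained mock of the design choice above: both CastT dtype attributes
// are derived from the tensors themselves, so no parameter copy is stored.
#include <cassert>

enum MockTypeId { kMockFloat32 = 0, kMockInt32 = 1 };

struct MockTensor {
  MockTypeId type;
  MockTypeId data_type() const { return type; }
};

struct MockCastOp {
  MockTypeId src = kMockFloat32;
  MockTypeId dst = kMockFloat32;
  void set_attr_src_dtype(MockTypeId t) { src = t; }
  void set_attr_dst_dtype(MockTypeId t) { dst = t; }
};

void SetCastAttrs(MockCastOp *op, const MockTensor &input, const MockTensor &output) {
  op->set_attr_src_dtype(input.data_type());
  op->set_attr_dst_dtype(output.data_type());  // follows the output tensor, not a cached parameter
}

int main() {
  MockTensor in{kMockFloat32}, out{kMockInt32};
  MockCastOp op;
  SetCastAttrs(&op, in, out);
  assert(op.src == kMockFloat32 && op.dst == kMockInt32);
  return 0;
}
```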
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
index 63af1e5c28..53f46fd4e1 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
@@ -20,7 +20,7 @@
 #include
 #include
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
-#include "nnacl/fp32/cast_fp32.h"
+#include "nnacl/cast_parameter.h"
 namespace mindspore::kernel {
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
index 7e71745789..9c346487a5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FILL_H_
 #include
-#include "mindspore/lite/nnacl/fp32/fill_fp32.h"
+#include "mindspore/lite/nnacl/base/fill_base.h"
 #include "mindspore/lite/nnacl/shape.h"
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
diff --git a/mindspore/lite/src/train/train_model.h b/mindspore/lite/src/train/train_model.h
index 14ddf47922..646e13e691 100644
--- a/mindspore/lite/src/train/train_model.h
+++ b/mindspore/lite/src/train/train_model.h
@@ -20,7 +20,6 @@
 namespace mindspore {
 namespace lite {
-
 /// \brief TrainModel Defines a class that allows to import and export a mindsport trainable model
 struct TrainModel : public lite::LiteModel {
   /// \brief Static method to create a TrainModel object
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_node_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_node_pass.cc
index a68fe03f90..7e739a5ff0 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_node_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_node_pass.cc
@@ -27,7 +27,6 @@
 namespace mindspore {
 namespace lite {
-
 STATUS SubgraphNodePass::GetSubgraphAllTensorIndices(const std::unique_ptr &subgraph,
                                                      schema::MetaGraphT *graph, std::set *tensors_indices) {
   for (auto &node_idx : subgraph->nodeIndices) {
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_tensor_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_tensor_pass.cc
index 67e7d04513..ba79e2c480 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_tensor_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/subgraph_tensor_pass.cc
@@ -26,7 +26,6 @@
 namespace mindspore {
 namespace lite {
-
 bool SubgraphTensorPass::IsUsing(schema::MetaGraphT *graph, const uint32_t &tensor_idx) {
   for (const auto &node : graph->nodes) {
     if (IsContain(node->inputIndex, tensor_idx)) {
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc
index 7964fdab79..0cd88d6088 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc
@@ -19,7 +19,6 @@
 namespace mindspore {
 namespace lite {
-
 STATUS CaffePoolingParser::ParsePads(const caffe::PoolingParameter &poolingParam, schema::PoolingT *attr) {
   if (poolingParam.has_pad_h() && poolingParam.has_pad_w()) {
     if (poolingParam.has_pad()) {
diff --git a/mindspore/lite/tools/cropper/cropper_flags.cc b/mindspore/lite/tools/cropper/cropper_flags.cc
index 2ed9afe509..4aa040a850 100644
--- a/mindspore/lite/tools/cropper/cropper_flags.cc
+++ b/mindspore/lite/tools/cropper/cropper_flags.cc
@@ -109,7 +109,6 @@ int CropperFlags::Init(int argc, const char **argv) {
   }
   return RET_OK;
 }
-
 }  // namespace cropper
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/cropper/cropper_flags.h b/mindspore/lite/tools/cropper/cropper_flags.h
index afdb1af8e0..de21ed195a 100644
--- a/mindspore/lite/tools/cropper/cropper_flags.h
+++ b/mindspore/lite/tools/cropper/cropper_flags.h
@@ -24,7 +24,6 @@
 namespace mindspore {
 namespace lite {
 namespace cropper {
-
 class CropperFlags : public virtual mindspore::lite::FlagParser {
  public:
  CropperFlags();
diff --git a/mindspore/lite/tools/cropper/cropper_utils.cc b/mindspore/lite/tools/cropper/cropper_utils.cc
index dc98eafb08..fb5b0d33d5 100644
--- a/mindspore/lite/tools/cropper/cropper_utils.cc
+++ b/mindspore/lite/tools/cropper/cropper_utils.cc
@@ -47,7 +47,6 @@ int ValidFile(std::ifstream &in_file, const char *file_path) {
   }
   return RET_OK;
 }
-
 }  // namespace cropper
 }  // namespace lite
 }  // namespace mindspore