Browse Source

!4042 RowMajor2Col8Major Neon code

Merge pull request !4042 from ling/deconv
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
50d758561d
8 changed files with 276 additions and 42 deletions
  1. +3
    -4
      mindspore/lite/src/ops/deconvolution.cc
  2. +41
    -23
      mindspore/lite/src/populate_parameter.cc
  3. +86
    -6
      mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc
  4. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h
  5. +3
    -4
      mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.cc
  6. +2
    -3
      mindspore/lite/test/run_test.sh
  7. +139
    -0
      mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc
  8. +1
    -1
      mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc

+ 3
- 4
mindspore/lite/src/ops/deconvolution.cc View File

@@ -54,11 +54,11 @@ int DeConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tens
output_h = (input_h - 1) * stride_h + ((kernel_h - 1) * dilate_h + 1) - pad_u_ - pad_d_;
output_w = (input_w - 1) * stride_w + ((kernel_w - 1) * dilate_w + 1) - pad_l_ - pad_r_;
} else if (pad_mode == schema::PadMode_SAME) {
output_h = (input_h - 1) * stride_h + (kernel_h - 1) * dilate_h + 1 - deconv->padUp() - deconv->padDown();
output_w = (input_w - 1) * stride_w + (kernel_w - 1) * dilate_w + 1 - deconv->padLeft() - deconv->padRight();
} else if (pad_mode == schema::PadMode_VALID) {
output_h = input_h * stride_h;
output_w = input_w * stride_w;
} else if (pad_mode == schema::PadMode_VALID) {
output_h = (input_h - 1) * stride_h + kernel_h;
output_w = (input_w - 1) * stride_w + kernel_w;
} else {
MS_LOG(ERROR) << "unsupported pad mode for deconv";
}
@@ -70,4 +70,3 @@ int DeConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tens
return RET_OK;
}
} // namespace mindspore::lite


+ 41
- 23
mindspore/lite/src/populate_parameter.cc View File

@@ -334,8 +334,6 @@ OpParameter *PopulateDeconvParameter(const lite::Primitive *primitive) {
conv_param->pad_d_ = deconv_lite_primitive->PadDown();
conv_param->pad_l_ = deconv_lite_primitive->PadLeft();
conv_param->pad_r_ = deconv_lite_primitive->PadRight();
conv_param->pad_h_ = deconv_lite_primitive->PadUp();
conv_param->pad_w_ = deconv_lite_primitive->PadLeft();
conv_param->dilation_h_ = conv_primitive->dilateH();
conv_param->dilation_w_ = conv_primitive->dilateW();
auto act_type = conv_primitive->activationType();
@@ -353,6 +351,26 @@ OpParameter *PopulateDeconvParameter(const lite::Primitive *primitive) {
conv_param->is_relu6_ = false;
break;
}

auto pad_mode = conv_primitive->padMode();
switch (pad_mode) {
case schema::PadMode_SAME:
conv_param->pad_h_ = (conv_param->kernel_h_ - 1) / 2;
conv_param->pad_w_ = (conv_param->kernel_w_ - 1) / 2;
break;
case schema::PadMode_VALID:
conv_param->pad_h_ = 0;
conv_param->pad_w_ = 0;
break;
case schema::PadMode_CAFFE:
conv_param->pad_h_ = conv_param->pad_u_;
conv_param->pad_w_ = conv_param->pad_l_;
break;
default:
MS_LOG(ERROR) << "invalid pad mode!";
return nullptr;
}

return reinterpret_cast<OpParameter *>(conv_param);
}

@@ -393,27 +411,27 @@ OpParameter *PopulateReduceParameter(const lite::Primitive *primitive) {
}

OpParameter *PopulateMeanParameter(const lite::Primitive *primitive) {
ReduceParameter *mean_param = new (std::nothrow) ReduceParameter();
if (mean_param == nullptr) {
MS_LOG(ERROR) << "new ReduceParameter failed.";
return nullptr;
}
mean_param->op_parameter_.type_ = primitive->Type();
auto mean = primitive->Value()->value_as_Mean();
mean_param->keep_dims_ = mean->keepDims();
auto axisVector = mean->axis();
if (axisVector->size() > REDUCE_MAX_AXES_NUM) {
MS_LOG(ERROR) << "Reduce axes size " << axisVector->size() << " exceed limit " << REDUCE_MAX_AXES_NUM;
delete (mean_param);
return nullptr;
}
mean_param->num_axes_ = static_cast<int>(axisVector->size());
int i = 0;
for (auto iter = axisVector->begin(); iter != axisVector->end(); iter++) {
mean_param->axes_[i++] = *iter;
}
mean_param->mode_ = static_cast<int>(schema::ReduceMode_ReduceMean);
return reinterpret_cast<OpParameter *>(mean_param);
ReduceParameter *mean_param = new (std::nothrow) ReduceParameter();
if (mean_param == nullptr) {
MS_LOG(ERROR) << "new ReduceParameter failed.";
return nullptr;
}
mean_param->op_parameter_.type_ = primitive->Type();
auto mean = primitive->Value()->value_as_Mean();
mean_param->keep_dims_ = mean->keepDims();
auto axisVector = mean->axis();
if (axisVector->size() > REDUCE_MAX_AXES_NUM) {
MS_LOG(ERROR) << "Reduce axes size " << axisVector->size() << " exceed limit " << REDUCE_MAX_AXES_NUM;
delete (mean_param);
return nullptr;
}
mean_param->num_axes_ = static_cast<int>(axisVector->size());
int i = 0;
for (auto iter = axisVector->begin(); iter != axisVector->end(); iter++) {
mean_param->axes_[i++] = *iter;
}
mean_param->mode_ = static_cast<int>(schema::ReduceMode_ReduceMean);
return reinterpret_cast<OpParameter *>(mean_param);
}

OpParameter *PopulatePadParameter(const lite::Primitive *primitive) {


+ 86
- 6
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc View File

@@ -28,13 +28,93 @@ void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col) {
return;
}

void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, int row, int col) {
for (int r = 0; r < row; r++) {
int rd8 = r / 8;
int rm8 = r % 8;
for (int c = 0; c < col; c++) {
dst_ptr[rd8 * col * 8 + c * 8 + rm8] = src_ptr[r * col + c];
/*
 * Pack a row-major [row x col] float matrix into "col8-major" layout:
 * rows are grouped into tiles of C8NUM (8); within a tile the data is stored
 * column-by-column with a stride of 8, i.e.
 *   dst[(r / 8) * col * 8 + c * 8 + (r % 8)] = src[r * col + c]
 * (this is exactly what the scalar fallback below computes).
 * NOTE(review): dst elements for padding rows beyond `row` (up to the next
 * multiple of 8) are never written -- the unit tests pre-zero `out` with {0};
 * confirm callers do the same before relying on zero padding.
 */
void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) {
size_t row8 = row / C8NUM * C8NUM;  // rows covered by the full 8-row tile path
size_t col4 = col / C4NUM * C4NUM;  // cols covered 4 at a time within a tile
float *src_r = src_ptr;  // cursor: start of the current 8-row group in src
float *dst_r = dst_ptr;  // cursor: start of the current tile in dst

size_t ri = 0;
// Main path: full tiles of 8 rows, transposed 4 columns at a time (8x4 sub-block).
for (; ri < row8; ri += C8NUM) {
size_t ci = 0;
for (; ci < col4; ci += C4NUM) {
float *src_c = src_r + ci;
float *dst_c = dst_r + ci * C8NUM;

/* 8x4 row-major to col-major */
#ifdef ENABLE_NEON
// AArch64 path: load 8 rows of 4 floats each (row stride = col floats,
// i.e. col * 4 bytes), transpose with zip1/zip2 + trn1/trn2, and store
// the 4 resulting columns of 8 floats contiguously into dst_c.
size_t stride = col * 4;
asm volatile(
"mov x10, %[src_c]\n"
"mov x11, %[dst_c]\n"

"ld1 {v0.4s}, [x10], %[stride]\n"
"ld1 {v1.4s}, [x10], %[stride]\n"
"ld1 {v2.4s}, [x10], %[stride]\n"
"ld1 {v3.4s}, [x10], %[stride]\n"

"zip1 v4.4s, v0.4s, v1.4s\n"
"zip2 v5.4s, v0.4s, v1.4s\n"
"zip1 v6.4s, v2.4s, v3.4s\n"
"zip2 v7.4s, v2.4s, v3.4s\n"

"ld1 {v8.4s}, [x10], %[stride]\n"
"ld1 {v9.4s}, [x10], %[stride]\n"
"ld1 {v10.4s}, [x10], %[stride]\n"
"ld1 {v11.4s}, [x10], %[stride]\n"

"trn1 v0.2d, v4.2d, v6.2d\n"
"trn2 v1.2d, v4.2d, v6.2d\n"
"trn1 v2.2d, v5.2d, v7.2d\n"
"trn2 v3.2d, v5.2d, v7.2d\n"

"zip1 v12.4s, v8.4s, v9.4s\n"
"zip2 v13.4s, v8.4s, v9.4s\n"
"zip1 v14.4s, v10.4s, v11.4s\n"
"zip2 v15.4s, v10.4s, v11.4s\n"

"trn1 v8.2d, v12.2d, v14.2d\n"
"trn2 v9.2d, v12.2d, v14.2d\n"
"trn1 v10.2d, v13.2d, v15.2d\n"
"trn2 v11.2d, v13.2d, v15.2d\n"

"st1 {v0.4s}, [x11], #16\n"
"st1 {v8.4s}, [x11], #16\n"
"st1 {v1.4s}, [x11], #16\n"
"st1 {v9.4s}, [x11], #16\n"
"st1 {v2.4s}, [x11],#16\n"
"st1 {v10.4s}, [x11], #16\n"
"st1 {v3.4s}, [x11],#16\n"
"st1 {v11.4s}, [x11], #16\n"

:
: [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
: "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15");
#else
// Portable scalar transpose of the same 8x4 sub-block.
for (int tr = 0; tr < 8; tr++) {
for (int tc = 0; tc < 4; tc++) {
dst_c[tc * 8 + tr] = src_c[tr * col + tc];
}
}
#endif
}
// Leftover columns (col % 4) of this 8-row tile, one column at a time.
for (; ci < col; ci++) {
float *src_c = src_r + ci;
float *dst_c = dst_r + ci * C8NUM;
for (size_t i = 0; i < C8NUM; i++) {
dst_c[i] = src_c[i * col];
}
}
src_r += C8NUM * col;  // advance 8 source rows
dst_r += C8NUM * col;  // one full tile = 8 * col floats in dst
}
// Leftover rows (row % 8): scatter each remaining row into the final,
// partial tile; dst_r advances by 1 because within a tile consecutive rows
// of the same column are adjacent.
for (; ri < row; ri++) {
for (size_t i = 0; i < col; i++) {
dst_r[i * C8NUM] = src_r[i];
}
src_r += col;
dst_r += 1;
}
return;
}


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h View File

@@ -24,7 +24,7 @@

void MatMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int row, int col);
void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col);
void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, int row, int col);
void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, size_t row, size_t col);
void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, int row, int col);
void MatMul8x8(const float *a, const float *b, float *c, const float *bias, float maxf, float minf, int deep,
int row_8_, int col_8_);


+ 3
- 4
mindspore/lite/src/runtime/kernel/arm/opclib/int8/deconv.cc View File

@@ -31,12 +31,11 @@ int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t
size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
int oc8 = UP_DIV(output_channel, C8NUM);
int in_plane8 = UP_ROUND(input_plane, 8);
int out_plane8 = UP_ROUND(output_plane, 8);

for (int c = 0; c < oc8; c++) {
int32_t *dst_ptr = tmp + c * out_plane8 * C8NUM;
int32_t *dst_ptr = tmp + c * output_plane * C8NUM;
const int32_t *src_ptr = src + c * in_plane8 * kernel_plane * C8NUM;
memset(dst_ptr, 0, out_plane8 * C8NUM * sizeof(int32_t));
memset(dst_ptr, 0, output_plane * C8NUM * sizeof(int32_t));

for (int ih = 0; ih < conv_param->input_h_; ih++) {
for (int iw = 0; iw < conv_param->input_w_; iw++) {
@@ -63,7 +62,7 @@ int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t
} /*ih*/
} /*oc8*/

PostFuncInt8(tmp, bias, out, output_channel, output_plane, out_plane8,
PostFuncInt8(tmp, bias, out, output_channel, output_plane, UP_ROUND(output_plane, 8),
conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0],
conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.quant_args_[2][0].zp_,
conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]);


+ 2
- 3
mindspore/lite/test/run_test.sh View File

@@ -21,6 +21,5 @@ cp -fr $TEST_DATA_DIR/testPK ./data
./lite-test --gtest_filter=TestStrassenFp32*
./lite-test --gtest_filter=TestDeConvolutionFp32*


./lite-test --gtest_filter=TestPadInt8*

./lite-test --gtest_filter=TestPadInt8.*
./lite-test --gtest_filter=TestDeconvInt8.*

+ 139
- 0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc View File

@@ -17,6 +17,7 @@
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.h"
#include "src/kernel_registry.h"
#include "src/lite_kernel.h"

@@ -26,6 +27,144 @@ class TestMatMulFp32 : public mindspore::Common {
TestMatMulFp32() {}
};

// RowMajor2Col8Major on a 10x9 matrix: 10 rows round up to 16 (two 8-row
// tiles), so the packed buffer is 16 * 9 = 144 floats; `out` is pre-zeroed
// because the packer does not write padding positions.
TEST_F(TestMatMulFp32, Row2Col8Test1) {
// 10x9 row-major input.
float in[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90,
0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39,
0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31,
0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08,
0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02,
0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52};
// Expected col8-major packing; trailing zeros are the untouched (pre-zeroed)
// padding slots of the second, partial 8-row tile.
float co[] = {0.21, 0.67, 0.53, 0.09, 0.43, 0.35, 0.04, 0.43, 0.38, 0.81, 0.97, 0.50, 0.14, 0.52, 0.10, 0.14,
0.81, 0.57, 0.92, 0.39, 0.67, 0.02, 0.18, 0.67, 0.98, 0.70, 0.35, 0.09, 0.10, 0.33, 0.92, 0.10,
0.09, 0.27, 0.74, 0.93, 0.73, 0.99, 0.46, 0.73, 0.68, 0.90, 0.78, 0.91, 0.37, 0.49, 0.08, 0.37,
0.02, 0.07, 0.87, 0.20, 0.24, 0.67, 0.04, 0.24, 0.33, 0.13, 0.23, 0.97, 0.93, 0.75, 0.24, 0.93,
0.85, 0.03, 0.34, 0.61, 0.31, 0.66, 0.52, 0.31, 0.35, 0.04, 0, 0, 0, 0, 0, 0,
0.52, 0.10, 0, 0, 0, 0, 0, 0, 0.02, 0.18, 0, 0, 0, 0, 0, 0,
0.33, 0.92, 0, 0, 0, 0, 0, 0, 0.99, 0.46, 0, 0, 0, 0, 0, 0,
0.49, 0.08, 0, 0, 0, 0, 0, 0, 0.67, 0.04, 0, 0, 0, 0, 0, 0,
0.75, 0.24, 0, 0, 0, 0, 0, 0, 0.66, 0.52, 0, 0, 0, 0, 0, 0};
float out[144] = {0};
RowMajor2Col8Major(in, out, 10, 9);
CompareOutputData(out, co, 144, 0.0001);
}

// Same 90-element data as Row2Col8Test1 but interpreted as an 18x5 matrix:
// 18 rows round up to 24 (three 8-row tiles), so the packed buffer is
// 24 * 5 = 120 floats; `out` is pre-zeroed for the unwritten padding slots.
TEST_F(TestMatMulFp32, Row2Col8Test2) {
// 18x5 row-major input.
float in[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90,
0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39,
0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31,
0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08,
0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02,
0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52};
// Expected col8-major packing (zeros = pre-zeroed padding of the last tile).
float co[] = {0.21, 0.68, 0.81, 0.07, 0.92, 0.23, 0.09, 0.61, 0.38, 0.02, 0.57, 0.13, 0.35, 0.34, 0.93,
0.43, 0.81, 0.33, 0.70, 0.03, 0.74, 0.09, 0.91, 0.14, 0.98, 0.85, 0.27, 0.53, 0.78, 0.50,
0.20, 0.67, 0.09, 0.67, 0.90, 0.97, 0.87, 0.39, 0.97, 0.10, 0.73, 0.35, 0.49, 0.10, 0.04,
0.67, 0.93, 0.33, 0.37, 0.52, 0.67, 0.18, 0.24, 0.10, 0.31, 0.99, 0.24, 0.02, 0.75, 0.92,
0.52, 0.73, 0.35, 0.49, 0.93, 0.33, 0.66, 0.46, 0.43, 0.37, 0.52, 0.67, 0.31, 0.99, 0.04,
0.08, 0.14, 0.24, 0.02, 0.75, 0.66, 0.46, 0, 0, 0, 0, 0, 0, 0.04, 0.08,
0, 0, 0, 0, 0, 0, 0.10, 0.04, 0, 0, 0, 0, 0, 0, 0.18,
0.24, 0, 0, 0, 0, 0, 0, 0.92, 0.52, 0, 0, 0, 0, 0, 0};
float out[120] = {0};
RowMajor2Col8Major(in, out, 18, 5);
CompareOutputData(out, co, 120, 0.0001);
}

// Row8x8Major2RowMajor: unpack an 8x8-tiled buffer back to a plain row-major
// 18x5 matrix (input padded to 24 rows x 8 cols; output is 18 * 5 = 90 floats).
TEST_F(TestMatMulFp32, Row8x82RowTest1) {
// Tiled input: each row of 16 values here holds two 8-wide tile rows whose
// trailing entries past col=5 are zero padding.
float in[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0, 0, 0, 0.68, 0.02, 0.33, 0.85, 0.67, 0, 0, 0,
0.81, 0.57, 0.70, 0.27, 0.90, 0, 0, 0, 0.07, 0.13, 0.03, 0.53, 0.97, 0, 0, 0,
0.92, 0.35, 0.74, 0.78, 0.87, 0, 0, 0, 0.23, 0.34, 0.09, 0.50, 0.39, 0, 0, 0,
0.09, 0.93, 0.91, 0.20, 0.97, 0, 0, 0, 0.61, 0.43, 0.14, 0.67, 0.10, 0, 0, 0,
0.73, 0.37, 0.24, 0.93, 0.31, 0, 0, 0, 0.35, 0.52, 0.02, 0.33, 0.99, 0, 0, 0,
0.49, 0.67, 0.75, 0.66, 0.04, 0, 0, 0, 0.10, 0.18, 0.92, 0.46, 0.08, 0, 0, 0,
0.04, 0.24, 0.52, 0.43, 0.14, 0, 0, 0, 0.67, 0.10, 0.73, 0.37, 0.24, 0, 0, 0,
0.93, 0.31, 0.35, 0.52, 0.02, 0, 0, 0, 0.33, 0.99, 0.49, 0.67, 0.75, 0, 0, 0,
0.66, 0.04, 0.10, 0.18, 0.92, 0, 0, 0, 0.46, 0.08, 0.04, 0.24, 0.52, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Expected dense 18x5 row-major output (padding stripped).
float co[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90,
0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39,
0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31,
0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08,
0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02,
0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52};
float out[90] = {0};
Row8x8Major2RowMajor(in, out, 18, 5);
CompareOutputData(out, co, 90, 0.0001);
}

// Row8x8Major2RowMajor on a single 8x8 tile holding a 6x5 matrix:
// output is the dense 6 * 5 = 30-float row-major matrix, padding dropped.
TEST_F(TestMatMulFp32, Row8x82RowTest2) {
// One 8x8 tile; entries beyond row 6 / col 5 are zero padding.
float in[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0, 0, 0, 0.68, 0.02, 0.33, 0.85, 0.67, 0, 0, 0,
0.81, 0.57, 0.70, 0.27, 0.90, 0, 0, 0, 0.07, 0.13, 0.03, 0.53, 0.97, 0, 0, 0,
0.92, 0.35, 0.74, 0.78, 0.87, 0, 0, 0, 0.23, 0.34, 0.09, 0.50, 0.39, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Expected dense 6x5 row-major output.
float co[] = {0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90,
0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39};
float out[30] = {0};
Row8x8Major2RowMajor(in, out, 6, 5);
CompareOutputData(out, co, 30, 0.0001);
}

// Row8x8Major2RowMajor on a 22x19 matrix: input is padded to 24 rows x 24 cols
// of 8x8 tiles; output is the dense 22 * 19 = 418-float row-major matrix.
// Exercises multiple tiles in both dimensions plus partial-tile edges.
TEST_F(TestMatMulFp32, Row8x82RowTest3) {
// 8x8-tiled input with zero padding in the partial tiles.
float in[] = {
0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.67, 0.10, 0.73,
0.37, 0.24, 0.93, 0.31, 0.35, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04,
0.10, 0.18, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39, 0.09, 0.93, 0.93,
0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.75, 0.66, 0.04, 0.10,
0.18, 0.92, 0.46, 0.08, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02,
0.33, 0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.92, 0.46,
0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.87, 0.23, 0.34, 0.09, 0.50,
0.39, 0.09, 0.93, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24,
0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.21, 0.38, 0.81,
0.98, 0.09, 0.68, 0.02, 0.33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.52,
0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.92, 0.46, 0.08, 0.04,
0.24, 0.52, 0.21, 0.38, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67,
0.10, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.04, 0.24,
0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.85, 0.67, 0.81, 0.57, 0.70,
0.27, 0.90, 0.07, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66,
0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.91, 0.20, 0.97,
0.61, 0.43, 0.14, 0.67, 0.10, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33,
0.99, 0.49, 0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.85,
0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.13, 0.03, 0.53, 0, 0, 0, 0, 0, 0.61, 0.43, 0.14, 0, 0, 0, 0,
0, 0.04, 0.10, 0.18, 0, 0, 0, 0, 0, 0.52, 0.02, 0.33, 0, 0, 0, 0, 0, 0.81, 0.98,
0.09, 0, 0, 0, 0, 0, 0.35, 0.74, 0.78, 0, 0, 0, 0, 0, 0.73, 0.37, 0.24, 0, 0,
0, 0, 0, 0.08, 0.04, 0.24, 0, 0, 0, 0, 0, 0.67, 0.75, 0.67, 0, 0, 0, 0, 0,
0.37, 0.24, 0.93, 0, 0, 0, 0, 0, 0.04, 0.24, 0.52, 0, 0, 0, 0, 0, 0.13, 0.03, 0.53,
0, 0, 0, 0, 0, 0.61, 0.43, 0.14, 0, 0, 0, 0, 0, 0.04, 0.10, 0.18, 0, 0, 0,
0, 0, 0.52, 0.02, 0.33, 0, 0, 0, 0, 0, 0.35, 0.74, 0.78, 0, 0, 0, 0, 0, 0.73,
0.37, 0.24, 0, 0, 0, 0, 0, 0.08, 0.04, 0.24, 0, 0, 0, 0, 0, 0.67, 0.75, 0.67, 0,
0, 0, 0, 0, 0.37, 0.24, 0.93, 0, 0, 0, 0, 0, 0.04, 0.24, 0.52, 0, 0, 0, 0,
0, 0.13, 0.03, 0.53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0};
// Expected dense 22x19 row-major output.
float co[] = {
0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53,
0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14,
0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18,
0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33,
0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.21, 0.38, 0.81, 0.98, 0.09,
0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78,
0.87, 0.23, 0.34, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24,
0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24,
0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.67,
0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93,
0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52,
0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53,
0.97, 0.92, 0.35, 0.74, 0.78, 0.87, 0.23, 0.34, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14,
0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18,
0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33,
0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53, 0.97, 0.92, 0.35, 0.74, 0.78,
0.87, 0.23, 0.34, 0.09, 0.50, 0.39, 0.09, 0.93, 0.91, 0.20, 0.97, 0.61, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24,
0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24,
0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93, 0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.67,
0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52, 0.43, 0.14, 0.67, 0.10, 0.73, 0.37, 0.24, 0.93,
0.31, 0.35, 0.52, 0.02, 0.33, 0.99, 0.49, 0.67, 0.75, 0.66, 0.04, 0.10, 0.18, 0.92, 0.46, 0.08, 0.04, 0.24, 0.52,
0.21, 0.38, 0.81, 0.98, 0.09, 0.68, 0.02, 0.33, 0.85, 0.67, 0.81, 0.57, 0.70, 0.27, 0.90, 0.07, 0.13, 0.03, 0.53};
float out[418] = {0};
Row8x8Major2RowMajor(in, out, 22, 19);
CompareOutputData(out, co, 418, 0.0001);
}

int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
float *a_ptr, float *b_ptr, std::vector<int> a_shape, std::vector<int> b_shape,
std::vector<int> c_shape) {


+ 1
- 1
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc View File

@@ -258,7 +258,7 @@ TEST_F(TestDeconvInt8, DeConvInt8Test1) {
CompareOutputData(reinterpret_cast<int8_t *>(outputs_[0]->Data()), correct, total_size, 3);

delete deconv_param;
// delete deconv;
delete deconv;
for (auto t : inputs_) delete t;
for (auto t : outputs_) delete t;
free(correct);


Loading…
Cancel
Save