Browse Source

armv8.4 i8mm optimization for convolution gemm int8 (#4034)

tags/20220721
nihui GitHub 3 years ago
parent
commit
76849cede4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 4036 additions and 1125 deletions
  1. +7
    -7
      cmake/ncnn_add_layer.cmake
  2. +2
    -2
      src/CMakeLists.txt
  3. +69
    -0
      src/layer/arm/convolution_arm_i8mm.cpp
  4. +1245
    -424
      src/layer/arm/convolution_sgemm_int8.h
  5. +1055
    -303
      src/layer/arm/convolution_sgemm_pack1to4_int8.h
  6. +950
    -229
      src/layer/arm/convolution_sgemm_pack8to1_int8.h
  7. +706
    -159
      src/layer/arm/convolution_sgemm_pack8to4_int8.h
  8. +2
    -1
      tests/test_convolution.cpp

+ 7
- 7
cmake/ncnn_add_layer.cmake View File

@@ -239,25 +239,25 @@ macro(ncnn_add_layer class)
ncnn_add_arch_opt_source(${class} asimdfhm "-march=armv8.2-a+fp16+fp16fml")
endif()
if(NCNN_ARM84BF16)
ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+bf16")
ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+fp16+dotprod+bf16")
endif()
if(NCNN_ARM84I8MM)
ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+i8mm")
ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+fp16+dotprod+i8mm")
endif()
if(NCNN_ARM86SVE)
ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+sve")
ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+fp16+dotprod+sve")
endif()
if(NCNN_ARM86SVE2)
ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+sve2")
ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+fp16+dotprod+sve2")
endif()
if(NCNN_ARM86SVEBF16)
ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+sve+bf16")
ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+fp16+dotprod+sve+bf16")
endif()
if(NCNN_ARM86SVEI8MM)
ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+sve+i8mm")
ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+fp16+dotprod+sve+i8mm")
endif()
if(NCNN_ARM86SVEF32MM)
ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+sve+f32mm")
ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+fp16+dotprod+sve+f32mm")
endif()
endif()



+ 2
- 2
src/CMakeLists.txt View File

@@ -414,7 +414,7 @@ endif()

if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)")))
if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM86SVE)
set(ARM_MARCH_FLAG "-march=armv8.6-a+sve")
set(ARM_MARCH_FLAG "-march=armv8.6-a+fp16+dotprod+sve")
if(NCNN_ARM86SVE2)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+sve2")
endif()
@@ -428,7 +428,7 @@ if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_AR
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+f32mm")
endif()
elseif(NOT NCNN_RUNTIME_CPU AND (NCNN_ARM84BF16 OR NCNN_ARM84I8MM))
set(ARM_MARCH_FLAG "-march=armv8.4-a")
set(ARM_MARCH_FLAG "-march=armv8.4-a+fp16+dotprod")
if(NCNN_ARM84BF16)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16")
endif()


+ 69
- 0
src/layer/arm/convolution_arm_i8mm.cpp View File

@@ -0,0 +1,69 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "cpu.h"
#include "mat.h"

namespace ncnn {

#include "convolution_sgemm_int8.h"
#include "convolution_sgemm_pack1to4_int8.h"
#include "convolution_sgemm_pack8to1_int8.h"
#include "convolution_sgemm_pack8to4_int8.h"

// pack1
// Externally visible "_i8mm" entry point for the pack1 int8 im2col sgemm path.
// All four arguments are handed on, unmodified and in the same order, to
// im2col_sgemm_int8_neon (expected to come from convolution_sgemm_int8.h,
// which is included at the top of this namespace).
// NOTE(review): presumably this translation unit is compiled with an
// i8mm-enabled -march, so the included kernel is built with those
// instructions available - confirm against the build scripts.
void im2col_sgemm_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt)
{
    im2col_sgemm_int8_neon(
        bottom_im2col,
        top_blob,
        kernel,
        opt);
}

// "_i8mm" entry point for the pack1 int8 kernel transform.
// Passes kernel, kernel_tm, inch, outch, kernel_w and kernel_h straight
// through to convolution_im2col_sgemm_transform_kernel_int8_neon; no
// argument is inspected or altered here.
// NOTE(review): kernel_tm is taken by non-const reference, so it is
// presumably the output of the transform - confirm in the callee.
void convolution_im2col_sgemm_transform_kernel_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h)
{
    convolution_im2col_sgemm_transform_kernel_int8_neon(
        kernel,
        kernel_tm,
        inch,
        outch,
        kernel_w,
        kernel_h);
}

// pack1to4
// Externally visible "_i8mm" entry point for the pack1to4 int8 im2col sgemm
// path. All four arguments are handed on, unmodified and in the same order,
// to im2col_sgemm_pack1to4_int8_neon (expected to come from
// convolution_sgemm_pack1to4_int8.h, included at the top of this namespace).
// NOTE(review): presumably this translation unit is compiled with an
// i8mm-enabled -march - confirm against the build scripts.
void im2col_sgemm_pack1to4_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt)
{
    im2col_sgemm_pack1to4_int8_neon(
        bottom_im2col,
        top_blob,
        kernel,
        opt);
}

// "_i8mm" entry point for the pack1to4 int8 kernel transform.
// Passes kernel, kernel_tm, inch, outch, kernel_w and kernel_h straight
// through to convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon;
// no argument is inspected or altered here.
// NOTE(review): kernel_tm is taken by non-const reference, so it is
// presumably the output of the transform - confirm in the callee.
void convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h)
{
    convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon(
        kernel,
        kernel_tm,
        inch,
        outch,
        kernel_w,
        kernel_h);
}

// pack8to1
// Externally visible "_i8mm" entry point for the pack8to1 int8 im2col sgemm
// path. All four arguments are handed on, unmodified and in the same order,
// to im2col_sgemm_pack8to1_int8_neon (expected to come from
// convolution_sgemm_pack8to1_int8.h, included at the top of this namespace).
// NOTE(review): presumably this translation unit is compiled with an
// i8mm-enabled -march - confirm against the build scripts.
void im2col_sgemm_pack8to1_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt)
{
    im2col_sgemm_pack8to1_int8_neon(
        bottom_im2col,
        top_blob,
        kernel,
        opt);
}

// "_i8mm" entry point for the pack8to1 int8 kernel transform.
// Passes kernel, kernel_tm, inch, outch, kernel_w and kernel_h straight
// through to convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon;
// no argument is inspected or altered here.
// NOTE(review): kernel_tm is taken by non-const reference, so it is
// presumably the output of the transform - confirm in the callee.
void convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h)
{
    convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon(
        kernel,
        kernel_tm,
        inch,
        outch,
        kernel_w,
        kernel_h);
}

// pack8to4
// Externally visible "_i8mm" entry point for the pack8to4 int8 im2col sgemm
// path. All four arguments are handed on, unmodified and in the same order,
// to im2col_sgemm_pack8to4_int8_neon (expected to come from
// convolution_sgemm_pack8to4_int8.h, included at the top of this namespace).
// NOTE(review): presumably this translation unit is compiled with an
// i8mm-enabled -march - confirm against the build scripts.
void im2col_sgemm_pack8to4_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt)
{
    im2col_sgemm_pack8to4_int8_neon(
        bottom_im2col,
        top_blob,
        kernel,
        opt);
}

// "_i8mm" entry point for the pack8to4 int8 kernel transform.
// Passes kernel, kernel_tm, inch, outch, kernel_w and kernel_h straight
// through to convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon;
// no argument is inspected or altered here.
// NOTE(review): kernel_tm is taken by non-const reference, so it is
// presumably the output of the transform - confirm in the callee.
void convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h)
{
    convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon(
        kernel,
        kernel_tm,
        inch,
        outch,
        kernel_w,
        kernel_h);
}

} // namespace ncnn

+ 1245
- 424
src/layer/arm/convolution_sgemm_int8.h
File diff suppressed because it is too large
View File


+ 1055
- 303
src/layer/arm/convolution_sgemm_pack1to4_int8.h
File diff suppressed because it is too large
View File


+ 950
- 229
src/layer/arm/convolution_sgemm_pack8to1_int8.h
File diff suppressed because it is too large
View File


+ 706
- 159
src/layer/arm/convolution_sgemm_pack8to4_int8.h
File diff suppressed because it is too large
View File


+ 2
- 1
tests/test_convolution.cpp View File

@@ -413,7 +413,8 @@ static int test_convolution_1()
|| test_convolution_int8(4, 8, 16, 24, 3, 1, 1, 1, 1)
|| test_convolution_int8(4, 20, 16, 24, 3, 1, 1, 1, 0)
|| test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1)
|| test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0);
|| test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0)
|| test_convolution_int8(7, 7, 15, 12, 3, 1, 1, 1, 0);
}
#endif // NCNN_INT8



Loading…
Cancel
Save