| @@ -239,25 +239,25 @@ macro(ncnn_add_layer class) | |||||
| ncnn_add_arch_opt_source(${class} asimdfhm "-march=armv8.2-a+fp16+fp16fml") | ncnn_add_arch_opt_source(${class} asimdfhm "-march=armv8.2-a+fp16+fp16fml") | ||||
| endif() | endif() | ||||
| if(NCNN_ARM84BF16) | if(NCNN_ARM84BF16) | ||||
| ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+bf16") | |||||
| ncnn_add_arch_opt_source(${class} bf16 "-march=armv8.4-a+fp16+dotprod+bf16") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM84I8MM) | if(NCNN_ARM84I8MM) | ||||
| ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+i8mm") | |||||
| ncnn_add_arch_opt_source(${class} i8mm "-march=armv8.4-a+fp16+dotprod+i8mm") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM86SVE) | if(NCNN_ARM86SVE) | ||||
| ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+sve") | |||||
| ncnn_add_arch_opt_source(${class} sve "-march=armv8.6-a+fp16+dotprod+sve") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM86SVE2) | if(NCNN_ARM86SVE2) | ||||
| ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+sve2") | |||||
| ncnn_add_arch_opt_source(${class} sve2 "-march=armv8.6-a+fp16+dotprod+sve2") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM86SVEBF16) | if(NCNN_ARM86SVEBF16) | ||||
| ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+sve+bf16") | |||||
| ncnn_add_arch_opt_source(${class} svebf16 "-march=armv8.6-a+fp16+dotprod+sve+bf16") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM86SVEI8MM) | if(NCNN_ARM86SVEI8MM) | ||||
| ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+sve+i8mm") | |||||
| ncnn_add_arch_opt_source(${class} svei8mm "-march=armv8.6-a+fp16+dotprod+sve+i8mm") | |||||
| endif() | endif() | ||||
| if(NCNN_ARM86SVEF32MM) | if(NCNN_ARM86SVEF32MM) | ||||
| ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+sve+f32mm") | |||||
| ncnn_add_arch_opt_source(${class} svef32mm "-march=armv8.6-a+fp16+dotprod+sve+f32mm") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| @@ -414,7 +414,7 @@ endif() | |||||
| if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)"))) | if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)"))) | ||||
| if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) | if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM86SVE) | ||||
| set(ARM_MARCH_FLAG "-march=armv8.6-a+sve") | |||||
| set(ARM_MARCH_FLAG "-march=armv8.6-a+fp16+dotprod+sve") | |||||
| if(NCNN_ARM86SVE2) | if(NCNN_ARM86SVE2) | ||||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+sve2") | set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+sve2") | ||||
| endif() | endif() | ||||
| @@ -428,7 +428,7 @@ if(((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_AR | |||||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+f32mm") | set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+f32mm") | ||||
| endif() | endif() | ||||
| elseif(NOT NCNN_RUNTIME_CPU AND (NCNN_ARM84BF16 OR NCNN_ARM84I8MM)) | elseif(NOT NCNN_RUNTIME_CPU AND (NCNN_ARM84BF16 OR NCNN_ARM84I8MM)) | ||||
| set(ARM_MARCH_FLAG "-march=armv8.4-a") | |||||
| set(ARM_MARCH_FLAG "-march=armv8.4-a+fp16+dotprod") | |||||
| if(NCNN_ARM84BF16) | if(NCNN_ARM84BF16) | ||||
| set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16") | set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16") | ||||
| endif() | endif() | ||||
| @@ -0,0 +1,69 @@ | |||||
| // Tencent is pleased to support the open source community by making ncnn available. | |||||
| // | |||||
| // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. | |||||
| // | |||||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||||
| // in compliance with the License. You may obtain a copy of the License at | |||||
| // | |||||
| // https://opensource.org/licenses/BSD-3-Clause | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software distributed | |||||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||||
| // specific language governing permissions and limitations under the License. | |||||
| #include "cpu.h" | |||||
| #include "mat.h" | |||||
| namespace ncnn { | |||||
| #include "convolution_sgemm_int8.h" | |||||
| #include "convolution_sgemm_pack1to4_int8.h" | |||||
| #include "convolution_sgemm_pack8to1_int8.h" | |||||
| #include "convolution_sgemm_pack8to4_int8.h" | |||||
| // pack1: the two _i8mm entry points below forward their arguments unchanged to im2col_sgemm_int8_neon and convolution_im2col_sgemm_transform_kernel_int8_neon (presumably defined in convolution_sgemm_int8.h, included inside namespace ncnn; confirm). NOTE(review): the _i8mm suffix suggests this translation unit is compiled with i8mm arch flags; verify against the build script | |||||
| void im2col_sgemm_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt) | |||||
| { | |||||
| im2col_sgemm_int8_neon(bottom_im2col, top_blob, kernel, opt); | |||||
| } | |||||
| void convolution_im2col_sgemm_transform_kernel_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||||
| { | |||||
| convolution_im2col_sgemm_transform_kernel_int8_neon(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||||
| } | |||||
| // pack1to4: the two _i8mm entry points below forward their arguments unchanged to im2col_sgemm_pack1to4_int8_neon and convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon (presumably defined in convolution_sgemm_pack1to4_int8.h, included inside namespace ncnn; confirm) | |||||
| void im2col_sgemm_pack1to4_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt) | |||||
| { | |||||
| im2col_sgemm_pack1to4_int8_neon(bottom_im2col, top_blob, kernel, opt); | |||||
| } | |||||
| void convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||||
| { | |||||
| convolution_im2col_sgemm_transform_kernel_pack1to4_int8_neon(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||||
| } | |||||
| // pack8to1: the two _i8mm entry points below forward their arguments unchanged to im2col_sgemm_pack8to1_int8_neon and convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon (presumably defined in convolution_sgemm_pack8to1_int8.h, included inside namespace ncnn; confirm) | |||||
| void im2col_sgemm_pack8to1_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt) | |||||
| { | |||||
| im2col_sgemm_pack8to1_int8_neon(bottom_im2col, top_blob, kernel, opt); | |||||
| } | |||||
| void convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||||
| { | |||||
| convolution_im2col_sgemm_transform_kernel_pack8to1_int8_neon(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||||
| } | |||||
| // pack8to4: the two _i8mm entry points below forward their arguments unchanged to im2col_sgemm_pack8to4_int8_neon and convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon (presumably defined in convolution_sgemm_pack8to4_int8.h, included inside namespace ncnn; confirm) | |||||
| void im2col_sgemm_pack8to4_int8_neon_i8mm(const Mat& bottom_im2col, Mat& top_blob, const Mat& kernel, const Option& opt) | |||||
| { | |||||
| im2col_sgemm_pack8to4_int8_neon(bottom_im2col, top_blob, kernel, opt); | |||||
| } | |||||
| void convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon_i8mm(const Mat& kernel, Mat& kernel_tm, int inch, int outch, int kernel_w, int kernel_h) | |||||
| { | |||||
| convolution_im2col_sgemm_transform_kernel_pack8to4_int8_neon(kernel, kernel_tm, inch, outch, kernel_w, kernel_h); | |||||
| } | |||||
| } // namespace ncnn | |||||
| @@ -413,7 +413,8 @@ static int test_convolution_1() | |||||
| || test_convolution_int8(4, 8, 16, 24, 3, 1, 1, 1, 1) | || test_convolution_int8(4, 8, 16, 24, 3, 1, 1, 1, 1) | ||||
| || test_convolution_int8(4, 20, 16, 24, 3, 1, 1, 1, 0) | || test_convolution_int8(4, 20, 16, 24, 3, 1, 1, 1, 0) | ||||
| || test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1) | || test_convolution_int8(6, 7, 64, 64, 3, 1, 2, 0, 1) | ||||
| || test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0); | |||||
| || test_convolution_int8(25, 33, 16, 15, 3, 1, 1, 1, 0) | |||||
| || test_convolution_int8(7, 7, 15, 12, 3, 1, 1, 1, 0); | |||||
| } | } | ||||
| #endif // NCNN_INT8 | #endif // NCNN_INT8 | ||||