| @@ -26,6 +26,7 @@ if (PLATFORM_ARM64) | |||||
| set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) | set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) | ||||
| endif() | endif() | ||||
| #[[ | |||||
| if (PLATFORM_ARM32) | if (PLATFORM_ARM32) | ||||
| # assembly | # assembly | ||||
| file(GLOB ASSEMBLY_SRC nnacl/assembly/arm32/*.s | file(GLOB ASSEMBLY_SRC nnacl/assembly/arm32/*.s | ||||
| @@ -34,6 +35,7 @@ if (PLATFORM_ARM32) | |||||
| set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) | set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) | ||||
| set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) | set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) | ||||
| endif() | endif() | ||||
| ]] | |||||
| add_library(cpu_kernel_mid_ OBJECT ${KERNEL_SRC} ${TRAIN_KERNEL_SRC}) | add_library(cpu_kernel_mid_ OBJECT ${KERNEL_SRC} ${TRAIN_KERNEL_SRC}) | ||||
| add_subdirectory(nnacl) | add_subdirectory(nnacl) | ||||
| @@ -112,7 +112,7 @@ void IndirectGemmFp32_8x8(float *output, const float *input, const float *weight | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| #ifndef ENABLE_ARM32 | |||||
| // #ifndef ENABLE_ARM32 | |||||
| void IndirectGemmFp32_8x4(float *output, const float *input, const float *weight, const float *bias, size_t step, | void IndirectGemmFp32_8x4(float *output, const float *input, const float *weight, const float *bias, size_t step, | ||||
| size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu, | size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu, | ||||
| size_t relu6) { | size_t relu6) { | ||||
| @@ -155,7 +155,7 @@ void IndirectGemmFp32_8x4(float *output, const float *input, const float *weight | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| #endif | |||||
| // #endif | |||||
| int8_t MinInt8(int8_t a, int8_t b) { return b ^ ((a ^ b) & -(a < b)); } | int8_t MinInt8(int8_t a, int8_t b) { return b ^ ((a ^ b) & -(a < b)); } | ||||
| @@ -36,11 +36,12 @@ void IndirectGemmInt8_4x4(int8_t *output, const int8_t *input, const int8_t *wei | |||||
| size_t ic4, size_t oc, size_t offset, const int32_t *input_sum, size_t act_min, | size_t ic4, size_t oc, size_t offset, const int32_t *input_sum, size_t act_min, | ||||
| size_t act_max, size_t out_zp, size_t out_multiplier, size_t shift_before, | size_t act_max, size_t out_zp, size_t out_multiplier, size_t shift_before, | ||||
| size_t shift_after); | size_t shift_after); | ||||
| #elif defined(ENABLE_ARM32) | |||||
| void IndirectGemmInt8_2x4(int8_t *output, const int8_t *input, const int8_t *weight, const int32_t *bias, size_t ksize, | |||||
| size_t ic4, size_t oc, size_t offset, const int32_t *input_sum, size_t act_min, | |||||
| size_t act_max, size_t out_zp, size_t out_multiplier, size_t shift_before, | |||||
| size_t shift_after); | |||||
| // #elif defined(ENABLE_ARM32) | |||||
| // void IndirectGemmInt8_2x4(int8_t *output, const int8_t *input, const int8_t *weight, const int32_t *bias, | |||||
| // size_t ksize, | |||||
| // size_t ic4, size_t oc, size_t offset, const int32_t *input_sum, size_t act_min, | |||||
| // size_t act_max, size_t out_zp, size_t out_multiplier, size_t shift_before, | |||||
| // size_t shift_after); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -32,10 +32,10 @@ void IndirectGemmInt8(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const in | |||||
| IndirectGemmInt8_4x4(dst, src, weight, bias, UP_DIV(kernel_plane, C4NUM), ic4, output_channel, | IndirectGemmInt8_4x4(dst, src, weight, bias, UP_DIV(kernel_plane, C4NUM), ic4, output_channel, | ||||
| output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier, | output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier, | ||||
| shift_before, shift_after); | shift_before, shift_after); | ||||
| #elif defined(ENABLE_ARM32) | |||||
| IndirectGemmInt8_2x4(dst, src, weight, bias, UP_DIV(kernel_plane, C4NUM), ic4, output_channel, | |||||
| output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier, | |||||
| shift_before, shift_after); | |||||
| // #elif defined(ENABLE_ARM32) | |||||
| // IndirectGemmInt8_2x4(dst, src, weight, bias, UP_DIV(kernel_plane, C4NUM), ic4, output_channel, | |||||
| // output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier, | |||||
| // shift_before, shift_after); | |||||
| #else | #else | ||||
| int tile_num = conv_param->tile_num_; | int tile_num = conv_param->tile_num_; | ||||
| int plane_c4 = UP_DIV(kernel_plane, C4NUM); | int plane_c4 = UP_DIV(kernel_plane, C4NUM); | ||||
| @@ -198,7 +198,7 @@ void IndirectGemmInt8Opt(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const | |||||
| void Conv3x3Uint8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) { | void Conv3x3Uint8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) { | ||||
| int oc4 = UP_DIV(oc, C4NUM); | int oc4 = UP_DIV(oc, C4NUM); | ||||
| #ifdef ENABLE_ARM | |||||
| #ifdef ENABLE_ARM64 | |||||
| IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t)); | IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t)); | ||||
| #else | #else | ||||
| const int input_unit_square = 16; | const int input_unit_square = 16; | ||||
| @@ -108,6 +108,7 @@ if (PLATFORM_ARM64) | |||||
| ${TEST_ASSEMBLY_SRC} | ${TEST_ASSEMBLY_SRC} | ||||
| ) | ) | ||||
| endif() | endif() | ||||
| #[[ | |||||
| if (PLATFORM_ARM32) | if (PLATFORM_ARM32) | ||||
| # assembly | # assembly | ||||
| file(GLOB TEST_ASSEMBLY_SRC | file(GLOB TEST_ASSEMBLY_SRC | ||||
| @@ -119,6 +120,7 @@ if (PLATFORM_ARM32) | |||||
| ${TEST_ASSEMBLY_SRC} | ${TEST_ASSEMBLY_SRC} | ||||
| ) | ) | ||||
| endif() | endif() | ||||
| ]] | |||||
| if (ENABLE_FP16) | if (ENABLE_FP16) | ||||
| file(GLOB KERNEL_OP_FP16_SRC | file(GLOB KERNEL_OP_FP16_SRC | ||||
| ${LITE_DIR}/src/runtime/kernel/arm/fp16/*.cc | ${LITE_DIR}/src/runtime/kernel/arm/fp16/*.cc | ||||