GitOrigin-RevId: e3a12cf9b3
tags/v0.5.0
| @@ -52,21 +52,9 @@ class GemmInterleaved<Strategy, true> { | |||||
| } | } | ||||
| size_t get_b_workspace_size() const { | size_t get_b_workspace_size() const { | ||||
| #if __ARM_FEATURE_DOTPROD | |||||
| size_t new_blockn = m_strategy.block_n; | |||||
| if (m_strategy.KERNEL_W == 6 && m_strategy.UNROLL_K == 4 && | |||||
| m_strategy.KERNEL_H == 8) { | |||||
| new_blockn = round_up<size_t>((m_strategy.block_n-1) % 6, 4) + | |||||
| m_strategy.block_n / 6 * 6; | |||||
| } | |||||
| size_t N = round_up(new_blockn, m_strategy.KERNEL_W); | |||||
| size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | |||||
| return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | |||||
| #else | |||||
| size_t N = round_up(m_strategy.block_n, m_strategy.KERNEL_W); | size_t N = round_up(m_strategy.block_n, m_strategy.KERNEL_W); | ||||
| size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | size_t K = round_up(m_strategy.block_k, m_strategy.UNROLL_K); | ||||
| return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | return round_up(sizeof(stype) * N * K, CACHELINE_SIZE) + m_align_size; | ||||
| #endif | |||||
| } | } | ||||
| //! temporary storage for output, post process such as add bias or relu will | //! temporary storage for output, post process such as add bias or relu will | ||||
| @@ -1452,7 +1452,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QUANTIZEDSYM_MK4_DOT) { | |||||
| #if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
| cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
| #elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
| #endif | #endif | ||||
| #undef cb | #undef cb | ||||
| } | } | ||||
| @@ -1476,7 +1476,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_S8x8x32_MK4_DOT) { | |||||
| #if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
| cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
| #elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
| #endif | #endif | ||||
| #undef cb | #undef cb | ||||
| } | } | ||||
| @@ -1500,7 +1500,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32_MK4_DOT) { | |||||
| #if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
| cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | cb("IM2COLMATMUL:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD:96"); | ||||
| #elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X6X4_DOTPROD:96"); | |||||
| cb("IM2COLMATMUL:AARCH32_INT8_MK4_8X4X4_DOTPROD:96"); | |||||
| #endif | #endif | ||||
| #undef cb | #undef cb | ||||
| } | } | ||||
| @@ -1529,7 +1529,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CONV1x1_QUANTIZEDSYM_MK4_DOT) { | |||||
| #if MEGDNN_AARCH64 | #if MEGDNN_AARCH64 | ||||
| cb("CONV1x1:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD"); | cb("CONV1x1:AARCH64_INT8X8X32_MK4_8X12X4_DOTPROD"); | ||||
| #elif MEGDNN_ARMV7 | #elif MEGDNN_ARMV7 | ||||
| cb("CONV1x1:AARCH32_INT8_MK4_8X6X4_DOTPROD"); | |||||
| cb("CONV1x1:AARCH32_INT8_MK4_8X4X4_DOTPROD"); | |||||
| #endif | #endif | ||||
| #undef cb | #undef cb | ||||
| } | } | ||||