GitOrigin-RevId: ee5a6874fb
tags/v0.6.0
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #include "src/aarch64/conv_bias/fp16/algos.h" | #include "src/aarch64/conv_bias/fp16/algos.h" | ||||
| @@ -22,7 +23,7 @@ using namespace aarch64; | |||||
| MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp16) | MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp16) | ||||
| bool ConvBiasImpl::AlgoF16DirectStride2::usable( | bool ConvBiasImpl::AlgoF16DirectStride2::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 0) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -47,7 +48,7 @@ bool ConvBiasImpl::AlgoF16DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 1) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 1) { | ||||
| auto wbundle = arm_common::MultithreadDirectConvCommon< | auto wbundle = arm_common::MultithreadDirectConvCommon< | ||||
| dt_float16, __fp16>::get_bundle_stride(param, m_large_group); | dt_float16, __fp16>::get_bundle_stride(param, m_large_group); | ||||
| @@ -59,7 +60,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF16DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoF16DirectStride2::dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -19,6 +19,7 @@ namespace aarch64 { | |||||
| class ConvBiasImpl::AlgoF16DirectStride2 final : public AlgoBase { | class ConvBiasImpl::AlgoF16DirectStride2 final : public AlgoBase { | ||||
| SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | ||||
| bool m_large_group; | bool m_large_group; | ||||
| public: | public: | ||||
| AlgoF16DirectStride2(bool large_group) : m_large_group(large_group) {} | AlgoF16DirectStride2(bool large_group) : m_large_group(large_group) {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| @@ -26,15 +27,12 @@ public: | |||||
| return m_large_group ? "ARMV8F16STRD2_LARGE_GROUP" | return m_large_group ? "ARMV8F16STRD2_LARGE_GROUP" | ||||
| : "ARMV8F16STRD2_SMALL_GROUP"; | : "ARMV8F16STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| }; | }; | ||||
| } // namespace aarch64 | } // namespace aarch64 | ||||
| } // namespace megdnn | } // namespace megdnn | ||||
| @@ -22,7 +22,7 @@ using namespace aarch64; | |||||
| MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp32) | MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp32) | ||||
| bool ConvBiasImpl::AlgoF32DirectStride2::usable( | bool ConvBiasImpl::AlgoF32DirectStride2::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 0) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -47,7 +47,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 1) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 1) { | ||||
| auto wbundle = arm_common::MultithreadDirectConvCommon< | auto wbundle = arm_common::MultithreadDirectConvCommon< | ||||
| float, float>::get_bundle_stride(param, m_large_group); | float, float>::get_bundle_stride(param, m_large_group); | ||||
| @@ -58,7 +58,7 @@ size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { | MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -23,6 +23,7 @@ using FallbackConvBiasImpl = fallback::ConvBiasImpl; | |||||
| class ConvBiasImpl::AlgoF32DirectStride2 final : public AlgoBase { | class ConvBiasImpl::AlgoF32DirectStride2 final : public AlgoBase { | ||||
| SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | ||||
| bool m_large_group; | bool m_large_group; | ||||
| public: | public: | ||||
| AlgoF32DirectStride2(bool large_group) : m_large_group(large_group) {} | AlgoF32DirectStride2(bool large_group) : m_large_group(large_group) {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| @@ -31,14 +32,12 @@ public: | |||||
| : "ARMV8F32STRD2_SMALL_GROUP"; | : "ARMV8F32STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| }; | }; | ||||
| } // namespace aarch64 | } // namespace aarch64 | ||||
| @@ -30,9 +30,8 @@ using megdnn::arm_common::TypeCvtOp; | |||||
| /* ===================== matrix mul algo ===================== */ | /* ===================== matrix mul algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8MatrixMul::usable( | bool ConvBiasImpl::AlgoS8MatrixMul::usable( | ||||
| FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return param.src_type.enumv() == DTypeEnum::QuantizedS8 && | return param.src_type.enumv() == DTypeEnum::QuantizedS8 && | ||||
| param.dst_type.enumv() == DTypeEnum::QuantizedS8 && | param.dst_type.enumv() == DTypeEnum::QuantizedS8 && | ||||
| @@ -13,6 +13,7 @@ | |||||
| #include "src/aarch64/conv_bias/opr_impl.h" | #include "src/aarch64/conv_bias/opr_impl.h" | ||||
| #include "src/fallback/conv_bias/opr_impl.h" | #include "src/fallback/conv_bias/opr_impl.h" | ||||
| #include "src/common/opr_delegate.h" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace aarch64 { | namespace aarch64 { | ||||
| @@ -27,21 +28,21 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8MATMUL"; } | const char* name() const override { return "S8MATMUL"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const override { | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kimpl, {group, 1_z, 1_z}}}; | return {{kimpl, {group, 1_z, 1_z}}}; | ||||
| } | } | ||||
| //! select matmul to the highest preference | //! select matmul to the highest preference | ||||
| bool is_preferred(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| return static_cast<arm_common::ConvBiasImpl*>(opr) | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override { | |||||
| static CpuOprDelegationStorage<1> storage; | |||||
| auto conv_bias_opr = storage.get<ConvBias, 0>(); | |||||
| return static_cast<ConvBiasImpl*>(conv_bias_opr) | |||||
| ->is_matmul_quantized_prefer(param); | ->is_matmul_quantized_prefer(param); | ||||
| } | } | ||||
| }; | }; | ||||
| @@ -32,9 +32,8 @@ using megdnn::arm_common::TypeCvtOp; | |||||
| /* ===================== matrix mul algo ===================== */ | /* ===================== matrix mul algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoQU8MatrixMul::usable( | bool ConvBiasImpl::AlgoQU8MatrixMul::usable( | ||||
| FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && | return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && | ||||
| param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && | param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && | ||||
| @@ -13,6 +13,7 @@ | |||||
| #include "src/aarch64/conv_bias/opr_impl.h" | #include "src/aarch64/conv_bias/opr_impl.h" | ||||
| #include "src/fallback/conv_bias/opr_impl.h" | #include "src/fallback/conv_bias/opr_impl.h" | ||||
| #include "src/common/opr_delegate.h" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace aarch64 { | namespace aarch64 { | ||||
| @@ -27,22 +28,21 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "QU8MATMUL"; } | const char* name() const override { return "QU8MATMUL"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kimpl, {group, 1_z, 1_z}}}; | return {{kimpl, {group, 1_z, 1_z}}}; | ||||
| } | } | ||||
| //! select matmul to the highest preference | //! select matmul to the highest preference | ||||
| bool is_preferred(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| return static_cast<arm_common::ConvBiasImpl*>(opr) | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override { | |||||
| static CpuOprDelegationStorage<1> storage; | |||||
| auto conv_bias_opr = storage.get<ConvBias, 0>(); | |||||
| return static_cast<ConvBiasImpl*>(conv_bias_opr) | |||||
| ->is_matmul_quantized_prefer(param); | ->is_matmul_quantized_prefer(param); | ||||
| } | } | ||||
| }; | }; | ||||
| @@ -27,10 +27,9 @@ using namespace arm_common; | |||||
| /* ======================= AlgoFP16WinogradF23 ======================== */ | /* ======================= AlgoFP16WinogradF23 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP16WinogradF23::usable( | bool ConvBiasImpl::AlgoFP16WinogradF23::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { | ||||
| using Strategy = winograd::winograd_2x3_4x4_f16; | using Strategy = winograd::winograd_2x3_4x4_f16; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -38,13 +37,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -69,10 +68,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23, | |||||
| /* ======================= AlgoFP16WinogradF45 ======================== */ | /* ======================= AlgoFP16WinogradF45 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP16WinogradF45::usable( | bool ConvBiasImpl::AlgoFP16WinogradF45::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) { | ||||
| using Strategy = winograd::winograd_4x5_1x1_f16; | using Strategy = winograd::winograd_4x5_1x1_f16; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -80,13 +78,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 4 && | |||||
| param.output_block_size == 4 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 5) && | param.filter_meta.spatial[0] == 5) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -109,10 +107,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45, | |||||
| /* ======================= AlgoFP16WinogradF63 ======================== */ | /* ======================= AlgoFP16WinogradF63 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP16WinogradF63::usable( | bool ConvBiasImpl::AlgoFP16WinogradF63::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) { | ||||
| using Strategy = winograd::winograd_6x3_1x1_f16; | using Strategy = winograd::winograd_6x3_1x1_f16; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -120,13 +117,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 6 && | |||||
| param.output_block_size == 6 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -149,10 +146,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63, | |||||
| /* ======================= AlgoFP16WinogradF23_8x8 ======================== */ | /* ======================= AlgoFP16WinogradF23_8x8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( | bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 0) { | ||||
| if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | ||||
| return false; | return false; | ||||
| @@ -166,13 +162,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( | |||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | m_matmul_algo->packmode() == PackMode::NO_PACK && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK8)) && | param::MatrixMul::Format::MK8)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -197,7 +193,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8, | |||||
| MIDOUT_DECL(megdnn_arm_common_conv_bias_fp16_kimpl) | MIDOUT_DECL(megdnn_arm_common_conv_bias_fp16_kimpl) | ||||
| bool ConvBiasImpl::AlgoF16Direct::usable( | bool ConvBiasImpl::AlgoF16Direct::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -227,7 +223,7 @@ bool ConvBiasImpl::AlgoF16Direct::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF16Direct::get_workspace( | size_t ConvBiasImpl::AlgoF16Direct::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { | ||||
| auto wbundle = | auto wbundle = | ||||
| MultithreadDirectConvCommon<dt_float16, __fp16>::get_bundle( | MultithreadDirectConvCommon<dt_float16, __fp16>::get_bundle( | ||||
| @@ -310,7 +306,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::get_kimpls( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::dispatch_kerns( | SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -321,7 +317,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF16Direct::dispatch_kerns( | |||||
| /* ===================== stride-1 algo ===================== */ | /* ===================== stride-1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoF16DirectStride1::usable( | bool ConvBiasImpl::AlgoF16DirectStride1::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -425,7 +421,7 @@ ConvBiasImpl::AlgoF16DirectStride1::get_kimpls( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 1) { | ||||
| auto bundle = MultithreadDirectConvCommon< | auto bundle = MultithreadDirectConvCommon< | ||||
| dt_float16, __fp16>::get_bundle_stride(param, m_large_group); | dt_float16, __fp16>::get_bundle_stride(param, m_large_group); | ||||
| @@ -437,7 +433,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF16DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoF16DirectStride1::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -88,14 +88,12 @@ public: | |||||
| return m_large_group ? "F16DIRECT_LARGE_GROUP" | return m_large_group ? "F16DIRECT_LARGE_GROUP" | ||||
| : "F16DIRECT_SMALL_GROUP"; | : "F16DIRECT_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -109,12 +107,10 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "F16STRD1_LARGE_GROUP" : "F16STRD1_SMALL_GROUP"; | return m_large_group ? "F16STRD1_LARGE_GROUP" : "F16STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/algos.h" | #include "src/arm_common/conv_bias/fp32/algos.h" | ||||
| @@ -30,9 +31,8 @@ using namespace arm_common; | |||||
| /* ======================= AlgoFP32WinogradF23_4x4 ======================== */ | /* ======================= AlgoFP32WinogradF23_4x4 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( | bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 0) { | ||||
| if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | ||||
| @@ -47,13 +47,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( | |||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | m_matmul_algo->packmode() == PackMode::NO_PACK && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -76,10 +76,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4, | |||||
| /* ======================= AlgoFP32WinogradF63 ======================== */ | /* ======================= AlgoFP32WinogradF63 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF63::usable( | bool ConvBiasImpl::AlgoFP32WinogradF63::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) { | ||||
| using Strategy = winograd::winograd_6x3_1x1_f; | using Strategy = winograd::winograd_6x3_1x1_f; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -87,13 +86,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 6 && | |||||
| param.output_block_size == 6 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -116,10 +115,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63, | |||||
| /* ======================= AlgoFP32WinogradF54 ======================== */ | /* ======================= AlgoFP32WinogradF54 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF54::usable( | bool ConvBiasImpl::AlgoFP32WinogradF54::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) { | ||||
| using Strategy = winograd::winograd_5x4_1x1_f; | using Strategy = winograd::winograd_5x4_1x1_f; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -127,13 +125,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 5 && | |||||
| param.output_block_size == 5 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 4) && | param.filter_meta.spatial[0] == 4) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -156,10 +154,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54, | |||||
| /* ======================= AlgoFP32WinogradF45 ======================== */ | /* ======================= AlgoFP32WinogradF45 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF45::usable( | bool ConvBiasImpl::AlgoFP32WinogradF45::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) { | ||||
| using Strategy = winograd::winograd_4x5_1x1_f; | using Strategy = winograd::winograd_4x5_1x1_f; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -167,13 +164,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 4 && | |||||
| param.output_block_size == 4 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 5) && | param.filter_meta.spatial[0] == 5) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -196,10 +193,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45, | |||||
| /* ======================= AlgoFP32WinogradF63_4x4 ======================== */ | /* ======================= AlgoFP32WinogradF63_4x4 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( | bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 0) { | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 0) { | ||||
| if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | ||||
| return false; | return false; | ||||
| @@ -213,13 +209,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( | |||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | m_matmul_algo->packmode() == PackMode::NO_PACK && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 6 && | |||||
| param.output_block_size == 6 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -244,9 +240,8 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4, | |||||
| /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */ | /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( | bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, | ||||
| midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) { | midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) { | ||||
| @@ -262,13 +257,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( | |||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == | m_matmul_algo->packmode() == | ||||
| fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW44 || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW44_WINOGRAD && | param::ConvBias::Format::NCHW44_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -291,10 +286,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44, | |||||
| /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */ | /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( | bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, | MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, | ||||
| midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) { | midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) { | ||||
| if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | ||||
| @@ -309,13 +303,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( | |||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == | m_matmul_algo->packmode() == | ||||
| fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW44 || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW44_WINOGRAD && | param::ConvBias::Format::NCHW44_WINOGRAD && | ||||
| opr->param().output_block_size == 6 && | |||||
| param.output_block_size == 6 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -341,7 +335,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44, | |||||
| MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl); | MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl); | ||||
| bool ConvBiasImpl::AlgoF32Direct::usable( | bool ConvBiasImpl::AlgoF32Direct::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -370,7 +364,7 @@ bool ConvBiasImpl::AlgoF32Direct::usable( | |||||
| return false; | return false; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32Direct::get_workspace( | size_t ConvBiasImpl::AlgoF32Direct::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { | ||||
| auto wbundle = MultithreadDirectConvCommon<float, float>::get_bundle( | auto wbundle = MultithreadDirectConvCommon<float, float>::get_bundle( | ||||
| param, m_large_group); | param, m_large_group); | ||||
| @@ -409,7 +403,8 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::get_kimpls( | |||||
| } | } | ||||
| for (size_t ic = 0; ic < IC; ic++) { | for (size_t ic = 0; ic < IC; ic++) { | ||||
| MultithreadDirectConvCommon<float, float>::copy_padding_kern( | MultithreadDirectConvCommon<float, float>::copy_padding_kern( | ||||
| bundle, kern_param, ncb_index, {ncb_index.thread_id, 0, ic}); | |||||
| bundle, kern_param, ncb_index, | |||||
| {ncb_index.thread_id, 0, ic}); | |||||
| } | } | ||||
| for (size_t oc = 0; oc < OC; oc++) { | for (size_t oc = 0; oc < OC; oc++) { | ||||
| MultithreadDirectConvCommon<float, float>::do_conv_kern( | MultithreadDirectConvCommon<float, float>::do_conv_kern( | ||||
| @@ -449,7 +444,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::get_kimpls( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::dispatch_kerns( | SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -458,7 +453,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32Direct::dispatch_kerns( | |||||
| } | } | ||||
| /* ===================== stride-1 algo ===================== */ | /* ===================== stride-1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoF32DirectStride1::usable( | bool ConvBiasImpl::AlgoF32DirectStride1::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -484,7 +479,7 @@ bool ConvBiasImpl::AlgoF32DirectStride1::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoF32DirectStride1::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { | ||||
| auto bundle = | auto bundle = | ||||
| MultithreadDirectConvCommon<float, float>::get_bundle_stride( | MultithreadDirectConvCommon<float, float>::get_bundle_stride( | ||||
| @@ -575,7 +570,7 @@ ConvBiasImpl::AlgoF32DirectStride1::get_kimpls( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -586,7 +581,7 @@ ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns( | |||||
| /* ===================== stride-2 algo ===================== */ | /* ===================== stride-2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoF32DirectStride2::usable( | bool ConvBiasImpl::AlgoF32DirectStride2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -611,7 +606,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable( | |||||
| return false; | return false; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 1) { | ||||
| auto bundle = | auto bundle = | ||||
| MultithreadDirectConvCommon<float, float>::get_bundle_stride( | MultithreadDirectConvCommon<float, float>::get_bundle_stride( | ||||
| @@ -701,7 +696,7 @@ ConvBiasImpl::AlgoF32DirectStride2::get_kimpls( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -137,13 +137,11 @@ public: | |||||
| return m_large_group ? "F32DIRECT_LARGE_GROUP" | return m_large_group ? "F32DIRECT_LARGE_GROUP" | ||||
| : "F32DIRECT_SMALL_GROUP"; | : "F32DIRECT_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -157,13 +155,11 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "F32STRD1_LARGE_GROUP" : "F32STRD1_SMALL_GROUP"; | return m_large_group ? "F32STRD1_LARGE_GROUP" : "F32STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -177,13 +173,11 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "F32STRD2_LARGE_GROUP" : "F32STRD2_SMALL_GROUP"; | return m_large_group ? "F32STRD2_LARGE_GROUP" : "F32STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -194,13 +188,11 @@ public: | |||||
| AlgoF32DirectNCHW44() {} | AlgoF32DirectNCHW44() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "F32_CONV_NCHW44_DIRECT"; } | const char* name() const override { return "F32_CONV_NCHW44_DIRECT"; } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -211,13 +203,11 @@ public: | |||||
| AlgoF32DirectNCHWNCHW44() {} | AlgoF32DirectNCHWNCHW44() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "F32_CONV_NCHW_NCHW44"; } | const char* name() const override { return "F32_CONV_NCHW_NCHW44"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -227,13 +217,11 @@ class ConvBiasImpl::AlgoF32ChannelWiseNCHW44 final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "F32_CHANNEL_WISE_NCHW44"; } | const char* name() const override { return "F32_CHANNEL_WISE_NCHW44"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -10,8 +10,8 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h" | |||||
| #include "src/arm_common/conv_bias/fp32/algos.h" | #include "src/arm_common/conv_bias/fp32/algos.h" | ||||
| #include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h" | |||||
| #include "src/arm_common/elemwise_op.h" | #include "src/arm_common/elemwise_op.h" | ||||
| #include "midout.h" | #include "midout.h" | ||||
| @@ -26,8 +26,7 @@ using conv_fun = std::function<void( | |||||
| MIDOUT_DECL(conv_bias_fp32_channel_wise_nchw44) | MIDOUT_DECL(conv_bias_fp32_channel_wise_nchw44) | ||||
| bool ConvBiasImpl::AlgoF32ChannelWiseNCHW44::usable( | bool ConvBiasImpl::AlgoF32ChannelWiseNCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | |||||
| const NCBKernSizeParam& param, AlgoSelectionStrategy) const { | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto FH = fm.spatial[0]; | auto FH = fm.spatial[0]; | ||||
| size_t OC = fm.ocpg; | size_t OC = fm.ocpg; | ||||
| @@ -49,13 +48,13 @@ bool ConvBiasImpl::AlgoF32ChannelWiseNCHW44::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32ChannelWiseNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoF32ChannelWiseNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam&) const { | |||||
| const NCBKernSizeParam&) const { | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32ChannelWiseNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoF32ChannelWiseNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| const constexpr size_t pack_group_size = 4_z; | const constexpr size_t pack_group_size = 4_z; | ||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| const int batch = param.n; | const int batch = param.n; | ||||
| @@ -159,8 +159,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, | |||||
| } // namespace | } // namespace | ||||
| /* ===================== stride1 algo ===================== */ | /* ===================== stride1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param, | |||||
| bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto fh = fm.spatial[0]; | auto fh = fm.spatial[0]; | ||||
| @@ -182,13 +181,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(fallback::ConvBiasImpl*, | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32DirectNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoF32DirectNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32DirectNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoF32DirectNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| const int batch = param.n; | const int batch = param.n; | ||||
| const int group = fm.group; | const int group = fm.group; | ||||
| @@ -188,8 +188,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, | |||||
| } // namespace | } // namespace | ||||
| bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable( | bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | |||||
| const NCBKernSizeParam& param, AlgoSelectionStrategy) const { | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto fh = fm.spatial[0]; | auto fh = fm.spatial[0]; | ||||
| int oc = fm.ocpg; | int oc = fm.ocpg; | ||||
| @@ -209,13 +208,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoF32DirectNCHWNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoF32DirectNCHWNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoF32DirectNCHWNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoF32DirectNCHWNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| const int batch = param.n; | const int batch = param.n; | ||||
| const int group = fm.group; | const int group = fm.group; | ||||
| @@ -28,7 +28,7 @@ using namespace arm_common; | |||||
| MIDOUT_DECL(megdnn_arm_common_conv_bias_int8) | MIDOUT_DECL(megdnn_arm_common_conv_bias_int8) | ||||
| /* ===================== stride1 algo ===================== */ | /* ===================== stride1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8DirectStride1::usable( | bool ConvBiasImpl::AlgoS8DirectStride1::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = direct_int8_stride1::can_conv_direct_stride1_int8(param); | bool avaible = direct_int8_stride1::can_conv_direct_stride1_int8(param); | ||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| @@ -40,7 +40,7 @@ bool ConvBiasImpl::AlgoS8DirectStride1::usable( | |||||
| return avaible; | return avaible; | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred( | bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred( | ||||
| megdnn::fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto FH = fm.spatial[0]; | auto FH = fm.spatial[0]; | ||||
| auto OC = fm.ocpg; | auto OC = fm.ocpg; | ||||
| @@ -53,14 +53,14 @@ bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoS8DirectStride1::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_int8_stride1::get_bundle(param, m_large_group); | auto bundle = direct_int8_stride1::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 0) { | ||||
| return direct_int8_stride1::get_kimpls(param, m_large_group); | return direct_int8_stride1::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -70,20 +70,20 @@ ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns( | |||||
| /* ===================== stride1 algo ===================== */ | /* ===================== stride1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::usable( | bool ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| return channel_wise_nchw44::stride1::is_available(param); | return channel_wise_nchw44::stride1::is_available(param); | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::get_workspace( | size_t ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = channel_wise_nchw44::stride1::get_bundle(param); | auto bundle = channel_wise_nchw44::stride1::get_bundle(param); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns( | ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | ||||
| midout_iv("AlgoS8ChanWiseStride1NCHW44"_hash)) { | midout_iv("AlgoS8ChanWiseStride1NCHW44"_hash)) { | ||||
| return channel_wise_nchw44::stride1::get_kimpls(param); | return channel_wise_nchw44::stride1::get_kimpls(param); | ||||
| @@ -94,20 +94,20 @@ ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns( | |||||
| /* ===================== stride2 algo ===================== */ | /* ===================== stride2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::usable( | bool ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| return channel_wise_nchw44::stride2::is_available(param); | return channel_wise_nchw44::stride2::is_available(param); | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::get_workspace( | size_t ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = channel_wise_nchw44::stride2::get_bundle(param); | auto bundle = channel_wise_nchw44::stride2::get_bundle(param); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns( | ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | ||||
| midout_iv("AlgoS8ChanWiseStride2NCHW44"_hash)) { | midout_iv("AlgoS8ChanWiseStride2NCHW44"_hash)) { | ||||
| return channel_wise_nchw44::stride2::get_kimpls(param); | return channel_wise_nchw44::stride2::get_kimpls(param); | ||||
| @@ -118,7 +118,7 @@ ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns( | |||||
| /* ===================== stride2 algo ===================== */ | /* ===================== stride2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8DirectStride2::usable( | bool ConvBiasImpl::AlgoS8DirectStride2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = direct_int8_stride2::can_conv_direct_stride2_int8(param); | bool avaible = direct_int8_stride2::can_conv_direct_stride2_int8(param); | ||||
| if (algo_selection_strategy == | if (algo_selection_strategy == | ||||
| @@ -130,14 +130,14 @@ bool ConvBiasImpl::AlgoS8DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoS8DirectStride2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_int8_stride2::get_bundle(param, m_large_group); | auto bundle = direct_int8_stride2::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 1) { | ||||
| return direct_int8_stride2::get_kimpls(param, m_large_group); | return direct_int8_stride2::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -148,7 +148,7 @@ ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns( | |||||
| #if __ARM_FEATURE_DOTPROD | #if __ARM_FEATURE_DOTPROD | ||||
| /* ===================== dot stride1 algo ======================== */ | /* ===================== dot stride1 algo ======================== */ | ||||
| bool ConvBiasImpl::AlgoDotS8DirectStride1::usable( | bool ConvBiasImpl::AlgoDotS8DirectStride1::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = | bool avaible = | ||||
| direct_dotprod_int8_stride1::can_conv_direct_stride1_int8(param); | direct_dotprod_int8_stride1::can_conv_direct_stride1_int8(param); | ||||
| @@ -163,14 +163,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride1::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotS8DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoDotS8DirectStride1::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_dotprod_int8_stride1::get_bundle(param, m_large_group); | auto bundle = direct_dotprod_int8_stride1::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 1) { | ||||
| return direct_dotprod_int8_stride1::get_kimpls(param, m_large_group); | return direct_dotprod_int8_stride1::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -180,7 +180,7 @@ ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns( | |||||
| /* ===================== dot stride2 algo ======================== */ | /* ===================== dot stride2 algo ======================== */ | ||||
| bool ConvBiasImpl::AlgoDotS8DirectStride2::usable( | bool ConvBiasImpl::AlgoDotS8DirectStride2::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = | bool avaible = | ||||
| direct_dotprod_int8_stride2::can_conv_direct_stride2_int8(param); | direct_dotprod_int8_stride2::can_conv_direct_stride2_int8(param); | ||||
| @@ -193,14 +193,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotS8DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoDotS8DirectStride2::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_dotprod_int8_stride2::get_bundle(param, m_large_group); | auto bundle = direct_dotprod_int8_stride2::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 2) { | ||||
| return direct_dotprod_int8_stride2::get_kimpls(param, m_large_group); | return direct_dotprod_int8_stride2::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -212,7 +212,7 @@ ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns( | |||||
| /* ======================= AlgoS8WinogradF23_8x8 ======================== */ | /* ======================= AlgoS8WinogradF23_8x8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( | bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | ||||
| return false; | return false; | ||||
| @@ -225,13 +225,14 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( | |||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | m_matmul_algo->packmode() == PackMode::NO_PACK && | ||||
| ((opr->param().format == param::ConvBias::Format::NCHW && | |||||
| ((param.filter_meta.format == param::ConvBias::Format::NCHW && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | ||||
| (opr->param().format == param::ConvBias::Format::NCHW_WINOGRAD && | |||||
| opr->param().output_block_size == 2 && | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == param::MatrixMul::Format::MK8 && | param.winograd_matmul_format == param::MatrixMul::Format::MK8 && | ||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -251,7 +252,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8, | |||||
| //=========================== input int8 compute float32 ========= | //=========================== input int8 compute float32 ========= | ||||
| bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | ||||
| @@ -270,14 +271,14 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||||
| .get_matmul_kern_param(param)); | .get_matmul_kern_param(param)); | ||||
| return is_matmul_usable && | return is_matmul_usable && | ||||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | m_matmul_algo->packmode() == PackMode::NO_PACK && | ||||
| ((opr->param().format == param::ConvBias::Format::NCHW44 && | |||||
| ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | ||||
| ((opr->param().format == | |||||
| ((param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW44_WINOGRAD) && | param::ConvBias::Format::NCHW44_WINOGRAD) && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -302,40 +303,42 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44, | |||||
| /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */ | /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */ | ||||
| bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( | bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MIDOUT_BEGIN( | MIDOUT_BEGIN( | ||||
| megdnn_arm_common_conv_bias_int8, | megdnn_arm_common_conv_bias_int8, | ||||
| midout_iv( | |||||
| "arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"_hash)) { | |||||
| if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | |||||
| return false; | |||||
| using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; | |||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | |||||
| auto&& matmul_param = | |||||
| megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>( | |||||
| strategy, m_tile_size, param) | |||||
| .get_matmul_kern_param(param); | |||||
| bool is_matmul_usable = m_matmul_algo->usable(matmul_param); | |||||
| return is_matmul_usable && | |||||
| ((opr->param().format == param::ConvBias::Format::NCHW44 && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
| (opr->param().format == param::ConvBias::Format::NCHW44_WINOGRAD && | |||||
| opr->param().output_block_size == 2 && | |||||
| param.winograd_matmul_format == param::MatrixMul::Format::MK8 && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | |||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||||
| param.filter_meta.spatial[0] == 3) && | |||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | |||||
| param.filter_meta.stride[0] == 1) && | |||||
| (param.filter_meta.dilation[0] == param.filter_meta.dilation[1] && | |||||
| param.filter_meta.dilation[0] == 1) && | |||||
| param.compute_mode == param::ConvBias::ComputeMode::DEFAULT && | |||||
| param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
| param.bias_type.enumv() == DTypeEnum::QuantizedS32 && | |||||
| param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||||
| midout_iv("arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"_hash)) { | |||||
| if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | |||||
| return false; | |||||
| using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; | |||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | |||||
| auto&& matmul_param = | |||||
| megdnn::winograd::ConvBias<Strategy, | |||||
| param::MatrixMul::Format::MK8>( | |||||
| strategy, m_tile_size, param) | |||||
| .get_matmul_kern_param(param); | |||||
| bool is_matmul_usable = m_matmul_algo->usable(matmul_param); | |||||
| return is_matmul_usable && | |||||
| ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW44_WINOGRAD && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | |||||
| param::MatrixMul::Format::MK8 && | |||||
| param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||||
| param.filter_meta.spatial[0] == 3) && | |||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | |||||
| param.filter_meta.stride[0] == 1) && | |||||
| (param.filter_meta.dilation[0] == | |||||
| param.filter_meta.dilation[1] && | |||||
| param.filter_meta.dilation[0] == 1) && | |||||
| param.compute_mode == param::ConvBias::ComputeMode::DEFAULT && | |||||
| param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
| param.bias_type.enumv() == DTypeEnum::QuantizedS32 && | |||||
| param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||||
| } | } | ||||
| MIDOUT_END(); | MIDOUT_END(); | ||||
| return false; | return false; | ||||
| @@ -26,16 +26,13 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "S8STRD1_LARGE_GROUP" : "S8STRD1_SMALL_GROUP"; | return m_large_group ? "S8STRD1_LARGE_GROUP" : "S8STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| bool is_preferred(megdnn::fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| class ConvBiasImpl::AlgoS8DirectStride2 final : public AlgoBase { | class ConvBiasImpl::AlgoS8DirectStride2 final : public AlgoBase { | ||||
| @@ -47,13 +44,11 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "S8STRD2_LARGE_GROUP" : "S8STRD2_SMALL_GROUP"; | return m_large_group ? "S8STRD2_LARGE_GROUP" : "S8STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -62,15 +57,12 @@ public: | |||||
| AlgoS8DirectNCHW44() {} | AlgoS8DirectNCHW44() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8_NCHW44_DIRECT"; } | const char* name() const override { return "S8_NCHW44_DIRECT"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| bool is_preferred(megdnn::fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| class ConvBiasImpl::AlgoS8DirectNCHWNCHW44 final : public AlgoBase { | class ConvBiasImpl::AlgoS8DirectNCHWNCHW44 final : public AlgoBase { | ||||
| @@ -78,27 +70,22 @@ public: | |||||
| AlgoS8DirectNCHWNCHW44() {} | AlgoS8DirectNCHWNCHW44() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8_CONV_NCHW_NCHW44"; } | const char* name() const override { return "S8_CONV_NCHW_NCHW44"; } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| bool is_preferred(megdnn::fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| class ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44 final : public AlgoBase { | class ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44 final : public AlgoBase { | ||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8_CHAN_WISE_STRD1_NCHW44"; } | const char* name() const override { return "S8_CHAN_WISE_STRD1_NCHW44"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -106,12 +93,10 @@ class ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44 final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8_CHAN_WISE_STRD2_NCHW44"; } | const char* name() const override { return "S8_CHAN_WISE_STRD2_NCHW44"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -121,13 +106,11 @@ class ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44 final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "ARMDOTS8_NCHW_NCHW44"; } | const char* name() const override { return "ARMDOTS8_NCHW_NCHW44"; } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, | |||||
| bool usable(const NCBKernSizeParam&, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -142,13 +125,11 @@ public: | |||||
| return m_large_group ? "ARMDOTS8STRD1_LARGE_GROUP" | return m_large_group ? "ARMDOTS8STRD1_LARGE_GROUP" | ||||
| : "ARMDOTS8STRD1_SMALL_GROUP"; | : "ARMDOTS8STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, | |||||
| bool usable(const NCBKernSizeParam&, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -163,13 +144,11 @@ public: | |||||
| : "ARMDOTS8STRD2_SMALL_GROUP"; | : "ARMDOTS8STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, | |||||
| bool usable(const NCBKernSizeParam&, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -178,21 +157,16 @@ public: | |||||
| AlgoDotS8Direct_NCHW44() {} | AlgoDotS8Direct_NCHW44() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { | |||||
| return "ARMDOTS8DIRECT_NCHW44"; | |||||
| } | |||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, | |||||
| const char* name() const override { return "ARMDOTS8DIRECT_NCHW44"; } | |||||
| bool usable(const NCBKernSizeParam&, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| bool is_preferred(megdnn::fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| #endif | #endif | ||||
| @@ -161,7 +161,7 @@ static void conv_kern(const WorkspaceBundle& bundle, | |||||
| } // namespace | } // namespace | ||||
| bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable( | bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MEGDNN_MARK_USED_VAR(algo_selection_strategy); | MEGDNN_MARK_USED_VAR(algo_selection_strategy); | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -199,19 +199,19 @@ bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::is_preferred( | bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::is_preferred( | ||||
| megdnn::fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| return true; | return true; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotS8Direct_NCHW44::get_workspace( | size_t ConvBiasImpl::AlgoDotS8Direct_NCHW44::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotS8Direct_NCHW44::dispatch_kerns( | ConvBiasImpl::AlgoDotS8Direct_NCHW44::dispatch_kerns( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, | ||||
| midout_iv("ALGODOTS8DIRECT_NCHW44"_hash)) { | midout_iv("ALGODOTS8DIRECT_NCHW44"_hash)) { | ||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| @@ -189,7 +189,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoS8DirectNCHW44::usable( | bool ConvBiasImpl::AlgoS8DirectNCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MEGDNN_MARK_USED_VAR(algo_selection_strategy); | MEGDNN_MARK_USED_VAR(algo_selection_strategy); | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -213,22 +213,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHW44::usable( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoS8DirectNCHW44::is_preferred( | bool ConvBiasImpl::AlgoS8DirectNCHW44::is_preferred( | ||||
| megdnn::fallback::ConvBiasImpl* conv_bias_impl_ptr, | |||||
| const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| // TODO: benchmark and fix | // TODO: benchmark and fix | ||||
| MEGDNN_MARK_USED_VAR(conv_bias_impl_ptr); | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| return false; | return false; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8DirectNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoS8DirectNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8DirectNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoS8DirectNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| size_t N = param.n; | size_t N = param.n; | ||||
| size_t IC = fm.icpg; | size_t IC = fm.icpg; | ||||
| @@ -214,7 +214,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable( | bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MEGDNN_MARK_USED_VAR(algo_selection_strategy); | MEGDNN_MARK_USED_VAR(algo_selection_strategy); | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -236,22 +236,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::is_preferred( | bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::is_preferred( | ||||
| megdnn::fallback::ConvBiasImpl* conv_bias_impl_ptr, | |||||
| const NCBKernSizeParam& param) const { | const NCBKernSizeParam& param) const { | ||||
| // TODO: benchmark and fix | // TODO: benchmark and fix | ||||
| MEGDNN_MARK_USED_VAR(conv_bias_impl_ptr); | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| return false; | return false; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoS8DirectNCHWNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoS8DirectNCHWNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoS8DirectNCHWNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoS8DirectNCHWNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| size_t N = param.n; | size_t N = param.n; | ||||
| size_t OC = fm.ocpg; | size_t OC = fm.ocpg; | ||||
| @@ -172,8 +172,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, | |||||
| } // namespace | } // namespace | ||||
| bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable( | bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | |||||
| const NCBKernSizeParam& param, AlgoSelectionStrategy) const { | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto fh = fm.spatial[0]; | auto fh = fm.spatial[0]; | ||||
| int oc = fm.ocpg; | int oc = fm.ocpg; | ||||
| @@ -194,13 +193,13 @@ bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::get_workspace( | size_t ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::dispatch_kerns( | ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto fm = param.filter_meta; | auto fm = param.filter_meta; | ||||
| const int batch = param.n; | const int batch = param.n; | ||||
| const int group = fm.group; | const int group = fm.group; | ||||
| @@ -83,7 +83,7 @@ void get_rectified_size_str2(size_t IH, size_t IW, size_t OH, size_t OW, | |||||
| /* ===================== direct algo ===================== */ | /* ===================== direct algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoI8x8x16Direct::usable( | bool ConvBiasImpl::AlgoI8x8x16Direct::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -129,7 +129,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Direct::get_bundle( | |||||
| return {nullptr, {part0, part1}}; | return {nullptr, {part0, part1}}; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoI8x8x16Direct::get_workspace( | size_t ConvBiasImpl::AlgoI8x8x16Direct::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 1) { | ||||
| auto bundle = get_bundle(param); | auto bundle = get_bundle(param); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| @@ -293,7 +293,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoI8x8x16Direct::get_kimpls( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns( | ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -303,7 +303,7 @@ ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns( | |||||
| /* ===================== stride-2 algo ===================== */ | /* ===================== stride-2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoI8x8x16Stride2::usable( | bool ConvBiasImpl::AlgoI8x8x16Stride2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 0) { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -350,7 +350,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Stride2::get_bundle( | |||||
| return {nullptr, {part0, part1}}; | return {nullptr, {part0, part1}}; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoI8x8x16Stride2::get_workspace( | size_t ConvBiasImpl::AlgoI8x8x16Stride2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 1) { | ||||
| auto bundle = get_bundle(param); | auto bundle = get_bundle(param); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| @@ -513,7 +513,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoI8x8x16Stride2::get_kimpls( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns( | ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 2) { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -521,7 +521,7 @@ ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns( | |||||
| return {}; | return {}; | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable( | bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 0) { | ||||
| return param.bias_mode == BiasMode::NO_BIAS && | return param.bias_mode == BiasMode::NO_BIAS && | ||||
| @@ -534,7 +534,7 @@ bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace( | size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 1) { | ||||
| return conv_bias::get_workspace_in_bytes_conv_int8x8x16_stride2_flt2( | return conv_bias::get_workspace_in_bytes_conv_int8x8x16_stride2_flt2( | ||||
| param); | param); | ||||
| @@ -545,7 +545,7 @@ size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoI8x8x16Stride2Filter2::dispatch_kerns( | ConvBiasImpl::AlgoI8x8x16Stride2Filter2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| // return {conv_bias::conv_int8x8x16_stride2_flt2,true}; | // return {conv_bias::conv_int8x8x16_stride2_flt2,true}; | ||||
| auto kern = [](const NCBKernParam& param, const NCBKernIndex& ncb_index) { | auto kern = [](const NCBKernParam& param, const NCBKernIndex& ncb_index) { | ||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 2) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 2) { | ||||
| @@ -35,12 +35,10 @@ public: | |||||
| return m_large_group ? "I8816DIRECT_LARGE_GROUP" | return m_large_group ? "I8816DIRECT_LARGE_GROUP" | ||||
| : "I8816DIRECT_SMALL_GROUP"; | : "I8816DIRECT_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -64,13 +62,11 @@ public: | |||||
| return m_large_group ? "I8816STRD2_LARGE_GROUP" | return m_large_group ? "I8816STRD2_LARGE_GROUP" | ||||
| : "I8816STRD2_SMALL_GROUP"; | : "I8816STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -79,13 +75,11 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "I8816STRD2F2"; } | const char* name() const override { return "I8816STRD2F2"; } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -232,7 +232,7 @@ void* const ConvBiasImpl::sm_arm_common_algo_type = | |||||
| &arm_common_algo_type_storage; | &arm_common_algo_type_storage; | ||||
| bool ConvBiasImpl::is_matmul_quantized_prefer( | bool ConvBiasImpl::is_matmul_quantized_prefer( | ||||
| const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
| const ConvBiasImpl::NCBKernSizeParam& param) const { | |||||
| // fallback::ConvBiasImpl::NCBKernParam conv_ncb_param; | // fallback::ConvBiasImpl::NCBKernParam conv_ncb_param; | ||||
| fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param( | fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param( | ||||
| param, 0, param::MatrixMul::Format::DEFAULT, {}, 0, | param, 0, param::MatrixMul::Format::DEFAULT, {}, 0, | ||||
| @@ -27,7 +27,7 @@ public: | |||||
| SmallVector<AlgoBase*> algo_pack() override; | SmallVector<AlgoBase*> algo_pack() override; | ||||
| bool is_matmul_quantized_prefer( | bool is_matmul_quantized_prefer( | ||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; | |||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) const override; | |||||
| class AlgoPack; | class AlgoPack; | ||||
| protected: | protected: | ||||
| @@ -6,17 +6,18 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/quint8/algos.h" | #include "src/arm_common/conv_bias/quint8/algos.h" | ||||
| #include "midout.h" | |||||
| #include "src/arm_common/conv_bias/quint8/stride1.h" | #include "src/arm_common/conv_bias/quint8/stride1.h" | ||||
| #include "src/arm_common/conv_bias/quint8/stride2.h" | |||||
| #include "src/arm_common/conv_bias/quint8/stride1_dotprod.h" | #include "src/arm_common/conv_bias/quint8/stride1_dotprod.h" | ||||
| #include "src/arm_common/conv_bias/quint8/stride2.h" | |||||
| #include "src/arm_common/conv_bias/quint8/stride2_dotprod.h" | #include "src/arm_common/conv_bias/quint8/stride2_dotprod.h" | ||||
| #include "src/arm_common/elemwise_op.h" | #include "src/arm_common/elemwise_op.h" | ||||
| #include "src/fallback/conv_bias/common.h" | #include "src/fallback/conv_bias/common.h" | ||||
| #include "midout.h" | |||||
| MIDOUT_DECL(megdnn_arm_common_conv_bias_quint8) | MIDOUT_DECL(megdnn_arm_common_conv_bias_quint8) | ||||
| @@ -25,7 +26,7 @@ using namespace arm_common; | |||||
| /* ===================== stride1 algo ===================== */ | /* ===================== stride1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoQU8DirectStride1::usable( | bool ConvBiasImpl::AlgoQU8DirectStride1::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = direct_quint8_stride1::can_conv_direct_stride1_quint8(param); | bool avaible = direct_quint8_stride1::can_conv_direct_stride1_quint8(param); | ||||
| if (algo_selection_strategy == | if (algo_selection_strategy == | ||||
| @@ -37,14 +38,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride1::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoQU8DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoQU8DirectStride1::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_quint8_stride1::get_bundle(param, m_large_group); | auto bundle = direct_quint8_stride1::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 0) { | ||||
| return direct_quint8_stride1::get_kimpls(param, m_large_group); | return direct_quint8_stride1::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -54,7 +55,7 @@ ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns( | |||||
| /* ===================== stride2 algo ===================== */ | /* ===================== stride2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoQU8DirectStride2::usable( | bool ConvBiasImpl::AlgoQU8DirectStride2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = direct_quint8_stride2::can_conv_direct_stride2_quint8(param); | bool avaible = direct_quint8_stride2::can_conv_direct_stride2_quint8(param); | ||||
| if (algo_selection_strategy == | if (algo_selection_strategy == | ||||
| @@ -66,14 +67,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoQU8DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoQU8DirectStride2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = direct_quint8_stride2::get_bundle(param, m_large_group); | auto bundle = direct_quint8_stride2::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 1) { | ||||
| return direct_quint8_stride2::get_kimpls(param, m_large_group); | return direct_quint8_stride2::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -83,7 +84,7 @@ ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns( | |||||
| #if __ARM_FEATURE_DOTPROD | #if __ARM_FEATURE_DOTPROD | ||||
| /* ===================== stride1 algo ===================== */ | /* ===================== stride1 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoDotU8DirectStride1::usable( | bool ConvBiasImpl::AlgoDotU8DirectStride1::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = | bool avaible = | ||||
| direct_dotprod_quint8_stride1::can_conv_direct_stride1_quint8( | direct_dotprod_quint8_stride1::can_conv_direct_stride1_quint8( | ||||
| @@ -97,7 +98,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride1::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace( | size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = | auto bundle = | ||||
| direct_dotprod_quint8_stride1::get_bundle(param, m_large_group); | direct_dotprod_quint8_stride1::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| @@ -105,7 +106,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns( | ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 0) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 0) { | ||||
| return direct_dotprod_quint8_stride1::get_kimpls(param, m_large_group); | return direct_dotprod_quint8_stride1::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -115,7 +116,7 @@ ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns( | |||||
| /* ===================== stride2 algo ===================== */ | /* ===================== stride2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoDotU8DirectStride2::usable( | bool ConvBiasImpl::AlgoDotU8DirectStride2::usable( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| bool avaible = | bool avaible = | ||||
| direct_dotprod_quint8_stride2::can_conv_direct_stride2_quint8( | direct_dotprod_quint8_stride2::can_conv_direct_stride2_quint8( | ||||
| @@ -129,7 +130,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride2::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto bundle = | auto bundle = | ||||
| direct_dotprod_quint8_stride2::get_bundle(param, m_large_group); | direct_dotprod_quint8_stride2::get_bundle(param, m_large_group); | ||||
| return bundle.total_size_in_bytes(); | return bundle.total_size_in_bytes(); | ||||
| @@ -137,7 +138,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoDotU8DirectStride2::dispatch_kerns( | ConvBiasImpl::AlgoDotU8DirectStride2::dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 1) { | MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 1) { | ||||
| return direct_dotprod_quint8_stride2::get_kimpls(param, m_large_group); | return direct_dotprod_quint8_stride2::get_kimpls(param, m_large_group); | ||||
| } | } | ||||
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| @@ -26,13 +27,11 @@ public: | |||||
| return m_large_group ? "QU8STRD1_LARGE_GROUP" : "QU8STRD1_SMALL_GROUP"; | return m_large_group ? "QU8STRD1_LARGE_GROUP" : "QU8STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -45,16 +44,14 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return m_large_group ? "QU8STRD2_LARGE_GROUP" : "QU8STRD2_SMALL_GROUP"; | return m_large_group ? "QU8STRD2_LARGE_GROUP" : "QU8STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| #if __ARM_FEATURE_DOTPROD | |||||
| #if __ARM_FEATURE_DOTPROD | |||||
| class ConvBiasImpl::AlgoDotU8DirectStride1 final : public AlgoBase { | class ConvBiasImpl::AlgoDotU8DirectStride1 final : public AlgoBase { | ||||
| bool m_large_group; | bool m_large_group; | ||||
| @@ -66,13 +63,11 @@ public: | |||||
| : "ARMDOTU8STRD1_SMALL_GROUP"; | : "ARMDOTU8STRD1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| @@ -86,13 +81,11 @@ public: | |||||
| return m_large_group ? "ARMDOTU8STRD2_LARGE_GROUP" | return m_large_group ? "ARMDOTU8STRD2_LARGE_GROUP" | ||||
| : "ARMDOTU8STRD2_SMALL_GROUP"; | : "ARMDOTU8STRD2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | const NCBKernSizeParam& param) const override; | ||||
| }; | }; | ||||
| #endif | #endif | ||||
| @@ -26,9 +26,8 @@ using namespace armv7; | |||||
| /* ===================== matrix mul algo ===================== */ | /* ===================== matrix mul algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoS8MatrixMul::usable( | bool ConvBiasImpl::AlgoS8MatrixMul::usable( | ||||
| FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return param.src_type.enumv() == DTypeEnum::QuantizedS8 && | return param.src_type.enumv() == DTypeEnum::QuantizedS8 && | ||||
| param.dst_type.enumv() == DTypeEnum::QuantizedS8 && | param.dst_type.enumv() == DTypeEnum::QuantizedS8 && | ||||
| @@ -27,14 +27,12 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "S8MATMUL"; } | const char* name() const override { return "S8MATMUL"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kimpl, {group, 1_z, 1_z}}}; | return {{kimpl, {group, 1_z, 1_z}}}; | ||||
| @@ -26,9 +26,8 @@ using namespace armv7; | |||||
| /* ===================== matrix mul algo ===================== */ | /* ===================== matrix mul algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoQU8MatrixMul::usable( | bool ConvBiasImpl::AlgoQU8MatrixMul::usable( | ||||
| FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && | return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && | ||||
| param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && | param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && | ||||
| @@ -27,15 +27,13 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "QU8MATMUL"; } | const char* name() const override { return "QU8MATMUL"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<fallback::ConvBiasImpl::NCBKern> dispatch_kerns( | SmallVector<fallback::ConvBiasImpl::NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kimpl, {group, 1_z, 1_z}}}; | return {{kimpl, {group, 1_z, 1_z}}}; | ||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #include "src/fallback/conv_bias/algos.h" | #include "src/fallback/conv_bias/algos.h" | ||||
| #include "megdnn/opr_param_defs.h" | |||||
| #include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
| #include "src/fallback/conv_bias/winograd/strategy.h" | #include "src/fallback/conv_bias/winograd/strategy.h" | ||||
| #include "src/naive/convolution/helper.h" | #include "src/naive/convolution/helper.h" | ||||
| @@ -21,18 +22,28 @@ using namespace fallback; | |||||
| namespace { | namespace { | ||||
| param::Convolution get_param_convolution(const param::ConvBias param) { | |||||
| param::Convolution ret{param.mode, param.pad_h, | |||||
| param.pad_w, param.stride_h, | |||||
| param.stride_w, param.dilate_h, | |||||
| param.dilate_w, param::Convolution::Sparse::DENSE, | |||||
| param.format}; | |||||
| return ret; | |||||
| param::Convolution get_param_convolution( | |||||
| const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
| param::Convolution::Mode mode; | |||||
| param::Convolution::Sparse sparse; | |||||
| if (param.filter_meta.should_flip) { | |||||
| mode = param::Convolution::Mode::CONVOLUTION; | |||||
| } else { | |||||
| mode = param::Convolution::Mode::CROSS_CORRELATION; | |||||
| } | |||||
| return param::Convolution{mode, | |||||
| param.filter_meta.padding[0], | |||||
| param.filter_meta.padding[1], | |||||
| param.filter_meta.stride[0], | |||||
| param.filter_meta.stride[1], | |||||
| param.filter_meta.dilation[1], | |||||
| param.filter_meta.dilation[0], | |||||
| sparse = param::Convolution::Sparse::DENSE, | |||||
| param.filter_meta.format}; | |||||
| } | } | ||||
| TensorLayoutArray get_layouts(const param::ConvBias& param, | |||||
| const ConvBiasImpl::NCBKernSizeParam& p) { | |||||
| megdnn_assert(param.format == param::ConvBias::Format::NCHW); | |||||
| TensorLayoutArray get_layouts(const ConvBiasImpl::NCBKernSizeParam& p) { | |||||
| megdnn_assert(p.filter_meta.format == param::ConvBias::Format::NCHW); | |||||
| UNPACK_CONV_NCB_KERN_SIZES(p); | UNPACK_CONV_NCB_KERN_SIZES(p); | ||||
| MEGDNN_MARK_USED_VAR(SH); | MEGDNN_MARK_USED_VAR(SH); | ||||
| MEGDNN_MARK_USED_VAR(SW); | MEGDNN_MARK_USED_VAR(SW); | ||||
| @@ -53,14 +64,14 @@ TensorLayoutArray get_layouts(const param::ConvBias& param, | |||||
| return {src_layout, filter_layout, bias_layout, dst_layout}; | return {src_layout, filter_layout, bias_layout, dst_layout}; | ||||
| } | } | ||||
| void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { | |||||
| void kern_default(const ConvBiasImpl::NCBKernParam& p) { | |||||
| dt_byte* workspace_ptr = static_cast<dt_byte*>(p.workspace_ptr); | dt_byte* workspace_ptr = static_cast<dt_byte*>(p.workspace_ptr); | ||||
| auto filter_meta_ptr = | auto filter_meta_ptr = | ||||
| reinterpret_cast<const ConvBiasForward::CanonizedFilterMeta*>( | reinterpret_cast<const ConvBiasForward::CanonizedFilterMeta*>( | ||||
| &p.filter_meta); | &p.filter_meta); | ||||
| auto filter_meta = *filter_meta_ptr; | auto filter_meta = *filter_meta_ptr; | ||||
| auto layouts = get_layouts(param, p); | |||||
| auto layouts = get_layouts(p); | |||||
| TensorND src{reinterpret_cast<dt_byte*>(const_cast<void*>(p.src_ptr)), | TensorND src{reinterpret_cast<dt_byte*>(const_cast<void*>(p.src_ptr)), | ||||
| layouts[0]}; | layouts[0]}; | ||||
| @@ -83,7 +94,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { | |||||
| bias.layout.dtype.enumv() == \ | bias.layout.dtype.enumv() == \ | ||||
| DTypeTrait<dtype::bias_dt>::enumv) && \ | DTypeTrait<dtype::bias_dt>::enumv) && \ | ||||
| sfb.layout.dtype.enumv() == DTypeTrait<dtype::out_dt>::enumv && \ | sfb.layout.dtype.enumv() == DTypeTrait<dtype::out_dt>::enumv && \ | ||||
| param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ | |||||
| p.compute_mode == param::ConvBias::ComputeMode::cmode) { \ | |||||
| func(src, filter, bias, sfb, workspace_ptr, filter_meta); \ | func(src, filter, bias, sfb, workspace_ptr, filter_meta); \ | ||||
| } | } | ||||
| #define DISPATCH(in_dt, out_dt) \ | #define DISPATCH(in_dt, out_dt) \ | ||||
| @@ -118,7 +129,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { | |||||
| auto res = sfb; | auto res = sfb; | ||||
| using NonlineMode = param::ConvBias::NonlineMode; | using NonlineMode = param::ConvBias::NonlineMode; | ||||
| switch (param.nonlineMode) { | |||||
| switch (p.nonlineMode) { | |||||
| #define cb(_mode) \ | #define cb(_mode) \ | ||||
| case NonlineMode::_mode: { \ | case NonlineMode::_mode: { \ | ||||
| if (res.layout.dtype.category() != DTypeCategory::QUANTIZED) { \ | if (res.layout.dtype.category() != DTypeCategory::QUANTIZED) { \ | ||||
| @@ -168,24 +179,23 @@ MIDOUT_DECL(megdnn_fallback_naive) | |||||
| /* ======================= AlgoNaive ======================== */ | /* ======================= AlgoNaive ======================== */ | ||||
| bool ConvBiasImpl::AlgoNaive::usable( | bool ConvBiasImpl::AlgoNaive::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam&, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MIDOUT_BEGIN(megdnn_fallback_naive, 0) { | MIDOUT_BEGIN(megdnn_fallback_naive, 0) { | ||||
| return opr->param().format == param::ConvBias::Format::NCHW; | |||||
| return param.filter_meta.format == param::ConvBias::Format::NCHW; | |||||
| } | } | ||||
| MIDOUT_END(); | MIDOUT_END(); | ||||
| return false; | return false; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& p) const { | |||||
| size_t ConvBiasImpl::AlgoNaive::get_workspace(const NCBKernSizeParam& p) const { | |||||
| MIDOUT_BEGIN(megdnn_fallback_naive, 1) { | MIDOUT_BEGIN(megdnn_fallback_naive, 1) { | ||||
| auto layouts = get_layouts(opr->param(), p); | |||||
| auto layouts = get_layouts(p); | |||||
| //! When group>1 or n>1, this algo will parallel by group and n | //! When group>1 or n>1, this algo will parallel by group and n | ||||
| size_t nr_threads = p.nr_threads; | size_t nr_threads = p.nr_threads; | ||||
| auto conv_opr = | auto conv_opr = | ||||
| inplace_cpu_handle()->create_operator<ConvolutionForward>(); | inplace_cpu_handle()->create_operator<ConvolutionForward>(); | ||||
| conv_opr->param() = get_param_convolution(opr->param()); | |||||
| conv_opr->param() = get_param_convolution(p); | |||||
| if (p.dst_type.enumv() == DTypeEnum::QuantizedS8 || | if (p.dst_type.enumv() == DTypeEnum::QuantizedS8 || | ||||
| p.dst_type.enumv() == DTypeEnum::Quantized8Asymm) { | p.dst_type.enumv() == DTypeEnum::Quantized8Asymm) { | ||||
| TensorLayout conv_dst_layout; | TensorLayout conv_dst_layout; | ||||
| @@ -201,15 +211,14 @@ size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr, | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoNaive::dispatch_kerns( | SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoNaive::dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& p) const { | |||||
| param::ConvBias opr_param = opr->param(); | |||||
| size_t workspace_size = get_workspace(opr, p); | |||||
| const NCBKernSizeParam& p) const { | |||||
| size_t workspace_size = get_workspace(p); | |||||
| //! When group>1 or n>1, this algo will parallel by group and n | //! When group>1 or n>1, this algo will parallel by group and n | ||||
| size_t nr_threads = p.nr_threads; | size_t nr_threads = p.nr_threads; | ||||
| size_t GROUP = p.filter_meta.group; | size_t GROUP = p.filter_meta.group; | ||||
| size_t N = p.n; | size_t N = p.n; | ||||
| size_t workspace_per_thread = workspace_size / nr_threads; | size_t workspace_per_thread = workspace_size / nr_threads; | ||||
| auto kern = [opr_param, workspace_per_thread]( | |||||
| auto kern = [workspace_per_thread]( | |||||
| const NCBKernParam& param, | const NCBKernParam& param, | ||||
| const NCBKernIndex& ncb_index) { | const NCBKernIndex& ncb_index) { | ||||
| MIDOUT_BEGIN(megdnn_fallback_naive, 2) { | MIDOUT_BEGIN(megdnn_fallback_naive, 2) { | ||||
| @@ -224,7 +233,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoNaive::dispatch_kerns( | |||||
| thread_param.dst_ptr = param.dst<void>(batch_id, group_id); | thread_param.dst_ptr = param.dst<void>(batch_id, group_id); | ||||
| thread_param.src_ptr = param.src<void>(batch_id, group_id); | thread_param.src_ptr = param.src<void>(batch_id, group_id); | ||||
| thread_param.bias_ptr = param.bias<void>(batch_id, group_id); | thread_param.bias_ptr = param.bias<void>(batch_id, group_id); | ||||
| kern_default(opr_param, thread_param); | |||||
| kern_default(thread_param); | |||||
| } | } | ||||
| MIDOUT_END(); | MIDOUT_END(); | ||||
| }; | }; | ||||
| @@ -235,10 +244,9 @@ MIDOUT_DECL(megdnn_fallback_winograd) | |||||
| /* ======================= AlgoWinogradF32 ======================== */ | /* ======================= AlgoWinogradF32 ======================== */ | ||||
| bool ConvBiasImpl::AlgoWinogradF32::usable( | bool ConvBiasImpl::AlgoWinogradF32::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { | ||||
| using Strategy = fallback::winograd::winograd_2x3_1x1_f; | using Strategy = fallback::winograd::winograd_2x3_1x1_f; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -246,13 +254,13 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( | |||||
| strategy, UNIT_TILE_SIZE, param) | strategy, UNIT_TILE_SIZE, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -268,7 +276,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( | size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& p) const { | |||||
| const NCBKernSizeParam& p) const { | |||||
| MEGDNN_MARK_USED_VAR(p); | MEGDNN_MARK_USED_VAR(p); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 1) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 1) { | ||||
| fallback::winograd::winograd_2x3_1x1_f strategy( | fallback::winograd::winograd_2x3_1x1_f strategy( | ||||
| @@ -284,7 +292,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( | ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 2) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 2) { | ||||
| fallback::winograd::winograd_2x3_1x1_f strategy( | fallback::winograd::winograd_2x3_1x1_f strategy( | ||||
| @@ -302,10 +310,9 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( | |||||
| /* ======================= AlgoWinogradF32 4x4 ======================== */ | /* ======================= AlgoWinogradF32 4x4 ======================== */ | ||||
| bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 0) { | ||||
| if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) | ||||
| return false; | return false; | ||||
| @@ -317,13 +324,13 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | |||||
| strategy, UNIT_TILE_SIZE, param) | strategy, UNIT_TILE_SIZE, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK4)) && | param::MatrixMul::Format::MK4)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -339,7 +346,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( | size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& p) const { | |||||
| const NCBKernSizeParam& p) const { | |||||
| MEGDNN_MARK_USED_VAR(p); | MEGDNN_MARK_USED_VAR(p); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 1) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 1) { | ||||
| fallback::winograd::winograd_2x3_4x4_f strategy( | fallback::winograd::winograd_2x3_4x4_f strategy( | ||||
| @@ -356,7 +363,7 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( | ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 2) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 2) { | ||||
| fallback::winograd::winograd_2x3_4x4_f strategy( | fallback::winograd::winograd_2x3_4x4_f strategy( | ||||
| @@ -374,10 +381,9 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( | |||||
| /* ======================= AlgoWinogradQS8 ======================== */ | /* ======================= AlgoWinogradQS8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoWinogradQS8::usable( | bool ConvBiasImpl::AlgoWinogradQS8::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { | ||||
| using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; | using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; | ||||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
| @@ -386,13 +392,13 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( | |||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::DEFAULT)) && | param::MatrixMul::Format::DEFAULT)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -408,7 +414,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( | size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& p) const { | |||||
| const NCBKernSizeParam& p) const { | |||||
| MEGDNN_MARK_USED_VAR(p); | MEGDNN_MARK_USED_VAR(p); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 1) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 1) { | ||||
| fallback::winograd::winograd_2x3_1x1_qs8 strategy( | fallback::winograd::winograd_2x3_1x1_qs8 strategy( | ||||
| @@ -424,7 +430,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( | ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 2) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 2) { | ||||
| fallback::winograd::winograd_2x3_1x1_qs8 strategy( | fallback::winograd::winograd_2x3_1x1_qs8 strategy( | ||||
| @@ -442,10 +448,9 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( | |||||
| /* ======================= AlgoWinogradQS8 8x8 ======================== */ | /* ======================= AlgoWinogradQS8 8x8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 0) { | ||||
| if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) | ||||
| return false; | return false; | ||||
| @@ -457,13 +462,13 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | |||||
| strategy, UNIT_TILE_SIZE, param) | strategy, UNIT_TILE_SIZE, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK8)) && | param::MatrixMul::Format::MK8)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -479,7 +484,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( | size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& p) const { | |||||
| const NCBKernSizeParam& p) const { | |||||
| MEGDNN_MARK_USED_VAR(p); | MEGDNN_MARK_USED_VAR(p); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 1) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 1) { | ||||
| fallback::winograd::winograd_2x3_8x8_qs8 strategy( | fallback::winograd::winograd_2x3_8x8_qs8 strategy( | ||||
| @@ -496,7 +501,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns( | ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 2) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 2) { | ||||
| fallback::winograd::winograd_2x3_8x8_qs8 strategy( | fallback::winograd::winograd_2x3_8x8_qs8 strategy( | ||||
| @@ -22,12 +22,10 @@ class ConvBiasImpl::AlgoNaive final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "FALLBACK_NAIVE"; } | const char* name() const override { return "FALLBACK_NAIVE"; } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| }; | }; | ||||
| class ConvBiasImpl::AlgoWinogradF32 final : public AlgoBase { | class ConvBiasImpl::AlgoWinogradF32 final : public AlgoBase { | ||||
| @@ -43,12 +41,10 @@ public: | |||||
| } | } | ||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| private: | private: | ||||
| MatrixMulImpl::AlgoBase* m_matmul_algo; | MatrixMulImpl::AlgoBase* m_matmul_algo; | ||||
| @@ -69,12 +65,10 @@ public: | |||||
| } | } | ||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| private: | private: | ||||
| MatrixMulImpl::AlgoBase* m_matmul_algo; | MatrixMulImpl::AlgoBase* m_matmul_algo; | ||||
| @@ -95,12 +89,10 @@ public: | |||||
| } | } | ||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| private: | private: | ||||
| MatrixMulImpl::AlgoBase* m_matmul_algo; | MatrixMulImpl::AlgoBase* m_matmul_algo; | ||||
| @@ -121,12 +113,10 @@ public: | |||||
| } | } | ||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam&) const override; | |||||
| private: | private: | ||||
| MatrixMulImpl::AlgoBase* m_matmul_algo; | MatrixMulImpl::AlgoBase* m_matmul_algo; | ||||
| @@ -140,22 +140,17 @@ using BiasMode = ConvBiasForward::BiasMode; | |||||
| #define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \ | #define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \ | ||||
| bool is_reproducible() const override { return true; } \ | bool is_reproducible() const override { return true; } \ | ||||
| bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \ | |||||
| bool usable(const NCBKernSizeParam& param, \ | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; \ | AlgoSelectionStrategy algo_selection_strategy) const override; \ | ||||
| size_t get_workspace(fallback::ConvBiasImpl*, \ | |||||
| const NCBKernSizeParam& param) const override; \ | |||||
| virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \ | |||||
| const NCBKernSizeParam& param) \ | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; \ | |||||
| virtual SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam& param) \ | |||||
| const override; \ | const override; \ | ||||
| SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \ | SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \ | ||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \ | |||||
| const override; \ | |||||
| size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \ | |||||
| const NCBKernSizeParam& param) \ | |||||
| const NCBKernSizeParam& param) const override; \ | |||||
| size_t get_preprocess_workspace(const NCBKernSizeParam& param) \ | |||||
| const override; \ | const override; \ | ||||
| virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \ | virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \ | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \ | |||||
| const override; \ | |||||
| const NCBKernSizeParam& param) const override; \ | |||||
| \ | \ | ||||
| private: \ | private: \ | ||||
| fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \ | fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \ | ||||
| @@ -48,7 +48,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoConv1x1::get_workspace( | size_t ConvBiasImpl::AlgoConv1x1::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| size_t OH = param.osz[0]; | size_t OH = param.osz[0]; | ||||
| size_t OW = param.osz[1]; | size_t OW = param.osz[1]; | ||||
| size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | ||||
| @@ -90,7 +90,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_workspace( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| SmallVector<ConvBiasImpl::NCBKern> ret_kern; | SmallVector<ConvBiasImpl::NCBKern> ret_kern; | ||||
| size_t OH = param.osz[0]; | size_t OH = param.osz[0]; | ||||
| size_t OW = param.osz[1]; | size_t OW = param.osz[1]; | ||||
| @@ -138,11 +138,11 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | |||||
| //! get thread bundle | //! get thread bundle | ||||
| thread_bundle = utils::get_thread_bundle(param, matmul_bundle.get_size(2), | thread_bundle = utils::get_thread_bundle(param, matmul_bundle.get_size(2), | ||||
| compt_oc_block_size); | |||||
| compt_oc_block_size); | |||||
| Conv1x1StrategyBase* conv1x1_strategy = | Conv1x1StrategyBase* conv1x1_strategy = | ||||
| Conv1x1Factory::make_conv1x1_strategy(param, pack_mode, | Conv1x1Factory::make_conv1x1_strategy(param, pack_mode, | ||||
| opr->param().format); | |||||
| param.filter_meta.format); | |||||
| auto kern_packA = [this, whole_bundle, matmul_bundle, param, | auto kern_packA = [this, whole_bundle, matmul_bundle, param, | ||||
| compt_oc_block_size, conv1x1_strategy]( | compt_oc_block_size, conv1x1_strategy]( | ||||
| @@ -180,13 +180,12 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | |||||
| return ret_kern; | return ret_kern; | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param, | |||||
| bool ConvBiasImpl::AlgoConv1x1::usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) { | MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) { | ||||
| if (opr->param().format != param::ConvBias::Format::NCHW && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44 && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44_DOT) | |||||
| if (param.filter_meta.format != param::ConvBias::Format::NCHW && | |||||
| param.filter_meta.format != param::ConvBias::Format::NCHW44 && | |||||
| param.filter_meta.format != param::ConvBias::Format::NCHW44_DOT) | |||||
| return false; | return false; | ||||
| size_t FH = param.filter_meta.spatial[0], | size_t FH = param.filter_meta.spatial[0], | ||||
| @@ -199,7 +198,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
| if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1) | if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1) | ||||
| return false; | return false; | ||||
| if(param.src_type.enumv() != param.filter_type.enumv()) { | |||||
| if (param.src_type.enumv() != param.filter_type.enumv()) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -225,8 +224,8 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
| } | } | ||||
| } | } | ||||
| if (opr->param().format == param::ConvBias::Format::NCHW44 || | |||||
| opr->param().format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| if (param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||||
| param.filter_meta.format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| if (param.filter_meta.icpg < 4_z || param.filter_meta.icpg == 1 || | if (param.filter_meta.icpg < 4_z || param.filter_meta.icpg == 1 || | ||||
| param.filter_meta.ocpg == 1) { | param.filter_meta.ocpg == 1) { | ||||
| return false; | return false; | ||||
| @@ -236,13 +235,14 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
| size_t OH = param.osz[0]; | size_t OH = param.osz[0]; | ||||
| size_t OW = param.osz[1]; | size_t OW = param.osz[1]; | ||||
| MatrixMulImpl::KernSizeParam matmul_param = utils::get_matmul_kern_param( | |||||
| param, OH * OW, get_oc_tile_size_heuristic(param)); | |||||
| MatrixMulImpl::KernSizeParam matmul_param = | |||||
| utils::get_matmul_kern_param(param, OH * OW, | |||||
| get_oc_tile_size_heuristic(param)); | |||||
| bool matmul_usable = m_matmul_algo->usable(matmul_param); | bool matmul_usable = m_matmul_algo->usable(matmul_param); | ||||
| auto pack_mode = m_matmul_algo->packmode(); | auto pack_mode = m_matmul_algo->packmode(); | ||||
| bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy( | bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy( | ||||
| param, pack_mode, opr->param().format); | |||||
| param, pack_mode, param.filter_meta.format); | |||||
| return matmul_usable && strategy_usable && | return matmul_usable && strategy_usable && | ||||
| (param.filter_meta.dilation[0] == | (param.filter_meta.dilation[0] == | ||||
| @@ -255,7 +255,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoConv1x1::is_preferred( | bool ConvBiasImpl::AlgoConv1x1::is_preferred( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| size_t OH = param.osz[0]; | size_t OH = param.osz[0]; | ||||
| size_t OW = param.osz[1]; | size_t OW = param.osz[1]; | ||||
| if (OH * OW != 1) { | if (OH * OW != 1) { | ||||
| @@ -265,8 +265,8 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred( | |||||
| if (param.src_type.enumv() == DTypeEnum::Int8 && | if (param.src_type.enumv() == DTypeEnum::Int8 && | ||||
| param.filter_type.enumv() == DTypeEnum::Int8 && | param.filter_type.enumv() == DTypeEnum::Int8 && | ||||
| param.dst_type.enumv() == DTypeEnum::Int16) { | param.dst_type.enumv() == DTypeEnum::Int16) { | ||||
| return true; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| #elif MEGDNN_X86 | #elif MEGDNN_X86 | ||||
| size_t OC = param.filter_meta.ocpg; | size_t OC = param.filter_meta.ocpg; | ||||
| if (OC > 2 || param.src_type.enumv() == DTypeEnum::Float32) | if (OC > 2 || param.src_type.enumv() == DTypeEnum::Float32) | ||||
| @@ -276,4 +276,4 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred( | |||||
| } | } | ||||
| } | } | ||||
| // vim: syntax=cpp.doxygen | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -34,14 +34,13 @@ public: | |||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(ConvBiasImpl*, const NCBKernSizeParam&) const override; | |||||
| bool is_preferred(const NCBKernSizeParam&) const override; | |||||
| protected: | protected: | ||||
| size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; | size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; | ||||
| @@ -249,7 +249,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_oc_tile_size_heuristic( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace( | size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | ||||
| midout_iv("AlgoConv1x1Gemv::get_workspace"_hash)) { | midout_iv("AlgoConv1x1Gemv::get_workspace"_hash)) { | ||||
| size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | ||||
| @@ -265,7 +265,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace( | |||||
| SmallVector<ConvBiasImpl::NCBKern> | SmallVector<ConvBiasImpl::NCBKern> | ||||
| ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( | ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| SmallVector<ConvBiasImpl::NCBKern> ret_kern; | SmallVector<ConvBiasImpl::NCBKern> ret_kern; | ||||
| size_t OC = param.filter_meta.ocpg; | size_t OC = param.filter_meta.ocpg; | ||||
| size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); | ||||
| @@ -311,7 +311,7 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( | |||||
| } \ | } \ | ||||
| MIDOUT_END() | MIDOUT_END() | ||||
| switch (opr->param().format) { | |||||
| switch (param.filter_meta.format) { | |||||
| case param::ConvBias::Format::NCHW: | case param::ConvBias::Format::NCHW: | ||||
| cb1(param::ConvBias::Format::NCHW, dt_float32, dt_float32, | cb1(param::ConvBias::Format::NCHW, dt_float32, dt_float32, | ||||
| PostprocessMode::FLOAT, "NCHW::GEMV::FLOAT"_hash); | PostprocessMode::FLOAT, "NCHW::GEMV::FLOAT"_hash); | ||||
| @@ -401,18 +401,18 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( | |||||
| return ret_kern; | return ret_kern; | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param, | |||||
| bool ConvBiasImpl::AlgoConv1x1Gemv::usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | ||||
| midout_iv("AlgoConv1x1Gemv::usable"_hash)) { | midout_iv("AlgoConv1x1Gemv::usable"_hash)) { | ||||
| auto format = param.filter_meta.format; | |||||
| #if MEGDNN_X86 | #if MEGDNN_X86 | ||||
| if (opr->param().format != param::ConvBias::Format::NCHW) | |||||
| if (format != param::ConvBias::Format::NCHW) | |||||
| return false; | return false; | ||||
| #elif MEGDNN_AARCH64 || MEGDNN_ARMV7 | #elif MEGDNN_AARCH64 || MEGDNN_ARMV7 | ||||
| if (opr->param().format != param::ConvBias::Format::NCHW && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44 && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44_DOT) | |||||
| if (format != param::ConvBias::Format::NCHW && | |||||
| format != param::ConvBias::Format::NCHW44 && | |||||
| format != param::ConvBias::Format::NCHW44_DOT) | |||||
| return false; | return false; | ||||
| #endif | #endif | ||||
| @@ -469,13 +469,13 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, | |||||
| return false; | return false; | ||||
| } | } | ||||
| #if MEGDNN_AARCH64 || MEGDNN_ARMV7 | #if MEGDNN_AARCH64 || MEGDNN_ARMV7 | ||||
| if (opr->param().format == param::ConvBias::Format::NCHW44) { | |||||
| if (format == param::ConvBias::Format::NCHW44) { | |||||
| if (param.src_type.enumv() != DTypeEnum::Float32 && | if (param.src_type.enumv() != DTypeEnum::Float32 && | ||||
| param.src_type.enumv() != DTypeEnum::Int8 && | param.src_type.enumv() != DTypeEnum::Int8 && | ||||
| param.src_type.enumv() != DTypeEnum::QuantizedS8) { | param.src_type.enumv() != DTypeEnum::QuantizedS8) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| } else if (opr->param().format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| } else if (format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| if (param.src_type.enumv() != DTypeEnum::Int8 && | if (param.src_type.enumv() != DTypeEnum::Int8 && | ||||
| param.src_type.enumv() != DTypeEnum::QuantizedS8) { | param.src_type.enumv() != DTypeEnum::QuantizedS8) { | ||||
| return false; | return false; | ||||
| @@ -492,11 +492,11 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred( | bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, | ||||
| midout_iv("AlgoConv1x1Gemv::is_preferred"_hash)) { | midout_iv("AlgoConv1x1Gemv::is_preferred"_hash)) { | ||||
| #if (MEGDNN_ARMV7 || MEGDNN_AARCH64) | #if (MEGDNN_ARMV7 || MEGDNN_AARCH64) | ||||
| if (opr->param().format == param::ConvBias::Format::NCHW && | |||||
| if (param.filter_meta.format == param::ConvBias::Format::NCHW && | |||||
| param.src_type.enumv() == DTypeEnum::Quantized8Asymm) { | param.src_type.enumv() == DTypeEnum::Quantized8Asymm) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -507,4 +507,4 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred( | |||||
| return false; | return false; | ||||
| } | } | ||||
| // vim: syntax=cpp.doxygen | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -24,18 +24,15 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { | |||||
| return "CONV1x1_GEMV"; | |||||
| } | |||||
| const char* name() const override { return "CONV1x1_GEMV"; } | |||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(ConvBiasImpl*, const NCBKernSizeParam&) const override; | |||||
| bool is_preferred(const NCBKernSizeParam&) const override; | |||||
| protected: | protected: | ||||
| size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; | size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; | ||||
| @@ -478,7 +478,7 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoIm2col::get_workspace( | size_t ConvBiasImpl::AlgoIm2col::get_workspace( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& p) const { | |||||
| const NCBKernSizeParam& p) const { | |||||
| MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 0) { | MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 0) { | ||||
| return get_bundle(p).total_size_in_bytes(); | return get_bundle(p).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -487,7 +487,7 @@ size_t ConvBiasImpl::AlgoIm2col::get_workspace( | |||||
| } | } | ||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns( | SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 1) { | MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 1) { | ||||
| UNPACK_CONV_F32_NCB_KERN_SIZES(param); | UNPACK_CONV_F32_NCB_KERN_SIZES(param); | ||||
| MEGDNN_MARK_USED_VAR(SH); | MEGDNN_MARK_USED_VAR(SH); | ||||
| @@ -660,12 +660,13 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoIm2col::dispatch_kerns( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoIm2col::usable( | bool ConvBiasImpl::AlgoIm2col::usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 2) { | MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 2) { | ||||
| if (opr->param().format != param::ConvBias::Format::NCHW && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44_DOT && | |||||
| opr->param().format != param::ConvBias::Format::NCHW44) { | |||||
| auto format = param.filter_meta.format; | |||||
| if (format != param::ConvBias::Format::NCHW && | |||||
| format != param::ConvBias::Format::NCHW44_DOT && | |||||
| format != param::ConvBias::Format::NCHW44) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -695,8 +696,8 @@ bool ConvBiasImpl::AlgoIm2col::usable( | |||||
| } | } | ||||
| fallback::MatrixMulImpl::AlgoBase::MatmulDescription mdesc = | fallback::MatrixMulImpl::AlgoBase::MatmulDescription mdesc = | ||||
| m_matmul_algo->matmul_description(); | m_matmul_algo->matmul_description(); | ||||
| if (opr->param().format == param::ConvBias::Format::NCHW44 || | |||||
| opr->param().format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| if (format == param::ConvBias::Format::NCHW44 || | |||||
| format == param::ConvBias::Format::NCHW44_DOT) { | |||||
| //! current NCHW44 im2col only support DEFAULT mode matmul | //! current NCHW44 im2col only support DEFAULT mode matmul | ||||
| if (mdesc.packmode != Pack_Mode::DEFAULT) { | if (mdesc.packmode != Pack_Mode::DEFAULT) { | ||||
| return false; | return false; | ||||
| @@ -15,6 +15,8 @@ | |||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/fallback/conv_bias/opr_impl.h" | #include "src/fallback/conv_bias/opr_impl.h" | ||||
| #include "src/fallback/matrix_mul/opr_impl.h" | #include "src/fallback/matrix_mul/opr_impl.h" | ||||
| #include "src/common/opr_delegate.h" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace fallback { | namespace fallback { | ||||
| @@ -54,16 +56,18 @@ public: | |||||
| } | } | ||||
| return m_name.c_str(); | return m_name.c_str(); | ||||
| } | } | ||||
| bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(fallback::ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred( | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| if (param.src_type.category() == DTypeCategory::QUANTIZED) { | if (param.src_type.category() == DTypeCategory::QUANTIZED) { | ||||
| return opr->is_matmul_quantized_prefer(param); | |||||
| static CpuOprDelegationStorage<1> storage; | |||||
| auto conv_bias_opr = storage.get<ConvBias, 0>(); | |||||
| return static_cast<ConvBiasImpl*>(conv_bias_opr) | |||||
| ->is_matmul_quantized_prefer(param); | |||||
| } | } | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto OC = fm.ocpg, IC = fm.icpg; | auto OC = fm.ocpg, IC = fm.icpg; | ||||
| @@ -54,7 +54,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { | |||||
| public: | public: | ||||
| AlgoPack() { | AlgoPack() { | ||||
| refhold.emplace_back(new AlgoConv1x1Gemv()); | refhold.emplace_back(new AlgoConv1x1Gemv()); | ||||
| all_algos.emplace_back(refhold.back().get()); | all_algos.emplace_back(refhold.back().get()); | ||||
| @@ -121,7 +120,7 @@ bool ConvBiasImpl::is_naive_algo(ConvBiasImpl::Algorithm* algo) { | |||||
| } | } | ||||
| #define NCB_ALGO_FUNC(name, algo, param) \ | #define NCB_ALGO_FUNC(name, algo, param) \ | ||||
| static_cast<AlgoBase*>(algo)->name(this, param) | |||||
| static_cast<AlgoBase*>(algo)->name(param) | |||||
| void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | ||||
| _megdnn_tensor_in bias, _megdnn_tensor_in z, | _megdnn_tensor_in bias, _megdnn_tensor_in z, | ||||
| @@ -243,11 +242,10 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic_with_ncb( | |||||
| const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, | const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, | ||||
| bool reproducible) { | bool reproducible) { | ||||
| for (auto i : get_all_algorithms_with_ncb(param)) { | for (auto i : get_all_algorithms_with_ncb(param)) { | ||||
| size_t need_workspace = NCB_ALGO_FUNC(get_workspace, i, param); | |||||
| if (static_cast<AlgoBase*>(i)->usable_reproducible( | if (static_cast<AlgoBase*>(i)->usable_reproducible( | ||||
| this, param, AlgoSelectionStrategy::HEURISTIC, | |||||
| reproducible) && | |||||
| need_workspace <= workspace_limit_in_bytes) { | |||||
| param, AlgoSelectionStrategy::HEURISTIC, reproducible) && | |||||
| NCB_ALGO_FUNC(get_workspace, i, param) <= | |||||
| workspace_limit_in_bytes) { | |||||
| return i; | return i; | ||||
| } | } | ||||
| } | } | ||||
| @@ -392,8 +390,8 @@ std::vector<ConvBiasImpl::Algorithm*> ConvBiasImpl::get_all_algorithms_with_ncb( | |||||
| std::vector<Algorithm*> algos; | std::vector<Algorithm*> algos; | ||||
| std::vector<Algorithm*> prefer_algos; | std::vector<Algorithm*> prefer_algos; | ||||
| for (auto&& algo : algo_pack()) { | for (auto&& algo : algo_pack()) { | ||||
| if (algo->usable(this, param, AlgoSelectionStrategy::FULL_RUN)) { | |||||
| if (algo->is_preferred(this, param)) { | |||||
| if (algo->usable(param, AlgoSelectionStrategy::FULL_RUN)) { | |||||
| if (algo->is_preferred(param)) { | |||||
| prefer_algos.push_back(algo); | prefer_algos.push_back(algo); | ||||
| } else { | } else { | ||||
| algos.push_back(algo); | algos.push_back(algo); | ||||
| @@ -193,7 +193,7 @@ public: | |||||
| //! move arm_common to fallback | //! move arm_common to fallback | ||||
| virtual bool is_matmul_quantized_prefer( | virtual bool is_matmul_quantized_prefer( | ||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) { | |||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) const { | |||||
| MEGDNN_MARK_USED_VAR(ncb_param); | MEGDNN_MARK_USED_VAR(ncb_param); | ||||
| return true; | return true; | ||||
| }; | }; | ||||
| @@ -209,43 +209,39 @@ public: | |||||
| public: | public: | ||||
| virtual ~AlgoBase() = default; | virtual ~AlgoBase() = default; | ||||
| virtual bool usable( | virtual bool usable( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const = 0; | AlgoSelectionStrategy algo_selection_strategy) const = 0; | ||||
| virtual size_t get_workspace(ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const = 0; | |||||
| virtual size_t get_workspace(const NCBKernSizeParam& param) const = 0; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| ConvBiasImpl* opr, const NCBKernSizeParam& param) const = 0; | |||||
| const NCBKernSizeParam& param) const = 0; | |||||
| virtual SmallVector<NCBKern> dispatch_preprocess_kerns( | virtual SmallVector<NCBKern> dispatch_preprocess_kerns( | ||||
| ConvBiasImpl*, const NCBKernSizeParam&) const { | |||||
| const NCBKernSizeParam&) const { | |||||
| return {}; | return {}; | ||||
| }; | }; | ||||
| //! get the layouts of weight_prerocess dst | //! get the layouts of weight_prerocess dst | ||||
| virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | ||||
| ConvBiasImpl*, const NCBKernSizeParam&) const { | |||||
| const NCBKernSizeParam&) const { | |||||
| return {}; | return {}; | ||||
| }; | }; | ||||
| //! get the workspace when weight_prerocess | //! get the workspace when weight_prerocess | ||||
| virtual size_t get_preprocess_workspace(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const { | |||||
| virtual size_t get_preprocess_workspace(const NCBKernSizeParam&) const { | |||||
| return 0_z; | return 0_z; | ||||
| }; | }; | ||||
| //! Temporarily used to identify whether the matmul algorithm is | //! Temporarily used to identify whether the matmul algorithm is | ||||
| //! is_preferred. | //! is_preferred. | ||||
| virtual bool is_preferred(ConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const { | |||||
| virtual bool is_preferred(const NCBKernSizeParam&) const { | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool usable_reproducible(ConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param, | |||||
| bool usable_reproducible(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy, | AlgoSelectionStrategy algo_selection_strategy, | ||||
| bool reproducible = true) const { | bool reproducible = true) const { | ||||
| return (!reproducible || is_reproducible()) && | return (!reproducible || is_reproducible()) && | ||||
| usable(opr, param, algo_selection_strategy); | |||||
| usable(param, algo_selection_strategy); | |||||
| } | } | ||||
| }; | }; | ||||
| @@ -501,9 +501,10 @@ public: | |||||
| Strategy strategy = m_strategy; | Strategy strategy = m_strategy; | ||||
| SmallVector<NCBKern> kerns; | SmallVector<NCBKern> kerns; | ||||
| auto filter_process_kern = | auto filter_process_kern = | ||||
| [strategy, bundle, &preprocessed_dst]( | |||||
| [strategy, bundle, &preprocessed_dst, this]( | |||||
| const NCBKernParam& ncb_param, | const NCBKernParam& ncb_param, | ||||
| const NCBKernIndex& ncb_index) mutable { | const NCBKernIndex& ncb_index) mutable { | ||||
| MEGDNN_MARK_USED_VAR(this); | |||||
| MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | ||||
| midout_iv("filter_preprocess"_hash)) { | midout_iv("filter_preprocess"_hash)) { | ||||
| bundle.set(ncb_param.workspace_ptr); | bundle.set(ncb_param.workspace_ptr); | ||||
| @@ -569,9 +570,10 @@ public: | |||||
| param.filter_meta.format == param::ConvBias::Format::NCHW88 || | param.filter_meta.format == param::ConvBias::Format::NCHW88 || | ||||
| param.filter_meta.format == param::ConvBias::Format::NCHW44)) { | param.filter_meta.format == param::ConvBias::Format::NCHW44)) { | ||||
| auto filter_process_kern = | auto filter_process_kern = | ||||
| [strategy = m_strategy, bundle_top, bundle_compute]( | |||||
| [strategy = m_strategy, bundle_top, bundle_compute, this]( | |||||
| const NCBKernParam& ncb_param, | const NCBKernParam& ncb_param, | ||||
| const NCBKernIndex& ncb_index) mutable { | const NCBKernIndex& ncb_index) mutable { | ||||
| MEGDNN_MARK_USED_VAR(this); | |||||
| MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | ||||
| midout_iv("filter_process"_hash)) { | midout_iv("filter_process"_hash)) { | ||||
| bundle_top.set(ncb_param.workspace_ptr); | bundle_top.set(ncb_param.workspace_ptr); | ||||
| @@ -594,9 +596,10 @@ public: | |||||
| } | } | ||||
| auto winograd_compute_kern = | auto winograd_compute_kern = | ||||
| [strategy = m_strategy, bundle_top, bundle_compute, matmul_algo, | [strategy = m_strategy, bundle_top, bundle_compute, matmul_algo, | ||||
| matmul_param, unit_tile_size, | |||||
| unit_oc_size](const NCBKernParam& ncb_param, | |||||
| const NCBKernIndex& ncb_index) mutable { | |||||
| matmul_param, unit_tile_size, unit_oc_size, | |||||
| this](const NCBKernParam& ncb_param, | |||||
| const NCBKernIndex& ncb_index) mutable { | |||||
| MEGDNN_MARK_USED_VAR(this); | |||||
| MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | ||||
| midout_iv("winograd_compute"_hash)) { | midout_iv("winograd_compute"_hash)) { | ||||
| bundle_top.set(ncb_param.workspace_ptr); | bundle_top.set(ncb_param.workspace_ptr); | ||||
| @@ -728,43 +731,43 @@ public: | |||||
| } \ | } \ | ||||
| MIDOUT_END(); | MIDOUT_END(); | ||||
| #define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \ | |||||
| _matmul_format) \ | |||||
| size_t ConvBiasImpl::_class::get_workspace( \ | |||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \ | |||||
| _strategy, _midout_flag, \ | |||||
| _matmul_format); \ | |||||
| return 0; \ | |||||
| } \ | |||||
| size_t ConvBiasImpl::_class::get_preprocess_workspace( \ | |||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
| _class, get_preprocess_workspace_size, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return 0; \ | |||||
| } \ | |||||
| SmallVector<TensorLayout> \ | |||||
| ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \ | |||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
| _class, deduce_preprocessed_filter_layout, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return {}; \ | |||||
| } \ | |||||
| SmallVector<ConvBiasImpl::NCBKern> \ | |||||
| ConvBiasImpl::_class::dispatch_preprocess_kerns( \ | |||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \ | |||||
| _strategy, _midout_flag, \ | |||||
| _matmul_format); \ | |||||
| return {}; \ | |||||
| } \ | |||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \ | |||||
| fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return {}; \ | |||||
| #define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \ | |||||
| _matmul_format) \ | |||||
| size_t ConvBiasImpl::_class::get_workspace(const NCBKernSizeParam& param) \ | |||||
| const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \ | |||||
| _strategy, _midout_flag, \ | |||||
| _matmul_format); \ | |||||
| return 0; \ | |||||
| } \ | |||||
| size_t ConvBiasImpl::_class::get_preprocess_workspace( \ | |||||
| const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
| _class, get_preprocess_workspace_size, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return 0; \ | |||||
| } \ | |||||
| SmallVector<TensorLayout> \ | |||||
| ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \ | |||||
| const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
| _class, deduce_preprocessed_filter_layout, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return {}; \ | |||||
| } \ | |||||
| SmallVector<ConvBiasImpl::NCBKern> \ | |||||
| ConvBiasImpl::_class::dispatch_preprocess_kerns( \ | |||||
| const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \ | |||||
| _strategy, _midout_flag, \ | |||||
| _matmul_format); \ | |||||
| return {}; \ | |||||
| } \ | |||||
| SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \ | |||||
| const NCBKernSizeParam& param) const { \ | |||||
| MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \ | |||||
| _midout_flag, _matmul_format); \ | |||||
| return {}; \ | |||||
| } | } | ||||
| // vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen | ||||
| @@ -164,7 +164,7 @@ void kern_direct(const NCBKernParam& param) { | |||||
| /* ===================== fallback algo ===================== */ | /* ===================== fallback algo ===================== */ | ||||
| bool ConvolutionImpl::AlgoFallback::usable( | bool ConvolutionImpl::AlgoFallback::usable( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return fm.format == param::Convolution::Format::NCHW && | return fm.format == param::Convolution::Format::NCHW && | ||||
| @@ -175,7 +175,7 @@ bool ConvolutionImpl::AlgoFallback::usable( | |||||
| } | } | ||||
| size_t ConvolutionImpl::AlgoFallback::get_workspace( | size_t ConvolutionImpl::AlgoFallback::get_workspace( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto FH = param.filter_meta.spatial[0], FW = param.filter_meta.spatial[1]; | auto FH = param.filter_meta.spatial[0], FW = param.filter_meta.spatial[1]; | ||||
| size_t nr_threads = param.nr_threads; | size_t nr_threads = param.nr_threads; | ||||
| if (param.filter_meta.should_flip) { | if (param.filter_meta.should_flip) { | ||||
| @@ -190,11 +190,11 @@ size_t ConvolutionImpl::AlgoFallback::get_workspace( | |||||
| SmallVector<ConvolutionImpl::NCBKern> | SmallVector<ConvolutionImpl::NCBKern> | ||||
| ConvolutionImpl::AlgoFallback::dispatch_kern( | ConvolutionImpl::AlgoFallback::dispatch_kern( | ||||
| ConvolutionImpl* opr, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| size_t N = param.n; | size_t N = param.n; | ||||
| size_t nr_threads = param.nr_threads; | size_t nr_threads = param.nr_threads; | ||||
| size_t workspace_per_thread = get_workspace(opr, param) / nr_threads; | |||||
| size_t workspace_per_thread = get_workspace( param) / nr_threads; | |||||
| auto kern_fallback = [workspace_per_thread](const NCBKernParam& p, | auto kern_fallback = [workspace_per_thread](const NCBKernParam& p, | ||||
| const NCBKernIndex& ncb_index) { | const NCBKernIndex& ncb_index) { | ||||
| UNPACK_CONV_F32_NCB_KERN_SIZES(p); | UNPACK_CONV_F32_NCB_KERN_SIZES(p); | ||||
| @@ -218,7 +218,7 @@ ConvolutionImpl::AlgoFallback::dispatch_kern( | |||||
| /* ===================== naive algo ===================== */ | /* ===================== naive algo ===================== */ | ||||
| bool ConvolutionImpl::AlgoNaive::usable( | bool ConvolutionImpl::AlgoNaive::usable( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| bool ret = false; | bool ret = false; | ||||
| @@ -241,7 +241,7 @@ bool ConvolutionImpl::AlgoNaive::usable( | |||||
| } | } | ||||
| SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| size_t N = param.n; | size_t N = param.n; | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| #define cb(dt, cmode, compute_type) \ | #define cb(dt, cmode, compute_type) \ | ||||
| @@ -289,75 +289,42 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | |||||
| /* ===================== default algo ===================== */ | /* ===================== default algo ===================== */ | ||||
| ConvolutionImpl::AlgoDefault::AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, | |||||
| ConvBiasImpl::AlgoBase* algorithm) | |||||
| : m_conv_bias_opr(conv_bias_opr), m_algorithm(algorithm) { | |||||
| ConvolutionImpl::AlgoDefault::AlgoDefault(ConvBiasImpl::AlgoBase* algorithm) | |||||
| : m_algorithm(algorithm) { | |||||
| megdnn_assert_internal(algorithm); | megdnn_assert_internal(algorithm); | ||||
| m_name = ssprintf("CONVOLUTION_DEFAULT_%s", m_algorithm->name()); | m_name = ssprintf("CONVOLUTION_DEFAULT_%s", m_algorithm->name()); | ||||
| } | } | ||||
| ConvBiasImpl::NCBKernSizeParam | ConvBiasImpl::NCBKernSizeParam | ||||
| ConvolutionImpl::AlgoDefault::AlgoDefault::init_convbias_opr_and_param( | |||||
| ConvBiasImpl* conv_bias_opr, const NCBKernSizeParam& param) { | |||||
| ConvolutionImpl::AlgoDefault::init_conv_bias_param( | |||||
| const NCBKernSizeParam& param) { | |||||
| DType bias_type = param.dst_type; | DType bias_type = param.dst_type; | ||||
| if (bias_type.category() == DTypeCategory::QUANTIZED) { | if (bias_type.category() == DTypeCategory::QUANTIZED) { | ||||
| bias_type = dtype::QuantizedS32( | bias_type = dtype::QuantizedS32( | ||||
| mul_scale(param.src_type, param.filter_type)); | mul_scale(param.src_type, param.filter_type)); | ||||
| } | } | ||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param( | |||||
| param, 0, param::MatrixMul::Format::DEFAULT, bias_type, 0, | |||||
| BiasMode::NO_BIAS, param::ConvBias::NonlineMode::IDENTITY); | |||||
| // nonline mode | |||||
| conv_bias_opr->param().nonlineMode = conv_bias_size_param.nonlineMode; | |||||
| // convolution mode | |||||
| if (conv_bias_size_param.filter_meta.should_flip) { | |||||
| conv_bias_opr->param().mode = param::ConvolutionV0::Mode::CONVOLUTION; | |||||
| } else { | |||||
| conv_bias_opr->param().mode = | |||||
| param::ConvolutionV0::Mode::CROSS_CORRELATION; | |||||
| } | |||||
| // sparse | |||||
| if (conv_bias_size_param.filter_meta.group > 1) { | |||||
| conv_bias_opr->param().sparse = param::ConvolutionV0::Sparse::GROUP; | |||||
| } else { | |||||
| conv_bias_opr->param().sparse = param::ConvolutionV0::Sparse::DENSE; | |||||
| } | |||||
| // format | |||||
| conv_bias_opr->param().format = conv_bias_size_param.filter_meta.format; | |||||
| // pad stride dilate | |||||
| conv_bias_opr->param().pad_h = conv_bias_size_param.filter_meta.padding[0]; | |||||
| conv_bias_opr->param().pad_w = conv_bias_size_param.filter_meta.padding[1]; | |||||
| conv_bias_opr->param().stride_h = | |||||
| conv_bias_size_param.filter_meta.stride[0]; | |||||
| conv_bias_opr->param().stride_w = | |||||
| conv_bias_size_param.filter_meta.stride[1]; | |||||
| conv_bias_opr->param().dilate_h = | |||||
| conv_bias_size_param.filter_meta.dilation[0]; | |||||
| conv_bias_opr->param().dilate_w = | |||||
| conv_bias_size_param.filter_meta.dilation[1]; | |||||
| // output_block_size | |||||
| conv_bias_opr->param().output_block_size = | |||||
| conv_bias_size_param.output_block_size; | |||||
| // compute_mode | |||||
| conv_bias_opr->param().compute_mode = conv_bias_size_param.compute_mode; | |||||
| return conv_bias_size_param; | |||||
| return {param, | |||||
| 0, | |||||
| param::MatrixMul::Format::DEFAULT, | |||||
| bias_type, | |||||
| 0, | |||||
| BiasMode::NO_BIAS, | |||||
| param::ConvBias::NonlineMode::IDENTITY}; | |||||
| } | } | ||||
| bool ConvolutionImpl::AlgoDefault::is_preferred( | bool ConvolutionImpl::AlgoDefault::is_preferred( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ||||
| init_convbias_opr_and_param(m_conv_bias_opr, param); | |||||
| return m_algorithm->is_preferred(m_conv_bias_opr, conv_bias_param); | |||||
| init_conv_bias_param(param); | |||||
| return m_algorithm->is_preferred(conv_bias_param); | |||||
| } | } | ||||
| bool ConvolutionImpl::AlgoDefault::usable( | bool ConvolutionImpl::AlgoDefault::usable( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ||||
| init_convbias_opr_and_param(m_conv_bias_opr, param); | |||||
| return m_algorithm->usable(m_conv_bias_opr, conv_bias_param, | |||||
| init_conv_bias_param(param); | |||||
| return m_algorithm->usable(conv_bias_param, | |||||
| static_cast<ConvBiasImpl::AlgoSelectionStrategy>( | static_cast<ConvBiasImpl::AlgoSelectionStrategy>( | ||||
| algo_selection_strategy)); | algo_selection_strategy)); | ||||
| } | } | ||||
| @@ -365,69 +332,62 @@ bool ConvolutionImpl::AlgoDefault::usable( | |||||
| WorkspaceBundle ConvolutionImpl::AlgoDefault::get_bundle( | WorkspaceBundle ConvolutionImpl::AlgoDefault::get_bundle( | ||||
| const NCBKernSizeParam& param) const { | const NCBKernSizeParam& param) const { | ||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ||||
| init_convbias_opr_and_param(m_conv_bias_opr, param); | |||||
| m_conv_bias_opr->execution_policy() = {m_algorithm}; | |||||
| init_conv_bias_param(param); | |||||
| return WorkspaceBundle(nullptr, {m_algorithm->get_workspace( | return WorkspaceBundle(nullptr, {m_algorithm->get_workspace( | ||||
| m_conv_bias_opr, conv_bias_param)}); | |||||
| conv_bias_param)}); | |||||
| } | } | ||||
| size_t ConvolutionImpl::AlgoDefault::get_workspace( | size_t ConvolutionImpl::AlgoDefault::get_workspace( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| size_t ConvolutionImpl::AlgoDefault::get_preprocess_workspace( | size_t ConvolutionImpl::AlgoDefault::get_preprocess_workspace( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ||||
| init_convbias_opr_and_param(m_conv_bias_opr, param); | |||||
| m_conv_bias_opr->execution_policy() = {m_algorithm}; | |||||
| return m_algorithm->get_preprocess_workspace(m_conv_bias_opr, | |||||
| conv_bias_param); | |||||
| init_conv_bias_param(param); | |||||
| return m_algorithm->get_preprocess_workspace(conv_bias_param); | |||||
| } | } | ||||
| SmallVector<TensorLayout> | SmallVector<TensorLayout> | ||||
| ConvolutionImpl::AlgoDefault::deduce_preprocessed_filter_layout( | ConvolutionImpl::AlgoDefault::deduce_preprocessed_filter_layout( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = | ||||
| init_convbias_opr_and_param(m_conv_bias_opr, param); | |||||
| m_conv_bias_opr->execution_policy() = {m_algorithm}; | |||||
| return m_algorithm->deduce_preprocessed_filter_layout(m_conv_bias_opr, | |||||
| conv_bias_param); | |||||
| init_conv_bias_param( param); | |||||
| return m_algorithm->deduce_preprocessed_filter_layout(conv_bias_param); | |||||
| } | } | ||||
| //! Return the implement preprocess kernel | //! Return the implement preprocess kernel | ||||
| SmallVector<ConvolutionImpl::NCBKern> | SmallVector<ConvolutionImpl::NCBKern> | ||||
| ConvolutionImpl::AlgoDefault::get_preprocess_kimpl( | ConvolutionImpl::AlgoDefault::get_preprocess_kimpl( | ||||
| ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, | |||||
| ConvBiasImpl::AlgoBase* algo, | |||||
| const NCBKernSizeParam& param) { | const NCBKernSizeParam& param) { | ||||
| MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv("get_preprocess_kimpl"_hash)) { | MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv("get_preprocess_kimpl"_hash)) { | ||||
| // construct the conv_bias kern param | // construct the conv_bias kern param | ||||
| ::ConvBiasImpl::NCBKernParam conv_bias_param; | ::ConvBiasImpl::NCBKernParam conv_bias_param; | ||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param = | |||||
| init_convbias_opr_and_param(conv_bias_opr, param); | |||||
| static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = | static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = | ||||
| conv_bias_size_param; | |||||
| init_conv_bias_param(param); | |||||
| auto conv_bias_preprocess_kerns = | auto conv_bias_preprocess_kerns = | ||||
| algo->dispatch_preprocess_kerns(conv_bias_opr, conv_bias_param); | |||||
| algo->dispatch_preprocess_kerns(conv_bias_param); | |||||
| SmallVector<ConvolutionImpl::NCBKern> convolution_preprocess_kerns; | SmallVector<ConvolutionImpl::NCBKern> convolution_preprocess_kerns; | ||||
| //! Set the conv_bias param using convolution param | //! Set the conv_bias param using convolution param | ||||
| auto set_copy_param_filter_workspace_ptr = | |||||
| auto set_param_filter_workspace_ptr = | |||||
| [](const NCBKernParam& conv_param, | [](const NCBKernParam& conv_param, | ||||
| ::ConvBiasImpl::NCBKernParam& copied_param) { | |||||
| copied_param.filter_ptr = conv_param.filter_ptr; | |||||
| copied_param.workspace_ptr = conv_param.workspace_ptr; | |||||
| copied_param.workspace_size = conv_param.workspace_size; | |||||
| ::ConvBiasImpl::NCBKernParam& conv_bias_param) { | |||||
| conv_bias_param.filter_ptr = conv_param.filter_ptr; | |||||
| conv_bias_param.workspace_ptr = conv_param.workspace_ptr; | |||||
| conv_bias_param.workspace_size = conv_param.workspace_size; | |||||
| }; | }; | ||||
| for (size_t i = 0; i < conv_bias_preprocess_kerns.size(); i++) { | for (size_t i = 0; i < conv_bias_preprocess_kerns.size(); i++) { | ||||
| auto kernel = conv_bias_preprocess_kerns[i]; | auto kernel = conv_bias_preprocess_kerns[i]; | ||||
| //! If the kerenl batch parallel | //! If the kerenl batch parallel | ||||
| auto run = [=](const NCBKernParam& p, | |||||
| const NCBKernIndex& ncb_index) { | |||||
| auto copy_param = conv_bias_param; | |||||
| set_copy_param_filter_workspace_ptr(p, copy_param); | |||||
| kernel.kern(copy_param, | |||||
| {ncb_index.thread_id, ncb_index.ndrange_id}); | |||||
| auto run = [param = conv_bias_param, kernel, | |||||
| &set_param_filter_workspace_ptr]( | |||||
| const NCBKernParam& p, | |||||
| const NCBKernIndex& ncb_index) mutable { | |||||
| set_param_filter_workspace_ptr(p, param); | |||||
| kernel.kern(param, {ncb_index.thread_id, ncb_index.ndrange_id}); | |||||
| }; | }; | ||||
| convolution_preprocess_kerns.push_back({run, kernel.global_size}); | convolution_preprocess_kerns.push_back({run, kernel.global_size}); | ||||
| } | } | ||||
| @@ -438,38 +398,35 @@ ConvolutionImpl::AlgoDefault::get_preprocess_kimpl( | |||||
| //! Return the implement kernel | //! Return the implement kernel | ||||
| SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl( | SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl( | ||||
| ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, | |||||
| ConvBiasImpl::AlgoBase* algo, | |||||
| const NCBKernSizeParam& param) { | const NCBKernSizeParam& param) { | ||||
| MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(0)) { | MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(0)) { | ||||
| // construct the conv_bias kern param | // construct the conv_bias kern param | ||||
| ::ConvBiasImpl::NCBKernParam conv_bias_param; | ::ConvBiasImpl::NCBKernParam conv_bias_param; | ||||
| ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param = | |||||
| init_convbias_opr_and_param(conv_bias_opr, param); | |||||
| static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = | static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = | ||||
| conv_bias_size_param; | |||||
| auto conv_bias_kerns = | |||||
| algo->dispatch_kerns(conv_bias_opr, conv_bias_param); | |||||
| init_conv_bias_param(param); | |||||
| auto&& conv_bias_kerns = algo->dispatch_kerns(conv_bias_param); | |||||
| SmallVector<ConvolutionImpl::NCBKern> convolution_kerns; | SmallVector<ConvolutionImpl::NCBKern> convolution_kerns; | ||||
| //! Set the conv_bias param using convolution param | //! Set the conv_bias param using convolution param | ||||
| auto set_copy_param_compute_address = | auto set_copy_param_compute_address = | ||||
| [](const NCBKernParam& conv_param, | [](const NCBKernParam& conv_param, | ||||
| ::ConvBiasImpl::NCBKernParam& copied_param) { | |||||
| copied_param.src_ptr = conv_param.src_ptr; | |||||
| copied_param.filter_ptr = conv_param.filter_ptr; | |||||
| copied_param.dst_ptr = conv_param.dst_ptr; | |||||
| copied_param.workspace_ptr = conv_param.workspace_ptr; | |||||
| copied_param.workspace_size = conv_param.workspace_size; | |||||
| ::ConvBiasImpl::NCBKernParam& conv_bias_param) { | |||||
| conv_bias_param.src_ptr = conv_param.src_ptr; | |||||
| conv_bias_param.filter_ptr = conv_param.filter_ptr; | |||||
| conv_bias_param.dst_ptr = conv_param.dst_ptr; | |||||
| conv_bias_param.workspace_ptr = conv_param.workspace_ptr; | |||||
| conv_bias_param.workspace_size = conv_param.workspace_size; | |||||
| }; | }; | ||||
| for (size_t i = 0; i < conv_bias_kerns.size(); i++) { | for (size_t i = 0; i < conv_bias_kerns.size(); i++) { | ||||
| auto kernel = conv_bias_kerns[i]; | |||||
| auto&& kernel = conv_bias_kerns[i]; | |||||
| //! If the kerenl batch parallel | //! If the kerenl batch parallel | ||||
| auto run = [=](const NCBKernParam& p, | |||||
| const NCBKernIndex& ncb_index) { | |||||
| auto copy_param = conv_bias_param; | |||||
| set_copy_param_compute_address(p, copy_param); | |||||
| kernel.kern(copy_param, | |||||
| {ncb_index.thread_id, ncb_index.ndrange_id}); | |||||
| auto run = [param = conv_bias_param, kernel, | |||||
| &set_copy_param_compute_address]( | |||||
| const NCBKernParam& p, | |||||
| const NCBKernIndex& ncb_index) mutable { | |||||
| set_copy_param_compute_address(p, param); | |||||
| kernel.kern(param, {ncb_index.thread_id, ncb_index.ndrange_id}); | |||||
| }; | }; | ||||
| convolution_kerns.push_back({run, kernel.global_size}); | convolution_kerns.push_back({run, kernel.global_size}); | ||||
| } | } | ||||
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| @@ -35,10 +36,10 @@ void kern_naive_forward(const ConvolutionImpl::NCBKernParam& p, | |||||
| src.layout.dtype = p.src_type; | src.layout.dtype = p.src_type; | ||||
| dst.layout.dtype = p.dst_type; | dst.layout.dtype = p.dst_type; | ||||
| if (p.filter_meta.format == param::Convolution::Format::NCHW) { | if (p.filter_meta.format == param::Convolution::Format::NCHW) { | ||||
| istrd *= p.isz[0] * p.isz[1]; | |||||
| ostrd *= p.osz[0] * p.osz[1]; | |||||
| src.layout.init_contiguous_stride({1, IC, IH, IW}); | |||||
| dst.layout.init_contiguous_stride({1, OC, OH, OW}); | |||||
| istrd *= p.isz[0] * p.isz[1]; | |||||
| ostrd *= p.osz[0] * p.osz[1]; | |||||
| src.layout.init_contiguous_stride({1, IC, IH, IW}); | |||||
| dst.layout.init_contiguous_stride({1, OC, OH, OW}); | |||||
| } else { | } else { | ||||
| // Must be NHWC | // Must be NHWC | ||||
| megdnn_assert( | megdnn_assert( | ||||
| @@ -75,14 +76,12 @@ class ConvolutionImpl::AlgoFallback final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "FALLBACK_ALGO"; } | const char* name() const override { return "FALLBACK_ALGO"; } | ||||
| bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvolutionImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kern( | SmallVector<NCBKern> dispatch_kern( | ||||
| ConvolutionImpl* /*opr*/, | |||||
| const NCBKernSizeParam& /*param*/) const override; | const NCBKernSizeParam& /*param*/) const override; | ||||
| }; | }; | ||||
| @@ -90,66 +89,55 @@ class ConvolutionImpl::AlgoNaive final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "NAIVE_ALGO"; } | const char* name() const override { return "NAIVE_ALGO"; } | ||||
| bool usable(ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/, | |||||
| bool usable(const NCBKernSizeParam& /*param*/, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvolutionImpl*, | |||||
| const NCBKernSizeParam&) const override { | |||||
| return 0; | |||||
| }; | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override { return 0; }; | |||||
| SmallVector<NCBKern> dispatch_kern( | SmallVector<NCBKern> dispatch_kern( | ||||
| ConvolutionImpl* /*opr*/, | |||||
| const NCBKernSizeParam& /*param*/) const override; | const NCBKernSizeParam& /*param*/) const override; | ||||
| }; | }; | ||||
| class ConvolutionImpl::AlgoDefault final : public AlgoBase { | class ConvolutionImpl::AlgoDefault final : public AlgoBase { | ||||
| static ConvBiasImpl::NCBKernSizeParam init_convbias_opr_and_param( | |||||
| ConvBiasImpl* conv_bias_opr, const NCBKernSizeParam& param); | |||||
| static ConvBiasImpl::NCBKernSizeParam init_conv_bias_param( | |||||
| const NCBKernSizeParam& param); | |||||
| WorkspaceBundle get_bundle(const NCBKernSizeParam& param) const; | WorkspaceBundle get_bundle(const NCBKernSizeParam& param) const; | ||||
| static SmallVector<NCBKern> get_kimpl(ConvBiasImpl* conv_bias_opr, | |||||
| ConvBiasImpl::AlgoBase* algo, | |||||
| static SmallVector<NCBKern> get_kimpl(ConvBiasImpl::AlgoBase* algo, | |||||
| const NCBKernSizeParam& param); | const NCBKernSizeParam& param); | ||||
| static SmallVector<NCBKern> get_preprocess_kimpl( | static SmallVector<NCBKern> get_preprocess_kimpl( | ||||
| ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, | |||||
| const NCBKernSizeParam& param); | |||||
| ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param); | |||||
| public: | public: | ||||
| AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase*); | |||||
| AlgoDefault(ConvBiasImpl::AlgoBase*); | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return m_name.c_str(); } | const char* name() const override { return m_name.c_str(); } | ||||
| bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(ConvolutionImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| size_t get_preprocess_workspace(ConvolutionImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| size_t get_preprocess_workspace(const NCBKernSizeParam&) const override; | |||||
| SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | ||||
| ConvolutionImpl*, const NCBKernSizeParam&) const override; | |||||
| const NCBKernSizeParam&) const override; | |||||
| SmallVector<NCBKern> dispatch_preprocess_kern( | SmallVector<NCBKern> dispatch_preprocess_kern( | ||||
| ConvolutionImpl*, const NCBKernSizeParam& param) const override { | |||||
| return get_preprocess_kimpl(m_conv_bias_opr, m_algorithm, param); | |||||
| const NCBKernSizeParam& param) const override { | |||||
| return get_preprocess_kimpl(m_algorithm, param); | |||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kern( | SmallVector<NCBKern> dispatch_kern( | ||||
| ConvolutionImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpl(m_conv_bias_opr, m_algorithm, param); | |||||
| return get_kimpl(m_algorithm, param); | |||||
| } | } | ||||
| void* type() const override { return sm_fallback_conv_algo_type; } | void* type() const override { return sm_fallback_conv_algo_type; } | ||||
| //! select matmul to the highest preference | //! select matmul to the highest preference | ||||
| bool is_preferred(ConvolutionImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| private: | private: | ||||
| std::string m_name; | std::string m_name; | ||||
| fallback::ConvBiasImpl* m_conv_bias_opr; | |||||
| ConvBiasImpl::AlgoBase* m_algorithm; | ConvBiasImpl::AlgoBase* m_algorithm; | ||||
| }; | }; | ||||
| @@ -59,8 +59,7 @@ public: | |||||
| static_cast<ConvBiasImpl*>(conv_bias_opr)->algo_pack(); | static_cast<ConvBiasImpl*>(conv_bias_opr)->algo_pack(); | ||||
| for (auto&& algorithm : conv_bias_algo) { | for (auto&& algorithm : conv_bias_algo) { | ||||
| // fallback algo | // fallback algo | ||||
| refhold.emplace_back(new AlgoDefault( | |||||
| static_cast<ConvBiasImpl*>(conv_bias_opr), algorithm)); | |||||
| refhold.emplace_back(new AlgoDefault(algorithm)); | |||||
| all_algos.emplace_back(refhold.back().get()); | all_algos.emplace_back(refhold.back().get()); | ||||
| } | } | ||||
| @@ -82,7 +81,7 @@ bool ConvolutionImpl::is_naive_algo(ConvolutionImpl::Algorithm* algo) { | |||||
| } | } | ||||
| #define NCB_ALGO_FUNC(name, algo, param) \ | #define NCB_ALGO_FUNC(name, algo, param) \ | ||||
| static_cast<AlgoBase*>(algo)->name(this, fparam) | |||||
| static_cast<AlgoBase*>(algo)->name(param) | |||||
| void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | ||||
| _megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
| @@ -131,7 +130,7 @@ size_t ConvolutionImpl::get_workspace_in_bytes( | |||||
| return naive::ConvolutionForwardImpl::get_workspace_in_bytes( | return naive::ConvolutionForwardImpl::get_workspace_in_bytes( | ||||
| src, filter, dst, preprocessed_filter); | src, filter, dst, preprocessed_filter); | ||||
| } else { | } else { | ||||
| return static_cast<AlgoBase*>(algo)->get_workspace(this, fparam); | |||||
| return NCB_ALGO_FUNC(get_workspace, algo, fparam); | |||||
| } | } | ||||
| } | } | ||||
| @@ -144,8 +143,7 @@ size_t ConvolutionImpl::get_preprocess_workspace_in_bytes( | |||||
| return naive::ConvolutionForwardImpl::get_preprocess_workspace_in_bytes( | return naive::ConvolutionForwardImpl::get_preprocess_workspace_in_bytes( | ||||
| src, filter, dst); | src, filter, dst); | ||||
| } else { | } else { | ||||
| return static_cast<AlgoBase*>(algo)->get_preprocess_workspace(this, | |||||
| fparam); | |||||
| return NCB_ALGO_FUNC(get_preprocess_workspace, algo, fparam); | |||||
| } | } | ||||
| } | } | ||||
| @@ -158,8 +156,7 @@ SmallVector<TensorLayout> ConvolutionImpl::deduce_preprocessed_filter_layout( | |||||
| return naive::ConvolutionForwardImpl::deduce_preprocessed_filter_layout( | return naive::ConvolutionForwardImpl::deduce_preprocessed_filter_layout( | ||||
| src, filter, dst); | src, filter, dst); | ||||
| } else { | } else { | ||||
| return static_cast<AlgoBase*>(algo)->deduce_preprocessed_filter_layout( | |||||
| this, fparam); | |||||
| return NCB_ALGO_FUNC(deduce_preprocessed_filter_layout, algo, fparam); | |||||
| } | } | ||||
| } | } | ||||
| @@ -251,8 +248,7 @@ ConvolutionImpl::NCBKernParam ConvolutionImpl::make_ncb_kern_param( | |||||
| void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param, | void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param, | ||||
| Algorithm* algo) { | Algorithm* algo) { | ||||
| auto kerns = | |||||
| static_cast<AlgoBase*>(algo)->dispatch_preprocess_kern(this, param); | |||||
| auto kerns = NCB_ALGO_FUNC(dispatch_preprocess_kern, algo, param); | |||||
| auto fallback_handle = handle(); | auto fallback_handle = handle(); | ||||
| for (auto kernel : kerns) { | for (auto kernel : kerns) { | ||||
| megdnn_assert( | megdnn_assert( | ||||
| @@ -272,14 +268,15 @@ void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param, | |||||
| void ConvolutionImpl::exec_with_ncb_kern(const NCBKernParam& param, | void ConvolutionImpl::exec_with_ncb_kern(const NCBKernParam& param, | ||||
| Algorithm* algo) { | Algorithm* algo) { | ||||
| auto kerns = static_cast<AlgoBase*>(algo)->dispatch_kern(this, param); | |||||
| auto kerns = NCB_ALGO_FUNC(dispatch_kern, algo, param); | |||||
| auto fallback_handle = handle(); | auto fallback_handle = handle(); | ||||
| for (auto kernel : kerns) { | for (auto kernel : kerns) { | ||||
| megdnn_assert(param.filter_meta.format == Param::Format::NCHW || | |||||
| param.filter_meta.format == Param::Format::NHWC || | |||||
| param.filter_meta.format == Param::Format::NCHW88 || | |||||
| param.filter_meta.format == Param::Format::NCHW44, | |||||
| "invalid conv format"); | |||||
| megdnn_assert( | |||||
| param.filter_meta.format == Param::Format::NCHW || | |||||
| param.filter_meta.format == Param::Format::NHWC || | |||||
| param.filter_meta.format == Param::Format::NCHW88 || | |||||
| param.filter_meta.format == Param::Format::NCHW44, | |||||
| "invalid conv format"); | |||||
| auto run = [param, kernel](size_t index, size_t thread_id) { | auto run = [param, kernel](size_t index, size_t thread_id) { | ||||
| CpuNDRange ndrange_id(kernel.global_size, index); | CpuNDRange ndrange_id(kernel.global_size, index); | ||||
| kernel.kern(param, {thread_id, ndrange_id}); | kernel.kern(param, {thread_id, ndrange_id}); | ||||
| @@ -293,13 +290,11 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic_with_ncb( | |||||
| const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, | const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, | ||||
| bool reproducible) { | bool reproducible) { | ||||
| for (auto i : get_all_algorithms_with_ncb(param)) { | for (auto i : get_all_algorithms_with_ncb(param)) { | ||||
| size_t need_workspace = | |||||
| static_cast<AlgoBase*>(i)->get_workspace(this, param); | |||||
| bool usable_reproducible = | bool usable_reproducible = | ||||
| static_cast<AlgoBase*>(i)->usable_reproducible( | static_cast<AlgoBase*>(i)->usable_reproducible( | ||||
| this, param, AlgoSelectionStrategy::HEURISTIC, | |||||
| reproducible); | |||||
| if (usable_reproducible && need_workspace <= workspace_limit_in_bytes) { | |||||
| param, AlgoSelectionStrategy::HEURISTIC, reproducible); | |||||
| if (usable_reproducible && NCB_ALGO_FUNC(get_workspace, i, param) <= | |||||
| workspace_limit_in_bytes) { | |||||
| return i; | return i; | ||||
| } | } | ||||
| } | } | ||||
| @@ -311,8 +306,8 @@ ConvolutionImpl::get_all_algorithms_with_ncb(const NCBKernSizeParam& param) { | |||||
| std::vector<Algorithm*> ret; | std::vector<Algorithm*> ret; | ||||
| std::vector<Algorithm*> prefer_algos; | std::vector<Algorithm*> prefer_algos; | ||||
| for (auto&& i : algo_pack()) { | for (auto&& i : algo_pack()) { | ||||
| if (i->usable(this, param, AlgoSelectionStrategy::FULL_RUN)) { | |||||
| if (i->is_preferred(this, param)) { | |||||
| if (i->usable(param, AlgoSelectionStrategy::FULL_RUN)) { | |||||
| if (i->is_preferred(param)) { | |||||
| prefer_algos.push_back(i); | prefer_algos.push_back(i); | ||||
| } else { | } else { | ||||
| ret.push_back(i); | ret.push_back(i); | ||||
| @@ -178,42 +178,38 @@ public: | |||||
| class AlgoBase : public Algorithm { | class AlgoBase : public Algorithm { | ||||
| public: | public: | ||||
| virtual ~AlgoBase() = default; | virtual ~AlgoBase() = default; | ||||
| virtual bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, | |||||
| virtual bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const = 0; | AlgoSelectionStrategy) const = 0; | ||||
| virtual size_t get_workspace(ConvolutionImpl* opr, | |||||
| const NCBKernSizeParam& param) const = 0; | |||||
| virtual size_t get_workspace(const NCBKernSizeParam& param) const = 0; | |||||
| virtual SmallVector<NCBKern> dispatch_kern( | virtual SmallVector<NCBKern> dispatch_kern( | ||||
| ConvolutionImpl* opr, const NCBKernSizeParam& param) const = 0; | |||||
| const NCBKernSizeParam& param) const = 0; | |||||
| virtual SmallVector<NCBKern> dispatch_preprocess_kern( | virtual SmallVector<NCBKern> dispatch_preprocess_kern( | ||||
| ConvolutionImpl*, const NCBKernSizeParam&) const { | |||||
| const NCBKernSizeParam&) const { | |||||
| return {}; | return {}; | ||||
| }; | }; | ||||
| //! get the layouts of weight_prerocess dst | //! get the layouts of weight_prerocess dst | ||||
| virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | ||||
| ConvolutionImpl*, const NCBKernSizeParam&) const { | |||||
| const NCBKernSizeParam&) const { | |||||
| return {}; | return {}; | ||||
| }; | }; | ||||
| //! get the workspace when weight_prerocess | //! get the workspace when weight_prerocess | ||||
| virtual size_t get_preprocess_workspace(ConvolutionImpl*, | |||||
| const NCBKernSizeParam&) const { | |||||
| virtual size_t get_preprocess_workspace(const NCBKernSizeParam&) const { | |||||
| return 0_z; | return 0_z; | ||||
| }; | }; | ||||
| //! Temporarily used to identify whether the matmul algorithm is | //! Temporarily used to identify whether the matmul algorithm is | ||||
| //! is_preferred. | //! is_preferred. | ||||
| virtual bool is_preferred(ConvolutionImpl*, | |||||
| const NCBKernSizeParam&) const { | |||||
| virtual bool is_preferred(const NCBKernSizeParam&) const { | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool usable_reproducible(ConvolutionImpl* opr, | |||||
| const NCBKernSizeParam& param, | |||||
| bool usable_reproducible(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy, | AlgoSelectionStrategy algo_selection_strategy, | ||||
| bool reproducible = true) const { | bool reproducible = true) const { | ||||
| return (!reproducible || is_reproducible()) && | return (!reproducible || is_reproducible()) && | ||||
| usable(opr, param, algo_selection_strategy); | |||||
| usable(param, algo_selection_strategy); | |||||
| } | } | ||||
| }; | }; | ||||
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #include "src/x86/conv_bias/f32/algos.h" | #include "src/x86/conv_bias/f32/algos.h" | ||||
| @@ -104,7 +105,7 @@ void get_rectified_size(size_t IH, size_t IW, size_t OH, size_t OW, size_t FH, | |||||
| /* ===================== direct algo ===================== */ | /* ===================== direct algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoDirect::usable( | bool ConvBiasImpl::AlgoDirect::usable( | ||||
| FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| bool aviliable = fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | bool aviliable = fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | ||||
| @@ -142,7 +143,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirect::get_bundle( | |||||
| return {nullptr, {part0, part1}}; | return {nullptr, {part0, part1}}; | ||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDirect::get_workspace( | size_t ConvBiasImpl::AlgoDirect::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -280,7 +281,8 @@ void ConvBiasImpl::AlgoDirect::do_conv_kern(const WorkspaceBundle& bundle, | |||||
| size_t workspace_group_id = workspace_ids[0], | size_t workspace_group_id = workspace_ids[0], | ||||
| workspace_batch_id = workspace_ids[1], oc = workspace_ids[2]; | workspace_batch_id = workspace_ids[1], oc = workspace_ids[2]; | ||||
| const float* sptr = kern_param.src<float>(batch_id, group_id); | const float* sptr = kern_param.src<float>(batch_id, group_id); | ||||
| const float* filter = kern_param.filter<float>(group_id) + oc * FH * FW * IC; | |||||
| const float* filter = | |||||
| kern_param.filter<float>(group_id) + oc * FH * FW * IC; | |||||
| const float* bias_ptr = | const float* bias_ptr = | ||||
| kern_param.bias<float>(batch_id, group_id) + oc * bias_offset; | kern_param.bias<float>(batch_id, group_id) + oc * bias_offset; | ||||
| float* dst = kern_param.dst<float>(batch_id, group_id) + oc * OH * OW; | float* dst = kern_param.dst<float>(batch_id, group_id) + oc * OH * OW; | ||||
| @@ -318,7 +320,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoDirect::get_kimpls( | |||||
| } | } | ||||
| /* ===================== direct-stride2 algo ===================== */ | /* ===================== direct-stride2 algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoDirectStride2::usable( | bool ConvBiasImpl::AlgoDirectStride2::usable( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const { | AlgoSelectionStrategy algo_selection_strategy) const { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| auto FH = fm.spatial[0]; | auto FH = fm.spatial[0]; | ||||
| @@ -363,7 +365,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectStride2::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDirectStride2::get_workspace( | size_t ConvBiasImpl::AlgoDirectStride2::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| //! Process one input channel copy padding | //! Process one input channel copy padding | ||||
| @@ -528,7 +530,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMatrixMul::get_bundle( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoMatrixMul::is_preferred( | bool ConvBiasImpl::AlgoMatrixMul::is_preferred( | ||||
| FallbackConvBiasImpl* opr, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| if (fm.dilation[0] != 1 || fm.dilation[1] != 1) { | if (fm.dilation[0] != 1 || fm.dilation[1] != 1) { | ||||
| return false; | return false; | ||||
| @@ -550,7 +552,7 @@ bool ConvBiasImpl::AlgoMatrixMul::is_preferred( | |||||
| int ic = find_nearest_elem<int>(fm.icpg, {4, 8, 16, 32, 64, 96, 128}); | int ic = find_nearest_elem<int>(fm.icpg, {4, 8, 16, 32, 64, 96, 128}); | ||||
| int on = std::round(geometric_mean(param.osz[0], param.osz[1])); | int on = std::round(geometric_mean(param.osz[0], param.osz[1])); | ||||
| ProfileElement cur(f, oc, ic, on); | ProfileElement cur(f, oc, ic, on); | ||||
| auto H = static_cast<HandleImpl*>(opr->handle()); | |||||
| auto H = static_cast<HandleImpl*>(inplace_cpu_handle().get()); | |||||
| auto&& target = std::lower_bound(H->profile_cache().begin(), | auto&& target = std::lower_bound(H->profile_cache().begin(), | ||||
| H->profile_cache().end(), cur); | H->profile_cache().end(), cur); | ||||
| megdnn_assert_internal(target->f == cur.f); | megdnn_assert_internal(target->f == cur.f); | ||||
| @@ -6,7 +6,8 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| @@ -37,14 +38,13 @@ public: | |||||
| return m_large_group ? "X86_CONV_BIAS_DIRECT_STRIDE1_LARGE_GROUP" | return m_large_group ? "X86_CONV_BIAS_DIRECT_STRIDE1_LARGE_GROUP" | ||||
| : "X86_CONV_BIAS_DIRECT_STRIDE1_SMALL_GROUP"; | : "X86_CONV_BIAS_DIRECT_STRIDE1_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -74,14 +74,13 @@ public: | |||||
| return m_large_group ? "X86_CONV_BIAS_DIRECT_STRIDE2_LARGE_GROUP" | return m_large_group ? "X86_CONV_BIAS_DIRECT_STRIDE2_LARGE_GROUP" | ||||
| : "X86_CONV_BIAS_DIRECT_STRIDE2_SMALL_GROUP"; | : "X86_CONV_BIAS_DIRECT_STRIDE2_SMALL_GROUP"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| @@ -131,7 +130,7 @@ public: | |||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "X86_CONV_BIAS_MATMUL"; } | const char* name() const override { return "X86_CONV_BIAS_MATMUL"; } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const override { | AlgoSelectionStrategy) const override { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| return fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | return fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | ||||
| @@ -145,15 +144,12 @@ public: | |||||
| param.nr_threads == 1_z; | param.nr_threads == 1_z; | ||||
| } | } | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam&) const override; | |||||
| bool is_preferred(const NCBKernSizeParam&) const override; | |||||
| size_t get_workspace(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kimpl, {group, 1_z, 1_z}}}; | return {{kimpl, {group, 1_z, 1_z}}}; | ||||
| @@ -171,7 +167,7 @@ public: | |||||
| AlgoMkldnnConv() {} | AlgoMkldnnConv() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "MKLDNN_CONV_FP32"; } | const char* name() const override { return "MKLDNN_CONV_FP32"; } | ||||
| bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const override { | AlgoSelectionStrategy) const override { | ||||
| auto&& fm = param.filter_meta; | auto&& fm = param.filter_meta; | ||||
| @@ -184,13 +180,9 @@ public: | |||||
| return ok; | return ok; | ||||
| }; | }; | ||||
| size_t get_workspace(FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam&) const override { | |||||
| return 0; | |||||
| } | |||||
| size_t get_workspace(const NCBKernSizeParam&) const override { return 0; } | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& /*param*/) const override { | const NCBKernSizeParam& /*param*/) const override { | ||||
| auto kern = [](const NCBKernParam& param, | auto kern = [](const NCBKernParam& param, | ||||
| const NCBKernIndex& ncb_index) { | const NCBKernIndex& ncb_index) { | ||||
| @@ -6,16 +6,17 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | */ | ||||
| #include "src/x86/conv_bias/f32/algos.h" | |||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/x86/conv_bias/f32/algos.h" | |||||
| #include "src/x86/conv_bias/f32/strategy.h" | |||||
| #include "src/x86/conv_bias/opr_impl.h" | #include "src/x86/conv_bias/opr_impl.h" | ||||
| #include "src/x86/conv_bias/postprocess_helper.h" | #include "src/x86/conv_bias/postprocess_helper.h" | ||||
| #include "src/x86/handle.h" | #include "src/x86/handle.h" | ||||
| #include "src/x86/profile.h" | #include "src/x86/profile.h" | ||||
| #include "src/x86/conv_bias/f32/strategy.h" | |||||
| #include "midout.h" | #include "midout.h" | ||||
| @@ -27,10 +28,9 @@ using namespace x86; | |||||
| /* ======================= AlgoFP32WinogradF63_8*8 ======================== */ | /* ======================= AlgoFP32WinogradF63_8*8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 0) { | MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 0) { | ||||
| //! TODO: now nchw88 winograd only support Dense mode | //! TODO: now nchw88 winograd only support Dense mode | ||||
| if (param.filter_meta.icpg % 8 != 0 || | if (param.filter_meta.icpg % 8 != 0 || | ||||
| @@ -44,13 +44,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW88 || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW88_WINOGRAD && | param::ConvBias::Format::NCHW88_WINOGRAD && | ||||
| opr->param().output_block_size == 6 && | |||||
| param.output_block_size == 6 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK8)) && | param::MatrixMul::Format::MK8)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -74,10 +74,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8, | |||||
| /* ======================= AlgoFP32WinogradF23_8*8 ======================== */ | /* ======================= AlgoFP32WinogradF23_8*8 ======================== */ | ||||
| bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | ||||
| fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| MEGDNN_MARK_USED_VAR(param); | MEGDNN_MARK_USED_VAR(param); | ||||
| MEGDNN_MARK_USED_VAR(opr); | |||||
| MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 0) { | MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 0) { | ||||
| //! TODO: now nchw88 winograd only support Dense mode | //! TODO: now nchw88 winograd only support Dense mode | ||||
| if (param.filter_meta.icpg % 8 != 0 || | if (param.filter_meta.icpg % 8 != 0 || | ||||
| @@ -91,13 +90,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | |||||
| strategy, m_tile_size, param) | strategy, m_tile_size, param) | ||||
| .get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
| return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
| (opr->param().format == param::ConvBias::Format::NCHW88 || | |||||
| (opr->param().format == | |||||
| (param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
| (param.filter_meta.format == | |||||
| param::ConvBias::Format::NCHW88_WINOGRAD && | param::ConvBias::Format::NCHW88_WINOGRAD && | ||||
| opr->param().output_block_size == 2 && | |||||
| param.output_block_size == 2 && | |||||
| param.winograd_matmul_format == | param.winograd_matmul_format == | ||||
| param::MatrixMul::Format::MK8)) && | param::MatrixMul::Format::MK8)) && | ||||
| opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && | |||||
| !param.filter_meta.should_flip && | |||||
| (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | ||||
| param.filter_meta.spatial[0] == 3) && | param.filter_meta.spatial[0] == 3) && | ||||
| (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | (param.filter_meta.stride[0] == param.filter_meta.stride[1] && | ||||
| @@ -36,7 +36,7 @@ using namespace megdnn; | |||||
| using namespace x86; | using namespace x86; | ||||
| bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable( | ||||
| FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| return chanwise_avx2_stride1_qint8_usable(param); | return chanwise_avx2_stride1_qint8_usable(param); | ||||
| } | } | ||||
| @@ -66,7 +66,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_workspace( | size_t ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -78,12 +78,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return chanwise_avx2_stride1_qint8_preferred(param); | return chanwise_avx2_stride1_qint8_preferred(param); | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable( | ||||
| FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| return chanwise_avx2_stride2_qint8_usable(param); | return chanwise_avx2_stride2_qint8_usable(param); | ||||
| } | } | ||||
| @@ -113,7 +113,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_workspace( | size_t ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -125,12 +125,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return chanwise_avx2_stride2_qint8_preferred(param); | return chanwise_avx2_stride2_qint8_preferred(param); | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable( | bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable( | ||||
| FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
| const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
| return direct_avx2_stride1_int8_usable(param); | return direct_avx2_stride1_int8_usable(param); | ||||
| } | } | ||||
| @@ -170,7 +170,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_workspace( | size_t ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -182,14 +182,13 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred( | bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return direct_avx2_stride1_int8_preferred(param); | return direct_avx2_stride1_int8_preferred(param); | ||||
| } | } | ||||
| /* ===================== avx2 int8 stride 2 ===================== */ | /* ===================== avx2 int8 stride 2 ===================== */ | ||||
| bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | ||||
| FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | |||||
| const NCBKernSizeParam& param, AlgoSelectionStrategy) const { | |||||
| return direct_avx2_stride2_int8_usable(param); | return direct_avx2_stride2_int8_usable(param); | ||||
| } | } | ||||
| @@ -229,7 +228,7 @@ WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( | |||||
| } | } | ||||
| size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| @@ -241,13 +240,12 @@ ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( | |||||
| } | } | ||||
| bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred( | bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return direct_avx2_stride2_int8_preferred(param); | return direct_avx2_stride2_int8_preferred(param); | ||||
| } | } | ||||
| #if MEGDNN_X86_WITH_MKL_DNN | #if MEGDNN_X86_WITH_MKL_DNN | ||||
| bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param, | |||||
| bool ConvBiasImpl::AlgoMkldnnQint8::usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| return mkldnn_qint8_usable(param); | return mkldnn_qint8_usable(param); | ||||
| } | } | ||||
| @@ -426,19 +424,18 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( | |||||
| #undef REORDER_MEMORY | #undef REORDER_MEMORY | ||||
| bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred( | bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return mkldnn_qint8_preferred(param); | return mkldnn_qint8_preferred(param); | ||||
| } | } | ||||
| /* ===================== mkldnn qint8 matmul algo ===================== */ | /* ===================== mkldnn qint8 matmul algo ===================== */ | ||||
| bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param, | |||||
| bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
| return mkldnn_matmul_qint8_usable(param); | return mkldnn_matmul_qint8_usable(param); | ||||
| } | } | ||||
| bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred( | bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred( | ||||
| FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
| const NCBKernSizeParam& param) const { | |||||
| return mkldnn_matmul_qint8_preferred(param); | return mkldnn_matmul_qint8_preferred(param); | ||||
| } | } | ||||
| @@ -25,18 +25,15 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE1"; | return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE1"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| void* type() const override; | void* type() const override; | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| /* ===================== avx2 stride2 chanwise algo ===================== */ | /* ===================== avx2 stride2 chanwise algo ===================== */ | ||||
| @@ -49,18 +46,15 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE2"; | return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE2"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| void* type() const override; | void* type() const override; | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| /* ===================== avx2 stride1 direct algo ===================== */ | /* ===================== avx2 stride1 direct algo ===================== */ | ||||
| @@ -73,18 +67,15 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE1"; | return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE1"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| virtual SmallVector<NCBKern> dispatch_kerns( | virtual SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| void* type() const override; | void* type() const override; | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| /* ================== avx2 int8 direct conv stride2 algo ================== */ | /* ================== avx2 int8 direct conv stride2 algo ================== */ | ||||
| @@ -97,18 +88,15 @@ public: | |||||
| const char* name() const override { | const char* name() const override { | ||||
| return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"; | return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"; | ||||
| } | } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy algo_selection_strategy) const override; | AlgoSelectionStrategy algo_selection_strategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override; | |||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| fallback::ConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| return get_kimpls(param); | return get_kimpls(param); | ||||
| } | } | ||||
| void* type() const override; | void* type() const override; | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| #if MEGDNN_X86_WITH_MKL_DNN | #if MEGDNN_X86_WITH_MKL_DNN | ||||
| @@ -122,16 +110,14 @@ public: | |||||
| AlgoMkldnnQint8() {} | AlgoMkldnnQint8() {} | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "MKLDNN_INT8"; } | const char* name() const override { return "MKLDNN_INT8"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const override; | AlgoSelectionStrategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| size_t nr_threads = param.nr_threads; | size_t nr_threads = param.nr_threads; | ||||
| return get_bundle(param).total_size_in_bytes() * nr_threads; | return get_bundle(param).total_size_in_bytes() * nr_threads; | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| size_t n = param.n; | size_t n = param.n; | ||||
| @@ -147,8 +133,7 @@ public: | |||||
| return {{kern, {group, n, 1_z}}}; | return {{kern, {group, n, 1_z}}}; | ||||
| } | } | ||||
| void* type() const override; | void* type() const override; | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| }; | }; | ||||
| /* ===================== mkldnn qint8 matmul algo ===================== */ | /* ===================== mkldnn qint8 matmul algo ===================== */ | ||||
| class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | ||||
| @@ -160,22 +145,19 @@ class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | |||||
| public: | public: | ||||
| bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
| const char* name() const override { return "MKLDNN_MATMUL_INT8"; } | const char* name() const override { return "MKLDNN_MATMUL_INT8"; } | ||||
| bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
| bool usable(const NCBKernSizeParam& param, | |||||
| AlgoSelectionStrategy) const override; | AlgoSelectionStrategy) const override; | ||||
| size_t get_workspace(FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | |||||
| size_t get_workspace(const NCBKernSizeParam& param) const override { | |||||
| return get_bundle(param).total_size_in_bytes(); | return get_bundle(param).total_size_in_bytes(); | ||||
| } | } | ||||
| SmallVector<NCBKern> dispatch_kerns( | SmallVector<NCBKern> dispatch_kerns( | ||||
| FallbackConvBiasImpl* /*opr*/, | |||||
| const NCBKernSizeParam& param) const override { | const NCBKernSizeParam& param) const override { | ||||
| size_t group = param.filter_meta.group; | size_t group = param.filter_meta.group; | ||||
| return {{kern_mkldnn_matmul_s8x8x32, {group, 1_z, 1_z}}}; | return {{kern_mkldnn_matmul_s8x8x32, {group, 1_z, 1_z}}}; | ||||
| } | } | ||||
| //! select matmul to the highest preference | //! select matmul to the highest preference | ||||
| bool is_preferred(FallbackConvBiasImpl*, | |||||
| const NCBKernSizeParam& param) const override; | |||||
| bool is_preferred(const NCBKernSizeParam& param) const override; | |||||
| void* type() const override; | void* type() const override; | ||||
| }; | }; | ||||
| @@ -163,7 +163,7 @@ const char* ConvBiasImpl::get_algorithm_set_name() const { | |||||
| } | } | ||||
| bool ConvBiasImpl::is_matmul_quantized_prefer( | bool ConvBiasImpl::is_matmul_quantized_prefer( | ||||
| const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
| const ConvBiasImpl::NCBKernSizeParam& param) const { | |||||
| bool conv_direct_chanwise_mkldnn_usable = true; | bool conv_direct_chanwise_mkldnn_usable = true; | ||||
| if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 || | if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 || | ||||
| param.dst_type.enumv() == DTypeEnum::QuantizedS32) { | param.dst_type.enumv() == DTypeEnum::QuantizedS32) { | ||||
| @@ -55,7 +55,7 @@ public: | |||||
| const char* get_algorithm_set_name() const override; | const char* get_algorithm_set_name() const override; | ||||
| bool is_matmul_quantized_prefer( | bool is_matmul_quantized_prefer( | ||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; | |||||
| const ConvBiasImpl::NCBKernSizeParam& ncb_param) const override; | |||||
| }; | }; | ||||
| } // namespace x86 | } // namespace x86 | ||||