@@ -0,0 +1,230 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/algos.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/common.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "src/fallback/conv_bias/opr_impl.h"

#include "megdnn/opr_param_defs.h"
#include "src/naive/convolution/helper.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif

#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv1x1)

using namespace megdnn;
using namespace fallback;
#if MEGDNN_X86
using namespace x86;
#endif
using namespace conv1x1;

size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    if (OH * OW >= 56 * 56 || OC >= 64)
        return m_oc_block_size;
    return div_ceil(OC, param.nr_threads);
}
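//! NOTE (review sketch): for small problems the tile shrinks so that each
//! thread gets roughly one OC tile, while large problems keep the block size
//! registered with the algo (24 or 48 in opr_impl.cpp below). A hypothetical
//! small case, OC = 32 with 4 threads:
#if 0
size_t demo_tile = div_ceil(32_z, 4_z);  // -> 8 output channels per tile
#endif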
size_t ConvBiasImpl::AlgoConv1x1::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    }
    return 0;
}
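//! NOTE (review sketch): whatever the pack mode, the bundle computed above
//! decomposes into three regions; a rough picture with hypothetical field
//! names (see Conv1x1Kerns::get_bundle in conv1x1_dispatcher.h):
#if 0
struct Conv1x1WorkspaceLayout {
    size_t packa_bytes;   // GROUP * oc_tiles_per_group * packa_per_tile
    size_t packb_bytes;   // BATCH * GROUP * packb_per_group (DEFAULT only)
    size_t thread_bytes;  // nr_threads * (matmul temp + 8-bit dst staging)
    size_t total() const { return packa_bytes + packb_bytes + thread_bytes; }
};
#endif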
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
        ConvBiasImpl* opr, const NCBKernSizeParam& param) const {
    SmallVector<ConvBiasImpl::NCBKern> ret_kern;
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    size_t GROUP = param.filter_meta.group;
    size_t BATCH = param.n;
    size_t oc_blocks_per_group = div_ceil(OC, compt_oc_block_size);

    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    WorkspaceBundle whole_bundle = {nullptr, {}};
    WorkspaceBundle thread_bundle = {nullptr, {}};
    WorkspaceBundle matmul_bundle = {nullptr, {}};

    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = {
                    nullptr,
                    {0, 0, m_matmul_algo->get_workspace(matmul_param)}};
        }
        MIDOUT_END();
    }

    //! get thread bundle
    thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                      compt_oc_block_size);

    Conv1x1StrategyBase* conv1x1_strategy =
            Conv1x1Factory::make_conv1x1_strategy(param, pack_mode,
                                                  opr->param().format);

    auto kern_packA = [this, whole_bundle, matmul_bundle, param,
                       compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packA(whole_bundle, matmul_bundle,
                                compt_oc_block_size, this->m_matmul_algo,
                                param, ncb_param, std::move(ncb_index));
    };
    auto kern_packB = [this, whole_bundle, matmul_bundle, param,
                       conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packB(whole_bundle, matmul_bundle,
                                this->m_matmul_algo, param, ncb_param,
                                std::move(ncb_index));
    };
    auto kern_compt = [this, whole_bundle, matmul_bundle, thread_bundle,
                       param, compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->exec(whole_bundle, matmul_bundle, thread_bundle,
                               compt_oc_block_size, this->m_matmul_algo,
                               param, ncb_param, std::move(ncb_index));
    };

    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT ||
        pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        ret_kern.push_back({kern_packA, {GROUP, oc_blocks_per_group}});
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            ret_kern.push_back({kern_packB, {1}});
        }
    }
    ret_kern.push_back({kern_compt, {BATCH, GROUP, oc_blocks_per_group}});
    return ret_kern;
}
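//! NOTE (review sketch): for PackMode::DEFAULT the kerns returned above run
//! in order -- filters are packed once per (group, oc tile), src is packed
//! once, then the compute kern covers the full ndrange. Assuming a
//! hypothetical run(kern, ndrange) helper in the dispatcher runtime:
#if 0
run(kern_packA, {GROUP, oc_blocks_per_group});         // pack filter tiles
run(kern_packB, {1});                                  // pack all src panels
run(kern_compt, {BATCH, GROUP, oc_blocks_per_group});  // GEMM + postprocess
#endif
//! ONLY_PACKA drops kern_packB; NO_PACK keeps only kern_compt.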
bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
                                       const NCBKernSizeParam& param,
                                       AlgoSelectionStrategy) const {
    MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) {
        //! only support nchw format
        if (opr->param().format != param::ConvBias::Format::NCHW)
            return false;

        size_t FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t PH = param.filter_meta.padding[0],
               PW = param.filter_meta.padding[1];
        size_t SH = param.filter_meta.stride[0],
               SW = param.filter_meta.stride[1];
        if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1)
            return false;

        //! 8x8x16 and 8x8x32 only support bias_mode NO_BIAS and nonlineMode
        //! IDENTITY; return false otherwise, since 8x8x32 and 8x8x16 do not
        //! support PostProcess
        if (param.src_type.enumv() == param.filter_type.enumv() &&
            (param.src_type.enumv() == DTypeEnum::Int8 &&
             (param.dst_type.enumv() == DTypeEnum::Int16 ||
              param.dst_type.enumv() == DTypeEnum::Int32)) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;
        if (param.src_type.enumv() == param.filter_type.enumv() &&
            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
              param.src_type.enumv() == DTypeEnum::Quantized8Asymm) &&
             param.dst_type.enumv() == DTypeEnum::QuantizedS32) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        MatrixMulImpl::KernSizeParam matmul_param = get_matmul_kern_param(
                param, OH * OW, get_oc_tile_size_heuristic(param));
        bool matmul_usable = m_matmul_algo->usable(matmul_param);

        return matmul_usable &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT;
    }
    MIDOUT_END();
    return false;
}
@@ -0,0 +1,56 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/algos.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#pragma once

#include "megdnn/thin/small_vector.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"

namespace megdnn {
namespace fallback {

class ConvBiasImpl::AlgoConv1x1 final : public AlgoBase {
public:
    AlgoConv1x1(MatrixMulImpl::AlgoBase* matmul_algo, size_t oc_block_size)
            : m_matmul_algo(matmul_algo), m_oc_block_size(oc_block_size) {}

    bool is_reproducible() const override { return true; }

    const char* name() const override {
        if (m_name.empty()) {
            m_name = ssprintf("CONV1x1:%s:%zu", m_matmul_algo->name(),
                              m_oc_block_size);
        }
        return m_name.c_str();
    }

    bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;
    size_t get_workspace(ConvBiasImpl*,
                         const NCBKernSizeParam& param) const override;
    SmallVector<NCBKern> dispatch_kerns(
            ConvBiasImpl* opr, const NCBKernSizeParam& param) const override;

protected:
    size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const;

private:
    MatrixMulImpl::AlgoBase* m_matmul_algo;
    mutable std::string m_name;
    mutable size_t m_oc_block_size = 0;
};

}  // namespace fallback
}  // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -0,0 +1,99 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#pragma once

#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"

namespace megdnn {
namespace fallback {
namespace conv1x1 {
namespace {

//! get_thread_bundle
WorkspaceBundle get_thread_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                                  size_t matmul_c_size, size_t oc_tile_size) {
    //! in some cases the matmul result needs temporary space to be stored
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    bool is_dst_8bit = (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                        param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                       (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                        param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
    size_t matmul_dst_bytes_per_thread =
            is_dst_8bit ? oc_tile_size * OH * OW * sizeof(param.bias_type) : 0;
    return WorkspaceBundle{nullptr,
                           {matmul_c_size, matmul_dst_bytes_per_thread}};
}

}  // anonymous namespace
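//! NOTE (review sketch): the staging buffer only exists when the matmul
//! accumulates in a wider type than the conv output. Hypothetical case:
//! qint8 -> qint8, oc_tile_size = 24, OH = OW = 28, int32 accumulation:
#if 0
size_t staging_bytes = 24 * 28 * 28 * sizeof(int32_t);  // 75264 B per thread
#endif
//! for 32-bit outputs the matmul writes directly into the conv dst and the
//! second bundle slot is 0.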
template <MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Kerns {
public:
    //! get_bundle
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        size_t GROUP = param.filter_meta.group;
        size_t OC = param.filter_meta.ocpg;
        size_t BATCH = param.n;

        //! bundle per thread
        //! matmul_param records a matmul with M = oc_tile_size, K = IC and
        //! N = OH * OW; the packb size does not depend on the oc tile
        auto matmul_bundle = matmul_algo->get_bundle(matmul_param);
        auto thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                               oc_tile_size);

        //! size over all threads
        size_t all_threads_bytes =
                thread_bundle.total_size_in_bytes() * param.nr_threads;

        //! packa size = GROUP * packa_size_each_group
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t all_packa_bytes =
                packa_bytes_per_oc_tile * oc_tiles_per_group * GROUP;

        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA)
            return WorkspaceBundle{nullptr,
                                   {all_packa_bytes, 0, all_threads_bytes}};

        //! packb size = BATCH * GROUP * packb_size_per_group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);
        size_t all_packb_bytes = packb_bytes_per_group * GROUP * BATCH;

        return WorkspaceBundle{
                nullptr, {all_packa_bytes, all_packb_bytes, all_threads_bytes}};
    }
};
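//! NOTE (review sketch): a worked instance of the bundle above with
//! hypothetical sizes -- GROUP = 1, BATCH = 2, OC = 48, oc_tile_size = 24,
//! hence two oc tiles per group:
#if 0
WorkspaceBundle bundle_demo(size_t packa_per_tile, size_t packb_per_group,
                            size_t per_thread, size_t nr_threads) {
    return WorkspaceBundle{nullptr,
                           {packa_per_tile * 2 * 1, packb_per_group * 1 * 2,
                            per_thread * nr_threads}};
}
#endif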
template <>
class Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK> {
public:
    //! get_bundle
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        size_t matmul_size = matmul_algo->get_workspace(matmul_param);
        auto thread_bundle =
                get_thread_bundle(param, matmul_size, oc_tile_size);
        //! size over all threads
        size_t all_threads_bytes =
                thread_bundle.total_size_in_bytes() * param.nr_threads;
        return WorkspaceBundle{nullptr, {0, 0, all_threads_bytes}};
    }
};

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -0,0 +1,214 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include <unordered_map>
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"

#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv1x1_factory_strategy)

namespace megdnn {
namespace fallback {
namespace conv1x1 {
namespace {

struct StrategyHashParam {
    ConvBiasImpl::NCBKernSizeParam param;
    param::ConvBias::Format format;
    MatrixMulImpl::AlgoBase::PackMode packmode;
};

struct StrategyHashParamHash {
    std::size_t operator()(const StrategyHashParam& sparam) const {
        constexpr size_t base = 1;  //! avoid a hash key of zero
        std::size_t result =
                static_cast<std::size_t>(sparam.param.src_type.enumv()) + base;
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.dst_type.enumv()) +
                   base)
                  << 3);
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.filter_type.enumv()) +
                   base)
                  << 6);
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.bias_type.enumv()) +
                   base)
                  << 9);
        result = result ^
                 ((static_cast<std::size_t>(sparam.format) + base) << 12);
        result = result ^
                 ((static_cast<std::size_t>(sparam.packmode) + base) << 15);
        return result;
    };
};
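//! NOTE (review): shifting each enum by a fixed 3 bits before xor-ing can
//! collide once enum values exceed 7; a boost-style hash_combine is the
//! usual alternative (sketch only, not wired in here):
#if 0
inline void hash_combine(std::size_t& seed, std::size_t v) {
    // 0x9e3779b9 is the 32-bit golden-ratio constant used by boost
    seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
#endif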
struct StrategyHashParamEqual {
    bool operator()(const StrategyHashParam& param1,
                    const StrategyHashParam& param2) const {
        bool flags = true;
        flags = param1.param.src_type == param2.param.src_type && flags;
        flags = param1.param.filter_type == param2.param.filter_type && flags;
        flags = param1.param.bias_type == param2.param.bias_type && flags;
        flags = param1.param.dst_type == param2.param.dst_type && flags;
        flags = param1.format == param2.format && flags;
        flags = param1.packmode == param2.packmode && flags;
        return flags;
    };
};
std::unique_ptr<Conv1x1StrategyBase> create_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    MEGDNN_MARK_USED_VAR(format);
#define cb1(_packmode, _dt, _post_ctype, _postprocess_mode, _midout_tag)     \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,                   \
                 midout_iv(_midout_tag)) {                                   \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {           \
            return std::make_unique<                                         \
                    Conv1x1Strategy<_dt, _dt, _dt, _post_ctype, _post_ctype, \
                                    _postprocess_mode, _packmode>>();        \
        }                                                                    \
    }                                                                        \
    MIDOUT_END()

#define cb2(_packmode, _i_src_type, _i_bias_type, _i_dst_type, _src_ctype, \
            _bias_ctype, _dst_ctype, _postprocess_mode, _midout_tag)       \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,                 \
                 midout_iv(_midout_tag)) {                                 \
        if (param.filter_type.enumv() == param.src_type.enumv() &&         \
            param.src_type.enumv() == DTypeTrait<_i_src_type>::enumv &&    \
            param.dst_type.enumv() == DTypeTrait<_i_dst_type>::enumv) {    \
            return std::make_unique<                                       \
                    Conv1x1Strategy<_src_ctype, _bias_ctype, _dst_ctype,   \
                                    DTypeTrait<_i_bias_type>::ctype,       \
                                    DTypeTrait<_i_dst_type>::ctype,        \
                                    _postprocess_mode, _packmode>>();      \
        }                                                                  \
    }                                                                      \
    MIDOUT_END()

    switch (pack_mode) {
        case MatrixMulImpl::AlgoBase::PackMode::DEFAULT:
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "Default::FLOAT"_hash);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16, __fp16,
                PostprocessMode::FLOAT, "Default::FLOAT16_FP16"_hash);
#else
#if !MEGDNN_DISABLE_FLOAT16
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16,
                dt_float16, PostprocessMode::NO_PROCESS,
                "Default::FLOAT16_FLOAT16"_hash);
#endif
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8, dt_int32,
                dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8, dt_int16,
                dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x16_INT16"_hash);
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_uint8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "Default::QUINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::Quantized8Asymm, dt_uint8, dt_int32, dt_uint8,
                PostprocessMode::QUANTIZED, "Default::QUINT8x8x32_QUINT8"_hash);
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dtype::QuantizedS8,
                dtype::QuantizedS32, dtype::QuantizedS32, dt_int8, dt_int32,
                dt_int32, PostprocessMode::NO_PROCESS,
                "Default::QINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dtype::QuantizedS8,
                dtype::QuantizedS32, dtype::QuantizedS8, dt_int8, dt_int32,
                dt_int8, PostprocessMode::QUANTIZED,
                "Default::QINT8x8x32_QINT8"_hash);
            break;

        case MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA:
            cb1(MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "OnlyPackA::FLOAT"_hash);
            break;

        case MatrixMulImpl::AlgoBase::PackMode::NO_PACK:
            cb1(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "NoPack::FLOAT"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8, dt_int16,
                dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x16_INT16"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8, dt_int32,
                dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dtype::QuantizedS8,
                dtype::QuantizedS32, dtype::QuantizedS32, dt_int8, dt_int32,
                dt_int32, PostprocessMode::NO_PROCESS,
                "NoPack::QINT8x8x32_QINT32"_hash);
            break;

        default:
            megdnn_throw("Invalid Pack Mode");
            break;
    }
#undef cb1
#undef cb2
    megdnn_throw("Invalid Data Type");
    return nullptr;
}
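//! NOTE (review sketch): callers never invoke create_conv1x1_strategy()
//! directly; it is reached through the cached factory, e.g. in
//! dispatch_kerns in algos.cpp:
#if 0
Conv1x1StrategyBase* strategy = Conv1x1Factory::make_conv1x1_strategy(
        param, m_matmul_algo->packmode(), opr->param().format);
#endif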
class StrategyDelegationStorage {
public:
    Conv1x1StrategyBase* get(const ConvBiasImpl::NCBKernSizeParam& param,
                             MatrixMulImpl::AlgoBase::PackMode pack_mode,
                             param::ConvBias::Format format) {
        MEGDNN_LOCK_GUARD(m_mtx);
        StrategyHashParam sparam;
        sparam.param = param;
        sparam.format = format;
        sparam.packmode = pack_mode;
        if (m_map_strategies.find(sparam) == m_map_strategies.end()) {
            auto strategy = create_conv1x1_strategy(param, pack_mode, format);
            m_map_strategies[sparam] = std::move(strategy);
        }
        return m_map_strategies[sparam].get();
    }

private:
    std::mutex m_mtx;
    std::unordered_map<StrategyHashParam, std::unique_ptr<Conv1x1StrategyBase>,
                       StrategyHashParamHash, StrategyHashParamEqual>
            m_map_strategies;
};

}  // anonymous namespace

Conv1x1StrategyBase* Conv1x1Factory::make_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    static StrategyDelegationStorage storage;
    return storage.get(param, pack_mode, format);
}

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -0,0 +1,310 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#pragma once

#include "megdnn/opr_param_defs.h"
#include "src/fallback/conv_bias/opr_impl.h"
#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif

namespace megdnn {
namespace fallback {
namespace conv1x1 {

#if MEGDNN_X86
using namespace x86;
#endif

namespace {

//! get_matmul_kern_param
MatrixMulImpl::KernSizeParam get_matmul_kern_param(
        const ConvBiasImpl::NCBKernSizeParam& param, size_t n, size_t m) {
    size_t M = m;
    size_t N = n;
    size_t K = param.filter_meta.icpg;  //! K = IC
    size_t LDA = K, LDB = N, LDC = N;
    bool is_dst_8bit = (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                        param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                       (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                        param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
    return {param.filter_type,
            param.src_type,
            is_dst_8bit ? param.bias_type : param.dst_type,
            M,
            N,
            K,
            LDA,
            LDB,
            LDC,
            false,
            false,
            param::MatrixMul::ComputeMode::DEFAULT,
            param::MatrixMul::Format::DEFAULT};
}

}  // namespace
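//! NOTE (review sketch): a 1x1 stride-1 convolution over one (batch, group)
//! is exactly C[M,N] = A[M,K] * B[K,N] with M = OC (tile), K = IC and
//! N = OH * OW, which is what the param above encodes; a naive float
//! reference of that identity for illustration only:
#if 0
void conv1x1_as_gemm_ref(const float* filter /*[M][K]*/,
                         const float* src /*[K][N]*/, float* dst /*[M][N]*/,
                         size_t M, size_t K, size_t N) {
    for (size_t m = 0; m < M; ++m)
        for (size_t n = 0; n < N; ++n) {
            float acc = 0.f;
            for (size_t k = 0; k < K; ++k)
                acc += filter[m * K + k] * src[k * N + n];
            dst[m * N + n] = acc;
        }
}
#endif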
class Conv1x1StrategyBase {
public:
    virtual void packA(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle, size_t oc_tile_size,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual void packB(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual void exec(WorkspaceBundle& whole_bundle,
                      WorkspaceBundle& matmul_bundle,
                      WorkspaceBundle& thread_bundle, size_t oc_tile_size,
                      const MatrixMulImpl::AlgoBase* matmul_algo,
                      const ConvBiasImpl::NCBKernSizeParam& param,
                      const ConvBiasImpl::NCBKernParam& ncb_param,
                      const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual ~Conv1x1StrategyBase() = default;
};
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode,
          MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Strategy : public Conv1x1StrategyBase {
public:
    void packA(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
               size_t oc_tile_size, const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            megdnn_log_error("NoPack mode has no packA kernel");
            return;
        }
        whole_bundle.set(ncb_param.workspace_ptr);

        //! packa size per group
        size_t OC = param.filter_meta.ocpg;
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                oc_tiles_per_group * packa_bytes_per_oc_tile;

        size_t group_id = ncb_index.ndrange_id[0];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[1];
        size_t oc_start = oc_tile_id_in_group * oc_tile_size;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t IC = param.filter_meta.icpg;

        MatrixMulImpl::KernParam matmul_kern_param;
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        size_t numbers_offset_of_filter =
                oc_tile_size * IC * oc_tile_id_in_group;

        src_ctype* a_panel = reinterpret_cast<src_ctype*>(
                reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                bytes_offset_of_a_panel);
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_offset_of_filter);
        matmul_algo->pack_A(matmul_kern_param, a_panel, 0, oc_end - oc_start);
    }
    void packB(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
               const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            whole_bundle.set(ncb_param.workspace_ptr);

            //! packb size per group
            size_t packb_bytes_per_group = matmul_bundle.get_size(1);

            size_t GROUP = param.filter_meta.group;
            size_t BATCH = param.n;
            size_t SH = param.filter_meta.stride[0];
            size_t SW = param.filter_meta.stride[1];
            size_t OH = param.osz[0];
            size_t OW = param.osz[1];
            size_t OC = param.filter_meta.ocpg;

            MatrixMulImpl::KernParam matmul_kern_param;
            static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                    get_matmul_kern_param(param, OH * OW, OC);

            rep(batch, BATCH) {
                rep(g, GROUP) {
                    if (SH == 2 && SW == 2)
                        megdnn_throw("no support for stride = 2");
                    size_t bytes_offset_of_b_panel =
                            batch * packb_bytes_per_group * GROUP +
                            g * packb_bytes_per_group;
                    src_ctype* b_panel = reinterpret_cast<src_ctype*>(
                            reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                            bytes_offset_of_b_panel);
                    matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                            ncb_param.src<src_ctype>(batch, g));
                    matmul_algo->pack_B(matmul_kern_param, b_panel, 0,
                                        OH * OW);
                }
            }
        } else {
            megdnn_log_error(
                    "OnlyPackA and NoPack modes have no packB kernel");
        }
    }
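    //! NOTE (review sketch): packed B panels are laid out batch-major, then
    //! group-major, which is the same offset formula exec() uses to find
    //! them again:
#if 0
    size_t b_panel_offset(size_t batch, size_t group, size_t GROUP,
                          size_t packb_bytes_per_group) {
        return batch * GROUP * packb_bytes_per_group +
               group * packb_bytes_per_group;
    }
#endif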
    void exec(WorkspaceBundle& whole_bundle, WorkspaceBundle& matmul_bundle,
              WorkspaceBundle& thread_bundle, size_t oc_tile_size,
              const MatrixMulImpl::AlgoBase* matmul_algo,
              const ConvBiasImpl::NCBKernSizeParam& param,
              const ConvBiasImpl::NCBKernParam& ncb_param,
              const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        whole_bundle.set(ncb_param.workspace_ptr);

        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;

        //! packa bytes per group
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                packa_bytes_per_oc_tile * oc_tiles_per_group;

        //! packb bytes per group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);

        //! matmul bytes per thread
        size_t matmul_bytes_per_thread = thread_bundle.get_size(0);

        size_t batch_id = ncb_index.ndrange_id[0];
        size_t group_id = ncb_index.ndrange_id[1];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[2];
        size_t thread_id = ncb_index.thread_id;

        size_t GROUP = param.filter_meta.group;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];

        size_t oc_start = oc_tile_size * oc_tile_id_in_group;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        MatrixMulImpl::KernParam matmul_kern_param;
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        int8_t* a_panel = reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                          bytes_offset_of_a_panel;

        size_t bytes_offset_of_b_panel =
                batch_id * packb_bytes_per_group * GROUP +
                group_id * packb_bytes_per_group;
        int8_t* b_panel = reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                          bytes_offset_of_b_panel;

        size_t thread_offset = thread_bundle.total_size_in_bytes() * thread_id;
        size_t bytes_offset_of_matmul_dst_this_thread =
                thread_offset + thread_bundle.get_size(0);
        int8_t* matmul_temp_dst =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) +
                bytes_offset_of_matmul_dst_this_thread;

        size_t numbers_of_ncb_dst_offset =
                oc_tile_size * OH * OW * oc_tile_id_in_group;
        void* conv_bias_dst = static_cast<void*>(
                ncb_param.dst<dst_ctype>(batch_id, group_id) +
                numbers_of_ncb_dst_offset);

        size_t numbers_of_ncb_filter_offset =
                oc_tile_size * IC * oc_tile_id_in_group;
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_of_ncb_filter_offset);
        matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                ncb_param.src<src_ctype>(batch_id, group_id));
        matmul_kern_param.workspace_ptr =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) + thread_offset;
        matmul_kern_param.workspace_size = matmul_bytes_per_thread;

        bool is_dst_8bit =
                (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                 param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                 param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
        void* matmul_dst = is_dst_8bit ? matmul_temp_dst : conv_bias_dst;
        matmul_kern_param.C_ptr = matmul_dst;

        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            auto matmul_kern = matmul_algo->get_kern(matmul_kern_param);
            matmul_kern(matmul_kern_param);
        } else {
            auto matmul_kern_naked =
                    matmul_algo->get_kern_naked(matmul_kern_param);
            matmul_kern_naked(matmul_kern_param, a_panel, b_panel);
        }

        //! do postprocess
        void* bias_ptr = nullptr;
        if (param.bias_mode == megdnn::BiasMode::BIAS)
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) +
                    numbers_of_ncb_dst_offset));
        else
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) +
                    oc_start));

        PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
                matmul_dst, bias_ptr, conv_bias_dst, param.bias_mode,
                param.nonlineMode, param.bias_type, param.dst_type, 1_z,
                oc_end - oc_start, OH, OW);
    }
};
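//! NOTE (review sketch): per-thread slices of workspace section 2 as used by
//! exec() above -- matmul temp space first, then the optional 8-bit staging
//! dst (names hypothetical):
#if 0
struct ThreadSlices {
    int8_t* matmul_ws;    // base + thread_offset
    int8_t* staging_dst;  // base + thread_offset + thread_bundle.get_size(0)
};
ThreadSlices slice_thread_ws(int8_t* base, size_t thread_id,
                             size_t per_thread_bytes, size_t matmul_bytes) {
    size_t off = per_thread_bytes * thread_id;
    return {base + off, base + off + matmul_bytes};
}
#endif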
class Conv1x1Factory {
public:
    static Conv1x1StrategyBase* make_conv1x1_strategy(
            const ConvBiasImpl::NCBKernSizeParam& param,
            MatrixMulImpl::AlgoBase::PackMode pack_mode,
            param::ConvBias::Format format);
};

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -15,6 +15,7 @@
#include "src/common/opr_delegate.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/algos.h"
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/fallback/conv_bias/im2col/algos.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/naive/convolution/algorithms.h"
@@ -54,7 +55,13 @@ public:
                            ohw_tile_size));
                    all_algos.emplace_back(refhold.back().get());
                }
                for (size_t oc_tile_size : {24, 48}) {
                    refhold.emplace_back(new AlgoConv1x1(
                            static_cast<MatrixMulImpl::AlgoBase*>(algo),
                            oc_tile_size));
                    all_algos.emplace_back(refhold.back().get());
                }
#if 0
//! As these algos maybe very slow, it will make fastrun search slow, so
//! we disable it, but for the test of strategyhelper, we just keep it.
//! FIXME: I do not know a better way to do it.
@@ -248,6 +248,7 @@ protected:
private:
    class AlgoNaive;
    class AlgoIm2col;
    class AlgoConv1x1;
    class AlgoWinogradF32;
    class AlgoWinogradF32_4x4;
    class AlgoWinogradQS8;
@@ -438,7 +438,6 @@ size_t MatrixMulImpl::AlgoInt8x8x32SSEM4N8K2::get_workspace(
                   m, n, k, trans_a, trans_b, strategy, cacheline)
            .get_workspace_size();
}
MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL(
        AlgoInt8x8x32SSEM4N8K2, megdnn_x86_matmul_kern, 9,
        x86::matmul::gemm_sse_s8s8s32_4x8x2, dt_int8, dt_int32, dt_int16);
@@ -875,6 +875,82 @@ std::vector<conv_bias::TestArg> get_conv_bias_args(
    return args;
}

std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;

    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t stride, NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;

        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, 1, 1},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, ic, h, w},
                                  TensorShape{oc, ic, 1, 1},
                                  TensorShape{n, oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }

        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, 1, 1},
                              TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                                  TensorShape{2, oc, ic, 1, 1},
                                  TensorShape{n, 2 * oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
    };

    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }

    std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
                                    param::ConvBias::Mode::CROSS_CORRELATION};

    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}
void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                     const char* algo_name) {
    using namespace conv_bias;
@@ -76,6 +76,10 @@ std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_args(
        bool no_nonlinemode, bool quantized_nlmod = false,
        bool only_broadcast_bias = false);

std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod = false,
        bool only_broadcast_bias = false);

void check_conv_bias(std::vector<megdnn::test::conv_bias::TestArg> args,
                     megdnn::Handle* handle, const char* algo_name);
@@ -919,6 +919,79 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
#undef cb
}

/**************************** Conv1x1 PackA *************************/
namespace {
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                       RNG* rng, float epsilon, DType type0, DType type1,
                       DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;

    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
}  // namespace

#if MEGDNN_X86_WITH_MKL
TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_PACKA) {
    using namespace conv_bias;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(false, false);
    check_conv_bias(args, handle(), "CONV1x1:X86_F32_MKL_PACKA:24");
}

TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_BLAS) {
    using namespace conv_bias;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(false, false);
    check_conv_bias(args, handle(), "CONV1x1:X86_F32_BLAS:48");
}
#endif

TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_INT8X8X32) {
    using namespace conv_bias;
    UniformIntRNG rng{-50, 50};
    float epsilon = 0.001;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(true, true);
#if MEGDNN_X86_WITH_MKL_DNN
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_MKLDNN:24");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_VNNI:24");
    }
#endif
    if (x86::is_supported(x86::SIMDType::AVX2)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_4X16X2:24");
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_2X4X16:24");
    }
    checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                      dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                      "CONV1x1:X86_INT8X8X32_SSE_4X8X2:48");
}
/************************* End Conv1x1 PackA ************************/
#endif

TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) {