| @@ -351,6 +351,12 @@ public: | |||
| const TensorLayout& bias, const TensorLayout& z, | |||
| const TensorLayout& dst) = 0; | |||
| static void deduce_winograd_origin_layout_and_param( | |||
| const Param::Format format, const size_t output_block_size, | |||
| const TensorLayout& src_layout, | |||
| const TensorLayout& winograd_filter_layout, | |||
| TensorLayout& origin_layout, Param& origin_param); | |||
| enum class BiasMode : uint32_t { | |||
| NO_BIAS = 0, //!< no bias | |||
| BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1] | |||
| @@ -285,6 +285,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||
| bool is_matmul_usable = false; | |||
| using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44; | |||
| using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode; | |||
| Strategy strategy(param.src_type, param.filter_type, param.dst_type); | |||
| is_matmul_usable = m_matmul_algo->usable( | |||
| megdnn::winograd::ConvBias<Strategy, | |||
| @@ -293,6 +294,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||
| param.osz[1], param.filter_meta.ocpg) | |||
| .get_matmul_kern_param(param)); | |||
| return is_matmul_usable && | |||
| m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
| ((opr->param().format == param::ConvBias::Format::NCHW44 && | |||
| param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||
| ((opr->param().format == | |||
| @@ -308,8 +310,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||
| (param.filter_meta.dilation[0] == | |||
| param.filter_meta.dilation[1] && | |||
| param.filter_meta.dilation[0] == 1) && | |||
| (param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 || | |||
| param.compute_mode == param::ConvBias::ComputeMode::DEFAULT) && | |||
| param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 && | |||
| param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
| param.bias_type.enumv() == DTypeEnum::QuantizedS32 && | |||
| param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||
| @@ -164,6 +164,105 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
| } | |||
| return ret; | |||
| } | |||
| /*! | |||
| * \brief deduce the origin filter layout and param after winograd transformed | |||
| * | |||
| * Inverts the winograd filter transform: given the layout of an | |||
| * already-transformed filter, reconstruct the layout the original | |||
| * (spatial) filter had, and rewrite the param's format from | |||
| * NCHWxx_WINOGRAD back to NCHWxx. Non-winograd formats are left | |||
| * untouched (both outputs are unmodified in that case). | |||
| * | |||
| * \param format conv-bias format; only NCHW_WINOGRAD / NCHW44_WINOGRAD / | |||
| * NCHW88_WINOGRAD are handled | |||
| * \param output_block_size winograd output tile size m | |||
| * \param src_layout source layout, used only to detect a QuantizedS8 src | |||
| * so the filter dtype can be mapped back to QuantizedS8 | |||
| * \param winograd_filter_layout layout of the transformed filter to invert | |||
| * \param origin_layout [out] deduced original filter layout | |||
| * \param origin_param [out] param rewritten to the non-winograd format, | |||
| * with output_block_size reset to 0 | |||
| */ | |||
| void ConvBiasForward::deduce_winograd_origin_layout_and_param( | |||
| const Param::Format format, const size_t output_block_size, | |||
| const TensorLayout& src_layout, | |||
| const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout, | |||
| Param& origin_param) { | |||
| if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD || | |||
| format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD || | |||
| format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
| //! change NCHWxx_WINOGRAD to NCHWxx | |||
| size_t OC = 0; | |||
| size_t IC = 0; | |||
| size_t GROUP = 1; | |||
| //! dim 1 is alpha in every transformed layout below; alpha = m + r - 1 | |||
| //! with m = output_block_size, so filter size r (= FH) = alpha - m + 1 | |||
| size_t FH = winograd_filter_layout[1] - output_block_size + 1; | |||
| //! {alpha, alpha, IC, OC} | |||
| if (winograd_filter_layout.ndim == 4) { | |||
| OC = winograd_filter_layout[3]; | |||
| IC = winograd_filter_layout[2]; | |||
| } | |||
| //! {group, alpha, alpha, IC, OC} | |||
| else if (winograd_filter_layout.ndim == 5) { | |||
| OC = winograd_filter_layout[4]; | |||
| IC = winograd_filter_layout[3]; | |||
| GROUP = winograd_filter_layout[0]; | |||
| } | |||
| //! {alpha, alpha, OC/f, IC/f, f, f} | |||
| else if (winograd_filter_layout.ndim == 6) { | |||
| OC = winograd_filter_layout[2] * winograd_filter_layout[5]; | |||
| IC = winograd_filter_layout[3] * winograd_filter_layout[4]; | |||
| } | |||
| //! {group, alpha, alpha, OC/f, IC/f, f, f} | |||
| else if (winograd_filter_layout.ndim == 7) { | |||
| OC = winograd_filter_layout[3] * winograd_filter_layout[6]; | |||
| IC = winograd_filter_layout[4] * winograd_filter_layout[5]; | |||
| GROUP = winograd_filter_layout[0]; | |||
| } | |||
| auto origin_data_type = winograd_filter_layout.dtype; | |||
| //! a QuantizedS8 src means the original filter was QuantizedS8; the | |||
| //! transformed filter is stored wider (S16 or S32) to keep precision, | |||
| //! so map the dtype back while preserving the quantization scale | |||
| if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
| if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) { | |||
| float scale = | |||
| origin_data_type.param<dtype::QuantizedS16>().scale; | |||
| origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||
| } else { | |||
| //! In order to carry the scale of the filter along, the | |||
| //! transformed qint8 winograd filter computed with float | |||
| //! dtype is stored as QuantizedS32 | |||
| megdnn_assert(origin_data_type.enumv() == | |||
| DTypeEnum::QuantizedS32); | |||
| float scale = | |||
| origin_data_type.param<dtype::QuantizedS32>().scale; | |||
| origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||
| } | |||
| } | |||
| //! rebuild the spatial filter layout; NCHW44/NCHW88 pack channels | |||
| //! by 4/8, grouped convs get a leading GROUP dim | |||
| if (GROUP == 1) { | |||
| if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
| origin_layout = | |||
| TensorLayout({OC, IC, FH, FH}, origin_data_type); | |||
| } else if (format == | |||
| megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
| origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4}, | |||
| origin_data_type); | |||
| } else { | |||
| megdnn_assert(format == | |||
| megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
| origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8}, | |||
| origin_data_type); | |||
| } | |||
| } else { | |||
| if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
| origin_layout = | |||
| TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type); | |||
| } else if (format == | |||
| megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
| origin_layout = | |||
| TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4}, | |||
| origin_data_type); | |||
| } else { | |||
| megdnn_assert(format == | |||
| megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
| origin_layout = | |||
| TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8}, | |||
| origin_data_type); | |||
| } | |||
| } | |||
| //! the original (pre-transform) conv had no winograd tiling | |||
| origin_param.output_block_size = 0; | |||
| if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
| origin_param.format = megdnn::param::ConvBias::Format::NCHW; | |||
| } else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
| origin_param.format = megdnn::param::ConvBias::Format::NCHW44; | |||
| } else { | |||
| megdnn_assert(format == | |||
| megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
| origin_param.format = megdnn::param::ConvBias::Format::NCHW88; | |||
| } | |||
| } | |||
| } | |||
| template <typename T> | |||
| struct NCHWParamTrait; | |||
| @@ -103,18 +103,17 @@ void WinogradTransformReplacePass::apply(OptState& opt) const { | |||
| winograd_preprocess_param.output_block_size = | |||
| winograd_param.output_block_size; | |||
| size_t pack_c_size = 1; | |||
| if (new_inp[0]->shape().ndim == 5) { | |||
| pack_c_size = new_inp[0]->layout().shape[4]; | |||
| } | |||
| auto conv_bias_param = conv_bias_opr.param(); | |||
| //! If input dtype is Qint8 and matmul format is MK4, The winograd | |||
| //! compute type is float. | |||
| if (conv_bias_opr.input(0)->dtype().enumv() == | |||
| DTypeEnum::QuantizedS8 && | |||
| pack_c_size == 4 && | |||
| winograd_preprocess_param.format == | |||
| megdnn::param::MatrixMul::Format::MK4) { | |||
| winograd_preprocess_param.compute_mode = | |||
| megdnn::param::ConvBias::ComputeMode::FLOAT32; | |||
| conv_bias_param.compute_mode = | |||
| megdnn::param::ConvBias::ComputeMode::FLOAT32; | |||
| } | |||
| auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make( | |||
| @@ -124,7 +123,6 @@ void WinogradTransformReplacePass::apply(OptState& opt) const { | |||
| inputs.size()); | |||
| SymbolVar new_conv_bias_opr; | |||
| auto conv_bias_param = conv_bias_opr.param(); | |||
| if (new_inp[0]->shape().ndim == 4) { | |||
| conv_bias_param.format = | |||
| megdnn::ConvBias::Param::Format::NCHW_WINOGRAD; | |||
| @@ -562,6 +562,10 @@ class AlgoChooser { | |||
| } | |||
| } | |||
| static void get_origin_param_and_layouts(const ExeContext&, | |||
| ConvTensorLayouts&, | |||
| typename Opr::Param&) {} | |||
| //! get all profile result, either by retrieving cache or profiling | |||
| static AlgoChooserProfileCache::Result get_profile_result( | |||
| ExeContext& ctx, bool enable_update); | |||
| @@ -600,10 +604,14 @@ template <typename Opr> | |||
| AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||
| ExeContext& ctx, bool enable_update) { | |||
| AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache(); | |||
| auto param_blob = ctx.mgb_opr()->param_blob(); | |||
| AlgoChooserProfileCache::Key cache_key{ctx.layouts().data(), | |||
| ctx.layouts().size(), | |||
| param_blob.first, param_blob.second}; | |||
| ConvTensorLayouts origin_layouts = ctx.layouts(); | |||
| typename Opr::Param origin_param = ctx.mgb_opr()->param(); | |||
| get_origin_param_and_layouts(ctx, origin_layouts, origin_param); | |||
| AlgoChooserProfileCache::Key cache_key{origin_layouts.data(), | |||
| origin_layouts.size(), &origin_param, | |||
| sizeof(origin_param)}; | |||
| { | |||
| auto&& rst = cache.get(cache_key); | |||
| if (rst.valid()) | |||
| @@ -658,6 +666,23 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||
| return prof_rst; | |||
| } | |||
| //! ConvBias specialization: replace the (winograd-transformed) filter | |||
| //! layout in slot 1 with the deduced pre-transform layout, and rewrite | |||
| //! \p param back to the non-winograd format, so that the profile-cache | |||
| //! key is the same whether or not the filter has been transformed. | |||
| //! NOTE(review): when the opr's format is not a *_WINOGRAD format, | |||
| //! deduce_winograd_origin_layout_and_param leaves origin_layout | |||
| //! default-constructed, and layouts[1] is overwritten with it below — | |||
| //! confirm this path is only reached for winograd formats. | |||
| template <> | |||
| void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts( | |||
| const ExeContext& ctx, ConvTensorLayouts& layouts, | |||
| megdnn::ConvBias::Param& param) { | |||
| auto format = static_cast<megdnn::param::ConvBias::Format>( | |||
| ctx.megdnn_opr()->param().format); | |||
| size_t output_block_size = ctx.megdnn_opr()->param().output_block_size; | |||
| TensorLayout origin_layout; | |||
| //! layouts[0] is src, layouts[1] is the transformed filter | |||
| megdnn::ConvBias::deduce_winograd_origin_layout_and_param( | |||
| format, output_block_size, ctx.layouts()[0], ctx.layouts()[1], | |||
| origin_layout, param); | |||
| //! copy all layouts through, then substitute the deduced filter layout | |||
| for (size_t i = 0; i < ctx.layouts().size(); i++) { | |||
| layouts[i] = ctx.layouts()[i]; | |||
| } | |||
| layouts[1] = origin_layout; | |||
| } | |||
| template <typename Opr> | |||
| typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | |||
| ExeContext& ctx, bool require_reproducible, bool enable_update) { | |||
| @@ -724,6 +749,18 @@ void AlgoChooser<megdnn::ConvBias>::ExeContext:: | |||
| ConvBiasForward::get_matmul_format(winograd_param); | |||
| winograd_preprocess_opr->param().output_block_size = | |||
| winograd_param.output_block_size; | |||
| //! When filter input is qint8 and Matmul format is MK4, the winograd | |||
| //! compute type is float | |||
| if (m_layouts[1].dtype.enumv() == DTypeEnum::QuantizedS8 && | |||
| param.opr_param.format == megdnn::ConvBias::Param::Format::NCHW44) { | |||
| if (winograd_preprocess_opr->param().format == | |||
| megdnn::param::MatrixMul::Format::MK4){ | |||
| winograd_preprocess_opr->param().compute_mode = | |||
| ConvBias::Param::ComputeMode::FLOAT32; | |||
| param.opr_param.compute_mode = | |||
| ConvBias::Param::ComputeMode::FLOAT32; | |||
| } | |||
| } | |||
| TensorLayout filter_transform_layout; | |||
| winograd_preprocess_opr->deduce_layout(m_layouts[1], | |||
| filter_transform_layout); | |||