@@ -21,6 +21,7 @@ using namespace megdnn;
using namespace fallback;

MIDOUT_DECL(megdnn_fallback_conv)
MIDOUT_DECL(megdnn_fallback_deconv)

namespace {
@@ -459,6 +460,70 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
    MIDOUT_END();
}

/////////////////////////// ConvolutionBackwardData /////////////////////

/* ===================== naive algo ===================== */
bool ConvolutionBackwardDataImpl::AlgoNaive::usable(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    bool ret = false;
#define cb(dt) ret |= (param.diff_type.enumv() == DTypeTrait<dt>::enumv);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
#define cb(dt_src, dt_dst)                                            \
    ret |= (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv)
    cb(dtype::Int8, dtype::Int32);
    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
    cb(dtype::QuantizedS8, dtype::QuantizedS32);
#undef cb
    return ret;
}
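// A rough sketch of what the cb() expansions above amount to: the naive algo
// accepts any floating-point diff dtype, plus the three mixed int/quantized
// combinations. For the Int8 -> Int32 case, for instance, the generated
// check is:
//
//     ret |= (param.diff_type.enumv() == DTypeTrait<dtype::Int8>::enumv &&
//             param.filter_type.enumv() == DTypeTrait<dtype::Int8>::enumv &&
//             param.grad_type.enumv() == DTypeTrait<dtype::Int32>::enumv);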
size_t ConvolutionBackwardDataImpl::AlgoNaive::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const {
    return 0;
}

ConvolutionBackwardDataImpl::ncb_kern_t
ConvolutionBackwardDataImpl::AlgoNaive::dispatch_kern(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
#define cb(_dt)                                                         \
    do {                                                                \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {      \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                        \
                         midout_iv(DTypeTrait<_dt>::enumv)) {           \
                using ctype = DTypeTrait<_dt>::ctype;                   \
                return kern_naive<ctype, ctype, ctype>;                 \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
    } while (0);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
#define cb(dt_src, dt_dst)                                              \
    do {                                                                \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {     \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                        \
                         midout_iv(DTypeTrait<dt_src>::enumv)) {        \
                return kern_naive<DTypeTrait<dt_src>::ctype,            \
                                  DTypeTrait<dt_src>::ctype,            \
                                  DTypeTrait<dt_dst>::ctype>;           \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
    } while (0)
    cb(dtype::Int8, dtype::Int32);
    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
    cb(dtype::QuantizedS8, dtype::QuantizedS32);
    megdnn_throw("unsupported data type on ConvolutionBackwardData");
#undef cb
}
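// The ncb_kern_t returned here is consumed by exec_with_ncb_kern() later in
// this diff; a simplified sketch of that call site (omitting the group > 1
// splitting path):
//
//     auto algo = get_algorithm(p1g);
//     auto kptr = ncb_1g_dispatch_kern(algo, p1g);  // e.g. kern_naive<...>
//     auto run = [kptr, param]() { kptr(param); };
//     static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);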
/* ===================== direct algo ===================== */
bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
@@ -474,7 +539,7 @@ bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
size_t ConvolutionBackwardDataImpl::AlgoDirect::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(megdnn_fallback_conv,
    MIDOUT_BEGIN(megdnn_fallback_deconv,
                 midout_iv("AlgoDirect::get_workspace"_hash)) {
        auto FH = param.filter_meta.spatial[0],
             FW = param.filter_meta.spatial[1];
@@ -511,7 +576,7 @@ bool ConvolutionBackwardDataImpl::AlgoMatrixMul::usable(
size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(megdnn_fallback_conv,
    MIDOUT_BEGIN(megdnn_fallback_deconv,
                 midout_iv("AlgoMatrixMul::get_workspace"_hash)) {
        return get_bundle(param).total_size_in_bytes();
    }
@@ -522,33 +587,33 @@ size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
ConvolutionBackwardDataImpl::ncb_kern_t
ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
#define cb(dt, midout_tag)                                              \
    do {                                                                \
        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {       \
            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
                using ctype = DTypeTrait<dt>::ctype;                    \
                return kern_matmul<ctype, ctype, ctype>;                \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
#define cb(dt, midout_tag)                                                \
    do {                                                                  \
        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {         \
            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
                using ctype = DTypeTrait<dt>::ctype;                      \
                return kern_matmul<ctype, ctype, ctype>;                  \
            }                                                             \
            MIDOUT_END();                                                 \
        }                                                                 \
    } while (0);
    cb(dtype::Float32, "FLOAT"_hash);
    MEGDNN_INC_FLOAT16(cb(dtype::Float16, "FLOAT16"_hash));
    MEGDNN_INC_FLOAT16(cb(dtype::BFloat16, "BFLOAT16"_hash));
#undef cb
#define cb(dt_src, dt_dst, midout_tag)                                  \
    do {                                                                \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {     \
            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
                return kern_matmul<DTypeTrait<dt_src>::ctype,           \
                                   DTypeTrait<dt_src>::ctype,           \
                                   DTypeTrait<dt_dst>::ctype>;          \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
#define cb(dt_src, dt_dst, midout_tag)                                    \
    do {                                                                  \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&       \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {       \
            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
                return kern_matmul<DTypeTrait<dt_src>::ctype,             \
                                   DTypeTrait<dt_src>::ctype,             \
                                   DTypeTrait<dt_dst>::ctype>;            \
            }                                                             \
            MIDOUT_END();                                                 \
        }                                                                 \
    } while (0)
    cb(dtype::Int8, dtype::Int32, "INT8x8x32"_hash);
    cb(dtype::QuantizedS8, dtype::QuantizedS32, "QINT8x8x32"_hash);
@@ -557,4 +622,9 @@ ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
#undef cb
}

bool ConvolutionBackwardDataImpl::AlgoMatrixMul::is_preferred(
        const NCBKernSizeParam& param) const {
    return is_matrix_mul_preferred(param);
}

// vim: syntax=cpp.doxygen
@@ -156,6 +156,20 @@ private:
    ConvBiasImpl::AlgoBase* m_algorithm;
};

////////////////////////// convolutionbackwarddata ////////////////////////
class ConvolutionBackwardDataImpl::AlgoNaive final : public AlgoBase {
public:
    bool is_reproducible() const override { return true; }
    const char* name() const override { return "DeconvNaive"; }
    bool usable(ConvolutionBackwardDataImpl* opr,
                const NCBKernSizeParam& param) const override;
    size_t get_workspace(ConvolutionBackwardDataImpl*,
                         const NCBKernSizeParam& param) const override;
    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                             const NCBKernSizeParam&) const override;
    bool is_naive() const override { return true; }
};
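// is_naive() is the hook queried by exec_with_ncb_kern() (see the .cpp part
// of this diff) to decide whether a grouped problem can be handed to the
// kernel in one shot instead of being split per group, roughly:
//
//     if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
//         /* dispatch the whole problem once */
//     }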
class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase {
public:
    bool is_reproducible() const override { return true; }
@@ -178,6 +192,7 @@ public:
                         const NCBKernSizeParam& param) const override;
    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                             const NCBKernSizeParam&) const override;
    bool is_preferred(const NCBKernSizeParam& param) const override;
};

} // namespace fallback
@@ -31,12 +31,6 @@ using namespace megdnn;
using namespace fallback;

namespace {

class NaiveConvolutionBackwardData final
        : public megdnn::ConvolutionBackwardData::Algorithm {
    bool is_reproducible() const override { return true; }
    const char* name() const override { return "NCBD"; }
};
NaiveConvolutionBackwardData naive_conv_backward_data;

template <typename T>
void incr_ptr(T*& dst, ptrdiff_t delta) {
@@ -407,11 +401,25 @@ ConvolutionImpl::NCBKernSizeParam::deduce_algo_data_type() const {
/* ===================== ConvolutionBackwardData ===================== */
struct ConvolutionBackwardDataImpl::AlgoPack {
    AlgoDirect direct;
    AlgoMatrixMul matmul;
class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
    AlgoNaive algo_naive;
    AlgoDirect algo_direct;
    AlgoMatrixMul algo_matmul;

public:
    AlgoPack() {
        all_algos.emplace_back(&algo_matmul);
        all_algos.emplace_back(&algo_direct);
        all_algos.emplace_back(&algo_naive);
    }
    SmallVector<AlgoBase*> all_algos;
};
ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;

SmallVector<ConvolutionBackwardDataImpl::AlgoBase*>
ConvolutionBackwardDataImpl::algo_pack() {
    static AlgoPack sl_algo_pack;
    return sl_algo_pack.all_algos;
}
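// algo_pack() builds the pack lazily through the function-local static, so
// callers simply iterate the returned SmallVector; a minimal usage sketch:
//
//     for (auto&& algo : algo_pack()) {
//         if (algo->usable(this, param)) {
//             /* candidate algorithm */
//         }
//     }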
void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                       _megdnn_tensor_in diff,
@@ -539,7 +547,7 @@ void ConvolutionBackwardDataImpl::exec_with_ncb_kern(
    p1g.filter_meta.group = 1;
    auto algo = get_algorithm(p1g);
    auto kptr = ncb_1g_dispatch_kern(algo, p1g);
    if (algo == &naive_conv_backward_data || group == 1) {
    if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
        auto run = [kptr, param]() { kptr(param); };
        static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);
    } else {
@@ -625,7 +633,6 @@ size_t ConvolutionBackwardDataImpl::ncb_1g_get_workspace(
    if (algo->handle_type() == Handle::HandleType::FALLBACK) {
        return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
    }
    megdnn_assert(algo == &naive_conv_backward_data);
    return 0;
}
@@ -638,36 +645,6 @@ ConvolutionBackwardDataImpl::ncb_1g_dispatch_kern(
        return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
    }

    if (algo == &naive_conv_backward_data) {
#define cb(_dt)                                                       \
    do {                                                              \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {    \
            MIDOUT_BEGIN(megdnn_fb_convbwd_float,                     \
                         midout_iv(DTypeTrait<_dt>::enumv)) {         \
                using ctype = DTypeTrait<_dt>::ctype;                 \
                return kern_naive<ctype, ctype, ctype>;               \
            }                                                         \
            MIDOUT_END();                                             \
        }                                                             \
    } while (0);
        MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
#define cb(dt_src, dt_dst)                                            \
    do {                                                              \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
            return kern_naive<DTypeTrait<dt_src>::ctype,              \
                              DTypeTrait<dt_src>::ctype,              \
                              DTypeTrait<dt_dst>::ctype>;             \
        }                                                             \
    } while (0);
        cb(dtype::Int8, dtype::Int32) cb(dtype::Quantized8Asymm,
                                         dtype::QuantizedS32)
                cb(dtype::QuantizedS8, dtype::QuantizedS32) megdnn_throw(
                        "unsupported data type on ConvolutionBackwardData");
#undef cb
    }
    megdnn_throw(
            megdnn_mangle("no suitable ConvolutionBackwardData algorithm"));
}
@@ -686,34 +663,17 @@ std::vector<ConvolutionBackwardDataImpl::Algorithm*>
ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
        const NCBKernSizeParam& param) {
    std::vector<Algorithm*> ret;
    ret.reserve(2);
    ret.push_back(&naive_conv_backward_data);
    // insert from lowest to highest preference
    AlgoBase* cand[2] = {nullptr};
    if (param.filter_meta.group == 1 && param.filter_meta.dilation[0] == 1 &&
        param.filter_meta.dilation[1] == 1) {
        // we currently only have non-dilated algos
        if (param.filter_type.enumv() == DTypeEnum::Float32) {
            if (is_matrix_mul_preferred(param)) {
                cand[0] = &sm_algo_pack.direct;
                cand[1] = &sm_algo_pack.matmul;
    std::vector<Algorithm*> prefer_algos;
    for (auto&& i : algo_pack()) {
        if (i->usable(this, param)) {
            if (i->is_preferred(param)) {
                prefer_algos.push_back(i);
            } else {
                cand[0] = &sm_algo_pack.matmul;
                cand[1] = &sm_algo_pack.direct;
                ret.push_back(i);
            }
        } else {
            cand[0] = &sm_algo_pack.matmul;
        }
    }
    for (auto i : cand) {
        if (i && i->usable(this, param)) {
            ret.push_back(i);
        }
    }
    std::reverse(ret.begin(), ret.end());
    ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
    return ret;
}
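// Resulting order: algorithms reporting is_preferred() come first, followed
// by the remaining usable ones in registration order (matmul, direct, naive,
// as pushed in AlgoPack above). E.g., assuming all three are usable and only
// matmul is preferred, the returned vector is { matmul, direct, naive }.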
@@ -373,7 +373,7 @@ public:
};

protected:
    typedef void (*ncb_kern_t)(const NCBKernParam& param);
    using ncb_kern_t = thin_function<void(const NCBKernParam& param)>;
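    // ncb_kern_t is now a thin_function (a std::function-like wrapper) rather
    // than a raw function pointer; presumably the motivation is that it also
    // admits stateful callables, e.g. a capturing lambda (ws here is a
    // hypothetical captured workspace, not a name from this diff):
    //
    //     ncb_kern_t k = [ws](const NCBKernParam& p) { /* use ws */ };
    //
    // Plain kernel functions such as kern_naive<...> still convert implicitly.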
    //! default impl calls ncb_1g_dispatch_kern()
    virtual void exec_with_ncb_kern(const NCBKernParam& param);
@@ -428,9 +428,18 @@ protected:
                                 bool reproducible = true) const {
            return (!reproducible || is_reproducible()) && usable(opr, param);
        }

        virtual bool is_preferred(const NCBKernSizeParam&) const {
            return false;
        }

        //! if the algo is naive, it will not split by group
        virtual bool is_naive() const { return false; }
    };

    static bool is_matrix_mul_preferred(const NCBKernSizeParam& param);

    /**
     * \brief get all the algorithms for the opr.
     */
    virtual SmallVector<AlgoBase*> algo_pack();
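    // Being virtual, algo_pack() also lets a derived backend splice its own
    // algorithms in ahead of the fallback ones. A hypothetical sketch (the
    // names below are illustrative, not from this diff):
    //
    //     SmallVector<AlgoBase*> MyBackendImpl::algo_pack() {
    //         SmallVector<AlgoBase*> ret = my_backend_algos();
    //         for (auto&& a : fallback::ConvolutionBackwardDataImpl::algo_pack())
    //             ret.emplace_back(a);
    //         return ret;
    //     }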
private:
    NCBKernSizeParam m_prev_selected_algo_sizep;
@@ -448,11 +457,10 @@ private:
                                     _megdnn_tensor_out grad,
                                     _megdnn_workspace workspace);

    class AlgoNaive;
    class AlgoDirect;
    class AlgoMatrixMul;
    struct AlgoPack;

    static AlgoPack sm_algo_pack;
    class AlgoPack;
};

} // namespace fallback
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
@@ -614,4 +615,53 @@ TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;

    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;

        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        // TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };

    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen