refactor(dnn): refactor deconv algo

GitOrigin-RevId: 422be792eb
5 years ago · 1e71e0afe0
--- a/dnn/src/fallback/convolution/algos.cpp
+++ b/dnn/src/fallback/convolution/algos.cpp
@@ -21,6 +21,7 @@ using namespace megdnn;
 using namespace fallback;
 MIDOUT_DECL(megdnn_fallback_conv)
 MIDOUT_DECL(megdnn_fallback_deconv)
 namespace {
@@ -459,6 +460,70 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
    MIDOUT_END();
 }
 /////////////////////////// ConvolutionBackwardData /////////////////////
 /* ===================== naive algo ===================== */
 //! Report whether the naive algorithm accepts the dtype combination in
 //! \p param. The operator pointer argument is unnamed and unused.
 //! Returns true as soon as any of the dtype checks below matches.
 bool ConvolutionBackwardDataImpl::AlgoNaive::usable(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    bool ret = false;
    // Float path: only diff_type is compared, once per dtype enumerated by
    // MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT.
 #define cb(dt) ret |= (param.diff_type.enumv() == DTypeTrait<dt>::enumv);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
 #undef cb
    // Integer/quantized path: diff_type and filter_type must both equal
    // dt_src, and grad_type must equal dt_dst.
 #define cb(dt_src, dt_dst)                                            \
    ret |= (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv)
    cb(dtype::Int8, dtype::Int32);
    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
    cb(dtype::QuantizedS8, dtype::QuantizedS32);
 #undef cb
    return ret;
 }
 //! Workspace size requested by the naive algorithm: always zero, whatever
 //! the operator or kernel size parameters are (both arguments are unused).
 size_t ConvolutionBackwardDataImpl::AlgoNaive::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const {
    constexpr size_t no_workspace = 0;
    return no_workspace;
 }
 /**
  * \brief select the kern_naive instantiation matching the dtypes in \p param
  *
  * Float dtypes enumerated by MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT are matched
  * on filter_type alone and use the same ctype for all three template
  * arguments of kern_naive. The integer/quantized combinations have to match
  * diff_type, filter_type and grad_type together; diff and filter share
  * dt_src, grad uses dt_dst.
  *
  * The operator pointer argument is unnamed and unused.
  *
  * \return the matching kern_naive instantiation; if no dtype combination
  *      matches, control reaches megdnn_throw instead of returning.
  */
 ConvolutionBackwardDataImpl::ncb_kern_t
 ConvolutionBackwardDataImpl::AlgoNaive::dispatch_kern(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
 #define cb(_dt)                                                    \
    do {                                                           \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                   \
                         midout_iv(DTypeTrait<_dt>::enumv)) {      \
                using ctype = DTypeTrait<_dt>::ctype;              \
                return kern_naive<ctype, ctype, ctype>;            \
            }                                                      \
            MIDOUT_END();                                          \
        }                                                          \
    } while (0);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
 #undef cb
    // The midout tag below is built from dt_src. This macro has no `_dt`
    // parameter, so DTypeTrait<_dt> would be an undeclared identifier as soon
    // as MIDOUT_BEGIN actually evaluates its arguments.
 #define cb(dt_src, dt_dst)                                            \
    do {                                                              \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                      \
                         midout_iv(DTypeTrait<dt_src>::enumv)) {      \
                return kern_naive<DTypeTrait<dt_src>::ctype,          \
                                  DTypeTrait<dt_src>::ctype,          \
                                  DTypeTrait<dt_dst>::ctype>;         \
            }                                                         \
            MIDOUT_END();                                             \
        }                                                             \
    } while (0)
    cb(dtype::Int8, dtype::Int32);
    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
    cb(dtype::QuantizedS8, dtype::QuantizedS32);
    // No branch returned: the dtype combination is not handled here.
    megdnn_throw("unsupported data type on ConvolutionBackwardData");
 #undef cb
 }
 /* ===================== direct algo ===================== */
 bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
@@ -474,7 +539,7 @@ bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
 size_t ConvolutionBackwardDataImpl::AlgoDirect::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(megdnn_fallback_conv,
    MIDOUT_BEGIN(megdnn_fallback_deconv,
                 midout_iv("AlgoDirect::get_workspace"_hash)) {
        auto FH = param.filter_meta.spatial[0],
             FW = param.filter_meta.spatial[1];
@@ -511,7 +576,7 @@ bool ConvolutionBackwardDataImpl::AlgoMatrixMul::usable(
 size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(megdnn_fallback_conv,
    MIDOUT_BEGIN(megdnn_fallback_deconv,
                 midout_iv("AlgoMatrixMul::get_workspace"_hash)) {
        return get_bundle(param).total_size_in_bytes();
    }
@@ -522,33 +587,33 @@ size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
 ConvolutionBackwardDataImpl::ncb_kern_t
 ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
 #define cb(dt, midout_tag)                                              \
    do {                                                                \
        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {       \
            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
                using ctype = DTypeTrait<dt>::ctype;                    \
                return kern_matmul<ctype, ctype, ctype>;                \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
 #define cb(dt, midout_tag)                                                \
    do {                                                                  \
        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {         \
            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
                using ctype = DTypeTrait<dt>::ctype;                      \
                return kern_matmul<ctype, ctype, ctype>;                  \
            }                                                             \
            MIDOUT_END();                                                 \
        }                                                                 \
    } while (0);
    cb(dtype::Float32, "FLOAT"_hash);
    MEGDNN_INC_FLOAT16(cb(dtype::Float16, "FLOAT16"_hash));
    MEGDNN_INC_FLOAT16(cb(dtype::BFloat16, "BFLOAT16"_hash));
 #undef cb
 #define cb(dt_src, dt_dst, midout_tag)                                  \
    do {                                                                \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {     \
            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
                return kern_matmul<DTypeTrait<dt_src>::ctype,           \
                                   DTypeTrait<dt_src>::ctype,           \
                                   DTypeTrait<dt_dst>::ctype>;          \
            }                                                           \
            MIDOUT_END();                                               \
        }                                                               \
 #define cb(dt_src, dt_dst, midout_tag)                                    \
    do {                                                                  \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&       \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {       \
            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
                return kern_matmul<DTypeTrait<dt_src>::ctype,             \
                                   DTypeTrait<dt_src>::ctype,             \
                                   DTypeTrait<dt_dst>::ctype>;            \
            }                                                             \
            MIDOUT_END();                                                 \
        }                                                                 \
    } while (0)
    cb(dtype::Int8, dtype::Int32, "INT8x8x32"_hash);
    cb(dtype::QuantizedS8, dtype::QuantizedS32, "QINT8x8x32"_hash);
@@ -557,4 +622,9 @@ ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
 #undef cb
 }
 bool ConvolutionBackwardDataImpl::AlgoMatrixMul::is_preferred(
        const NCBKernSizeParam& param) const {
    return is_matrix_mul_preferred(param);
 }
 // vim: syntax=cpp.doxygen
--- a/dnn/src/fallback/convolution/algos.h
+++ b/dnn/src/fallback/convolution/algos.h
@@ -156,6 +156,20 @@ private:
    ConvBiasImpl::AlgoBase* m_algorithm;
 };
 ////////////////////////// convolutionbackwarddata ////////////////////////
 //! Naive ConvolutionBackwardData algorithm, exposed under the name
 //! "DeconvNaive". It reports itself as reproducible and as naive.
 class ConvolutionBackwardDataImpl::AlgoNaive final : public AlgoBase {
 public:
    bool is_reproducible() const override { return true; }
    //! name by which this algorithm is identified
    const char* name() const override { return "DeconvNaive"; }
    //! whether the dtypes in \p param are handled by this algorithm
    bool usable(ConvolutionBackwardDataImpl* opr,
                const NCBKernSizeParam& param) const override;
    //! workspace size required for \p param
    size_t get_workspace(ConvolutionBackwardDataImpl*,
                         const NCBKernSizeParam& param) const override;
    //! kernel function to run for the given size param
    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                             const NCBKernSizeParam&) const override;
    //! always true for this algorithm
    bool is_naive() const override { return true; }
 };
 class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase {
 public:
    bool is_reproducible() const override { return true; }
@@ -178,6 +192,7 @@ public:
                         const NCBKernSizeParam& param) const override;
    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                             const NCBKernSizeParam&) const override;
    bool is_preferred(const NCBKernSizeParam& param) const override;
 };
 }  // namespace fallback
--- a/dnn/src/fallback/convolution/opr_impl.cpp
+++ b/dnn/src/fallback/convolution/opr_impl.cpp
@@ -31,12 +31,6 @@ using namespace megdnn;
 using namespace fallback;
 namespace {
 class NaiveConvolutionBackwardData final
        : public megdnn::ConvolutionBackwardData::Algorithm {
    bool is_reproducible() const override { return true; }
    const char* name() const override { return "NCBD"; }
 };
 NaiveConvolutionBackwardData naive_conv_backward_data;
 template <typename T>
 void incr_ptr(T*& dst, ptrdiff_t delta) {
@@ -407,11 +401,25 @@ ConvolutionImpl::NCBKernSizeParam::deduce_algo_data_type() const {
 /* ===================== ConvolutionBackwardData ===================== */
 struct ConvolutionBackwardDataImpl::AlgoPack {
    AlgoDirect direct;
    AlgoMatrixMul matmul;
 //! Holds one instance of every fallback ConvolutionBackwardData algorithm
 //! and publishes pointers to them through all_algos.
 class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
    AlgoNaive algo_naive;
    AlgoDirect algo_direct;
    AlgoMatrixMul algo_matmul;
 public:
    //! registers the algorithms in the order matmul, direct, naive
    AlgoPack() {
        all_algos.emplace_back(&algo_matmul);
        all_algos.emplace_back(&algo_direct);
        all_algos.emplace_back(&algo_naive);
    }
    //! pointers to the member algorithm objects above, in registration order
    SmallVector<AlgoBase*> all_algos;
 };
 ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
 //! Return (by value) the list of algorithm pointers registered in a
 //! function-local static AlgoPack.
 SmallVector<ConvolutionBackwardDataImpl::AlgoBase*>
 ConvolutionBackwardDataImpl::algo_pack() {
    static AlgoPack pack_singleton;
    const SmallVector<AlgoBase*>& registered = pack_singleton.all_algos;
    return registered;
 }
 void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                       _megdnn_tensor_in diff,
@@ -539,7 +547,7 @@ void ConvolutionBackwardDataImpl::exec_with_ncb_kern(
    p1g.filter_meta.group = 1;
    auto algo = get_algorithm(p1g);
    auto kptr = ncb_1g_dispatch_kern(algo, p1g);
    if (algo == &naive_conv_backward_data || group == 1) {
    if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
        auto run = [kptr, param]() { kptr(param); };
        static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);
    } else {
@@ -625,7 +633,6 @@ size_t ConvolutionBackwardDataImpl::ncb_1g_get_workspace(
    if (algo->handle_type() == Handle::HandleType::FALLBACK) {
        return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
    }
    megdnn_assert(algo == &naive_conv_backward_data);
    return 0;
 }
@@ -638,36 +645,6 @@ ConvolutionBackwardDataImpl::ncb_1g_dispatch_kern(
        return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
    }
    if (algo == &naive_conv_backward_data) {
 #define cb(_dt)                                                    \
    do {                                                           \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
            MIDOUT_BEGIN(megdnn_fb_convbwd_float,                  \
                         midout_iv(DTypeTrait<_dt>::enumv)) {      \
                using ctype = DTypeTrait<_dt>::ctype;              \
                return kern_naive<ctype, ctype, ctype>;            \
            }                                                      \
            MIDOUT_END();                                          \
        }                                                          \
    } while (0);
        MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
 #undef cb
 #define cb(dt_src, dt_dst)                                            \
    do {                                                              \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
            return kern_naive<DTypeTrait<dt_src>::ctype,              \
                              DTypeTrait<dt_src>::ctype,              \
                              DTypeTrait<dt_dst>::ctype>;             \
        }                                                             \
    } while (0);
        cb(dtype::Int8, dtype::Int32) cb(dtype::Quantized8Asymm,
                                         dtype::QuantizedS32)
                cb(dtype::QuantizedS8, dtype::QuantizedS32) megdnn_throw(
                        "unsupported data type on ConvolutionBackwardData");
 #undef cb
    }
    megdnn_throw(
            megdnn_mangle("no suitable ConvolutionBackwardData algorithm"));
 }
@@ -686,34 +663,17 @@ std::vector<ConvolutionBackwardDataImpl::Algorithm*>
 ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
        const NCBKernSizeParam& param) {
    std::vector<Algorithm*> ret;
    ret.reserve(2);
    ret.push_back(&naive_conv_backward_data);
    // insert from lowest to highest preference
    AlgoBase* cand[2] = {nullptr};
    if (param.filter_meta.group == 1 && param.filter_meta.dilation[0] == 1 &&
        param.filter_meta.dilation[1] == 1) {
        // we currently only have non-dilated algos
        if (param.filter_type.enumv() == DTypeEnum::Float32) {
            if (is_matrix_mul_preferred(param)) {
                cand[0] = &sm_algo_pack.direct;
                cand[1] = &sm_algo_pack.matmul;
    std::vector<Algorithm*> prefer_algos;
    for (auto&& i : algo_pack()) {
        if (i->usable(this, param)) {
            if (i->is_preferred(param)) {
                prefer_algos.push_back(i);
            } else {
                cand[0] = &sm_algo_pack.matmul;
                cand[1] = &sm_algo_pack.direct;
                ret.push_back(i);
            }
        } else {
            cand[0] = &sm_algo_pack.matmul;
        }
    }
    for (auto i : cand) {
        if (i && i->usable(this, param)) {
            ret.push_back(i);
        }
    }
    std::reverse(ret.begin(), ret.end());
    ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
    return ret;
 }
--- a/dnn/src/fallback/convolution/opr_impl.h
+++ b/dnn/src/fallback/convolution/opr_impl.h
@@ -373,7 +373,7 @@ public:
    };
 protected:
    typedef void (*ncb_kern_t)(const NCBKernParam& param);
    using ncb_kern_t = thin_function<void(const NCBKernParam& param)>;
    //! default impl calls ncb_1g_dispatch_kern()
    virtual void exec_with_ncb_kern(const NCBKernParam& param);
@@ -428,9 +428,18 @@ protected:
                                 bool reproducible = true) const {
            return (!reproducible || is_reproducible()) && usable(opr, param);
        }
        //! whether this algo is preferred for the given size param; the
        //! base implementation always answers false
        virtual bool is_preferred(const NCBKernSizeParam&) const {
            return false;
        }
        //! if the algo is naive, it will not split by group
        virtual bool is_naive() const { return false; }
    };
    static bool is_matrix_mul_preferred(const NCBKernSizeParam& param);
    /**
     * \brief get all the algorithm for the opr.
     */
    virtual SmallVector<AlgoBase*> algo_pack();
 private:
    NCBKernSizeParam m_prev_selected_algo_sizep;
@@ -448,11 +457,10 @@ private:
                                     _megdnn_tensor_out grad,
                                     _megdnn_workspace workspace);
    class AlgoNaive;
    class AlgoDirect;
    class AlgoMatrixMul;
    struct AlgoPack;
    static AlgoPack sm_algo_pack;
    class AlgoPack;
 };
 }  // namespace fallback
--- a/dnn/test/fallback/convolution.cpp
+++ b/dnn/test/fallback/convolution.cpp
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
 #include "megdnn/dtype.h"
 #include "test/fallback/fixture.h"
 #include "test/common/benchmarker.h"
@@ -614,4 +615,53 @@ TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    }
 }
 //! Runs float32 ConvolutionBackwardData cases (dense and grouped, with and
 //! without dilation) through a checker configured with the "DeconvNaive"
 //! algorithm name, once per convolution mode.
 TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    using Param = ConvolutionBackwardData::Param;
    Param param;
    Checker<ConvolutionBackwardData> checker(handle());
    // "DeconvNaive" is the name handed to the AlgoChecker callback
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));

    // Build the layouts for one case and execute it; param.mode is left as
    // set by the caller.
    auto check_case = [&](size_t batch, size_t ic, size_t oh, size_t ow,
                          size_t oc, size_t fh, size_t fw, size_t stride,
                          size_t padding, size_t dilate, size_t group) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;

        TensorLayout diff =
                TensorLayout{{batch, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group != 1) {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        }

        // grad's layout is filled in by a throw-away operator
        {
            auto deducer = handle()->create_operator<ConvolutionBackwardData>();
            deducer->param() = param;
            deducer->deduce_layout(filter, diff, grad);
        }

        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };

    // columns: n, ic, oh, ow, oc, fh, fw, stride, padding, dilate, group
    const size_t cases[][11] = {
            {4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1},
            {5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2},
            {4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3},
            {2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2},
            {3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3},
            {5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2},
            {2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3},
            {4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2},
    };

    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        for (const auto& c : cases) {
            check_case(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8],
                       c[9], c[10]);
        }
    }
 }
 // vim: syntax=cpp.doxygen