| @@ -15,15 +15,23 @@ | |||
| #include "src/arm_common/elemwise_helper/kimpl/op_base.h" | |||
| #include "src/arm_common/elemwise_op.h" | |||
| #include "src/fallback/conv_bias/opr_impl.h" | |||
| #include "midout.h" | |||
| MIDOUT_DECL(arm_common_conv_bias_postprocess_helper) | |||
| namespace { | |||
| #define CONCAT_OP(_name) megdnn::arm_common::_name | |||
| #define CONCAT_NL(_name) megdnn::NonlineMode::_name | |||
| #define CB(_caller, _op, _mode) \ | |||
| case _mode: \ | |||
| _caller(_op); \ | |||
| #define CB(_caller, _op, _mode, midout_tag) \ | |||
| case _mode: \ | |||
| MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \ | |||
| _caller(_op); \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| break; | |||
| #define DEFAULT \ | |||
| @@ -65,44 +73,53 @@ namespace { | |||
| reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \ | |||
| dst_type, N* OC* OH* OW* pack_oc_size); | |||
| #define FOR_BIAS(_mode) \ | |||
| switch (_mode) { \ | |||
| case megdnn::BiasMode::NO_BIAS: \ | |||
| FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY) \ | |||
| break; \ | |||
| case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ | |||
| if (pack_oc_size == 1) { \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ | |||
| } else { \ | |||
| megdnn_assert(pack_oc_size == 4, \ | |||
| "Only support nchw44 in ARM"); \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ | |||
| } \ | |||
| break; \ | |||
| case megdnn::BiasMode::BIAS: \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY) \ | |||
| break; \ | |||
| default: \ | |||
| megdnn_throw("no quantized unsupported biasmode"); \ | |||
| break; \ | |||
| #define FOR_BIAS(_mode) \ | |||
| switch (_mode) { \ | |||
| case megdnn::BiasMode::NO_BIAS: \ | |||
| MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \ | |||
| FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY); \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| break; \ | |||
| case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ | |||
| MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \ | |||
| if (pack_oc_size == 1) { \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ | |||
| } else { \ | |||
| megdnn_assert(pack_oc_size == 4, \ | |||
| "Only support nchw44 in ARM"); \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ | |||
| } \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| break; \ | |||
| case megdnn::BiasMode::BIAS: \ | |||
| MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \ | |||
| FOR_NONLINEAR(FOR_NONLINEAR_BINARY); \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| break; \ | |||
| default: \ | |||
| megdnn_throw("no quantized unsupported biasmode"); \ | |||
| break; \ | |||
| } | |||
| #define FOR_NONLINEAR(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ | |||
| DEFAULT \ | |||
| #define FOR_NONLINEAR(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) \ | |||
| CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4) \ | |||
| CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \ | |||
| CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6) \ | |||
| DEFAULT \ | |||
| } | |||
| #define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY() \ | |||
| CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ | |||
| CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \ | |||
| CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ | |||
| DEFAULT \ | |||
| #define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY() \ | |||
| CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7); \ | |||
| CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \ | |||
| CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9); \ | |||
| DEFAULT \ | |||
| } | |||
| template <typename ctype, typename dtype = ctype, | |||
| @@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> { | |||
| case megdnn::NonlineMode::IDENTITY: \ | |||
| _caller(_op) break; | |||
| #define FOR_NONLINEAR(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ | |||
| DEFAULT \ | |||
| #define FOR_NONLINEAR(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ | |||
| CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10) \ | |||
| CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \ | |||
| DEFAULT \ | |||
| } | |||
| #define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ | |||
| CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ | |||
| CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ | |||
| DEFAULT \ | |||
| #define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
| switch (nonlineMode) { \ | |||
| HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ | |||
| CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12) \ | |||
| CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \ | |||
| DEFAULT \ | |||
| } | |||
| #define FOR_BIAS(_bias_mode, OH, OW) \ | |||
| @@ -18,6 +18,10 @@ | |||
| #include <mutex> | |||
| #include "midout.h" | |||
| MIDOUT_DECL(dnn_src_common_handle_impl) | |||
| namespace megdnn { | |||
| class HandleImplHelper : public Handle { | |||
| @@ -63,19 +67,23 @@ protected: | |||
| template <class Opr, size_t idx, class Self> | |||
| static Opr* get_helper_opr(Self self, | |||
| const typename Opr::Param& param = {}) { | |||
| static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | |||
| if (!self->m_helper_oprs[idx]) { | |||
| std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx}; | |||
| MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { | |||
| static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | |||
| if (!self->m_helper_oprs[idx]) { | |||
| self->m_helper_oprs[idx] = | |||
| self->template create_operator<Opr>(); | |||
| auto ret = static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
| ret->param() = param; | |||
| megdnn_assert(ret->is_thread_safe()); | |||
| return ret; | |||
| std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx}; | |||
| if (!self->m_helper_oprs[idx]) { | |||
| self->m_helper_oprs[idx] = | |||
| self->template create_operator<Opr>(); | |||
| auto ret = | |||
| static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
| ret->param() = param; | |||
| megdnn_assert(ret->is_thread_safe()); | |||
| return ret; | |||
| } | |||
| } | |||
| return static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
| } | |||
| return static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
| MIDOUT_END(); | |||
| } | |||
| private: | |||
| @@ -13,6 +13,10 @@ | |||
| #include "megdnn/oprs.h" | |||
| #include "src/common/utils.h" | |||
| #include "midout.h" | |||
| MIDOUT_DECL(transpose_fallback) | |||
| namespace megdnn { | |||
| namespace relayout { | |||
| @@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) { | |||
| auto work_block = [m, n, &batch_src, &batch_dst]( | |||
| const size_t i, const size_t j, const size_t h, | |||
| const size_t w) { | |||
| auto src = batch_src + i * n + j, dst = batch_dst + j * m + i; | |||
| if (h == B && w == B) { | |||
| transpose_block(src, dst, n, m); | |||
| } else { | |||
| transpose_block(src, dst, n, m, h, w); | |||
| MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) { | |||
| if (h == B && w == B) { | |||
| transpose_block(src, dst, n, m); | |||
| } else { | |||
| transpose_block(src, dst, n, m, h, w); | |||
| } | |||
| } | |||
| MIDOUT_END(); | |||
| }; | |||
| auto work_row = [&work_block, n](size_t i, size_t h) { | |||
| size_t j = 0; | |||
| @@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( | |||
| get_matmul_kern_param(param, ohw_tile_size, oc_tile_size); | |||
| if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) { | |||
| Im2colKerns<Pack_Mode::DEFAULT> defaultkern; | |||
| ws = defaultkern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| MIDOUT_BEGIN( | |||
| megdnn_fallback_im2col, | |||
| midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) { | |||
| Im2colKerns<Pack_Mode::DEFAULT> defaultkern; | |||
| ws = defaultkern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| } | |||
| MIDOUT_END(); | |||
| } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) { | |||
| Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern; | |||
| ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| MIDOUT_BEGIN( | |||
| megdnn_fallback_im2col, | |||
| midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) { | |||
| Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern; | |||
| ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| } | |||
| MIDOUT_END(); | |||
| } else { | |||
| Im2colKerns<Pack_Mode::NO_PACK> nopackkern; | |||
| ws = nopackkern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| MIDOUT_BEGIN( | |||
| megdnn_fallback_im2col, | |||
| midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) { | |||
| Im2colKerns<Pack_Mode::NO_PACK> nopackkern; | |||
| ws = nopackkern.get_thread_bundle(param, im2col_kern_param, | |||
| m_matmul_algo, ohw_tile_size, | |||
| oc_tile_size); | |||
| } | |||
| MIDOUT_END(); | |||
| } | |||
| return {nullptr, | |||
| @@ -19,6 +19,9 @@ | |||
| #include "src/fallback/conv_bias/opr_impl.h" | |||
| #include "src/fallback/matrix_mul/opr_impl.h" | |||
| #include "midout.h" | |||
| MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common) | |||
| namespace megdnn { | |||
| namespace winograd { | |||
| @@ -440,9 +443,12 @@ public: | |||
| unit_oc_size]( | |||
| const NCBKernParam& ncb_param, | |||
| const NCBKernIndex& ncb_index) { | |||
| winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo, | |||
| matmul_param, unit_tile_size, unit_oc_size, | |||
| ncb_param, std::move(ncb_index)); | |||
| MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) { | |||
| winograd_compute(strategy, bundle_top, bundle_compute, | |||
| matmul_algo, matmul_param, unit_tile_size, | |||
| unit_oc_size, ncb_param, std::move(ncb_index)); | |||
| } | |||
| MIDOUT_END(); | |||
| }; | |||
| kerns.push_back( | |||
| {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}}); | |||
| @@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | |||
| param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ | |||
| using ctype = DTypeTrait<dt>::ctype; \ | |||
| using comp_type = DTypeTrait<compute_type>::ctype; \ | |||
| return {{kern_naive_forward<ctype, ctype, comp_type>, \ | |||
| {group, N, 1_z}}}; \ | |||
| MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \ | |||
| return {{kern_naive_forward<ctype, ctype, comp_type>, \ | |||
| {group, N, 1_z}}}; \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| } \ | |||
| } while (0) | |||
| @@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | |||
| #endif | |||
| #undef cb | |||
| #define cb(dt_src, dt_dst) \ | |||
| do { \ | |||
| if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
| param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
| param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \ | |||
| return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \ | |||
| DTypeTrait<dt_dst>::ctype, \ | |||
| DTypeTrait<dt_dst>::ctype>, \ | |||
| {group, N, 1_z}}}; \ | |||
| } \ | |||
| #define cb(dt_src, dt_dst) \ | |||
| do { \ | |||
| if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
| param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
| param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \ | |||
| MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \ | |||
| return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \ | |||
| DTypeTrait<dt_dst>::ctype, \ | |||
| DTypeTrait<dt_dst>::ctype>, \ | |||
| {group, N, 1_z}}}; \ | |||
| } \ | |||
| MIDOUT_END(); \ | |||
| } \ | |||
| } while (0) | |||
| cb(dtype::Int8, dtype::Int16); | |||
| cb(dtype::Int8, dtype::Int32); | |||
| @@ -14,6 +14,10 @@ | |||
| #include "megdnn/tensor_iter.h" | |||
| #include "src/naive/handle.h" | |||
| #include "midout.h" | |||
| MIDOUT_DECL(naive_relayout) | |||
| using namespace megdnn; | |||
| using namespace naive; | |||
| @@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec( | |||
| do_exec(src, dst); | |||
| } | |||
| void RelayoutForwardImpl::do_exec( | |||
| _megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||
| switch(src.layout.dtype.enumv()) { | |||
| #define cb(_dt) \ | |||
| case DTypeEnum::_dt: \ | |||
| { \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||
| do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \ | |||
| return; \ | |||
| } | |||
| MEGDNN_FOREACH_DTYPE_NAME(cb) | |||
| MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) | |||
| void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src, | |||
| _megdnn_tensor_out dst) { | |||
| MIDOUT_BEGIN(naive_relayout, midout_iv(0)) { | |||
| switch (src.layout.dtype.enumv()) { | |||
| #define cb(_dt) \ | |||
| case DTypeEnum::_dt: { \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||
| do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \ | |||
| return; \ | |||
| } | |||
| MEGDNN_FOREACH_DTYPE_NAME(cb) | |||
| MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) | |||
| #undef cb | |||
| default: | |||
| megdnn_throw("bad dtype"); | |||
| default: | |||
| megdnn_throw("bad dtype"); | |||
| } | |||
| } | |||
| MIDOUT_END(); | |||
| } | |||
| void RelayoutForwardImpl::check_cpu_handle(Handle *handle) { | |||
| @@ -27,10 +27,16 @@ endif() | |||
| add_executable(megdnn_test ${SOURCES}) | |||
| target_link_libraries(megdnn_test gtest) | |||
| target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS}) | |||
| target_include_directories(megdnn_test | |||
| PRIVATE | |||
| ${PROJECT_SOURCE_DIR}/third_party/midout/src | |||
| ) | |||
| if(UNIX) | |||
| set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") | |||
| endif() | |||
| @@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE | |||
| */ | |||
| #define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \ | |||
| namespace { \ | |||
| ::mgb::serialization::OprRegistryCaller<_cls, _impl> \ | |||
| [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \ | |||
| __caller_OprReg##_cls##_ins; \ | |||
| } | |||
| @@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {}; | |||
| MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \ | |||
| } \ | |||
| }; \ | |||
| ::mgb::serialization::OprRegistryCaller< \ | |||
| [[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \ | |||
| _cls, _OprRegShallowCopy##_cls> \ | |||
| __caller_OprRegShallowCopy##_cls##_ins; \ | |||
| } | |||