GitOrigin-RevId: db433164d2
tags/v1.2.0
| @@ -36,8 +36,6 @@ using namespace opr; | |||
| using namespace cg::static_infer; | |||
| using intl::WorkspaceLimitGetter; | |||
| #define CACHE_KEY_VERSION "v2" | |||
| /* ==================== misc impl ==================== */ | |||
| mixin::Convolution::~Convolution() = default; | |||
| @@ -103,26 +101,12 @@ void mixin::Convolution::init_output_static_infer_desc_for_bwd_data( | |||
| {SourceType::DEP, inp_deps, infer_wk}); | |||
| } | |||
// IMPL_CONV shows up twice in this hunk because the diff's +/- markers were
// lost; presumably the two-argument form is the removed one and the
// one-argument form is its replacement (TODO confirm against the commit).
//
// Two-argument form (_cls, _prof_name): defines _cls::init_profile_cache(),
// which builds a cache name from _prof_name, CACHE_KEY_VERSION and
// megdnn_opr()->get_algorithm_set_name(), then stores a new
// AlgoChooserProfileCache for comp_node() in m_profile_cache. It also defines
// _cls::param_blob().
//
// One-argument form (_cls): defines only _cls::param_blob(), which returns the
// address of param() together with sizeof(Param).
//
// Both forms end by expanding MGB_DYN_TYPE_OBJ_FINAL_IMPL(_cls), which is
// defined elsewhere.
| #define IMPL_CONV(_cls, _prof_name) \ | |||
| void _cls::init_profile_cache() { \ | |||
| std::string name(_prof_name CACHE_KEY_VERSION); \ | |||
| name.append(megdnn_opr()->get_algorithm_set_name()); \ | |||
| m_profile_cache = std::make_unique<AlgoChooserProfileCache>( \ | |||
| comp_node(), name.c_str()); \ | |||
| } \ | |||
| std::pair<const void*, size_t> _cls::param_blob() const { \ | |||
| return {&param(), sizeof(Param)}; \ | |||
| } \ | |||
| #define IMPL_CONV(_cls) \ | |||
| std::pair<const void*, size_t> _cls::param_blob() const { \ | |||
| return {&param(), sizeof(Param)}; \ | |||
| } \ | |||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(_cls) | |||
// Returns the profile cache held in m_profile_cache, creating it on first
// use: if the pointer is empty, init_profile_cache() is called and the cache
// is then asserted to be non-null before it is dereferenced.
| AlgoChooserProfileCache& mixin::Convolution::profile_cache() const { | |||
| if (!m_profile_cache) { | |||
// This accessor is const, so constness is cast away to call the non-const
// init_profile_cache() on this object.
| const_cast<Convolution*>(this)->init_profile_cache(); | |||
| mgb_assert(m_profile_cache); | |||
| } | |||
| return *m_profile_cache; | |||
| } | |||
| class mixin::WeightPreprocessExecutor::PreprocessedFilterExecDep final | |||
| : public cg::GraphExecutable::ExecDependency { | |||
| std::unique_ptr<PreprocessedFilter> m_pf; | |||
| @@ -209,7 +193,7 @@ bool mixin::WeightPreprocessExecutor::mixin_allow_weight_preprocess( | |||
| /* ==================== ConvolutionForward ==================== */ | |||
| IMPL_CONV(ConvolutionForward, "conv_fwd"); | |||
| IMPL_CONV(ConvolutionForward); | |||
| ConvolutionForward::ConvolutionForward(VarNode* src, VarNode* filter, | |||
| const Param& param, | |||
| @@ -335,7 +319,7 @@ void ConvolutionForward::scn_do_execute_preprocess() { | |||
| } | |||
| /* ==================== ConvolutionBackwardData ==================== */ | |||
| IMPL_CONV(ConvolutionBackwardData, "conv_bwd_data"); | |||
| IMPL_CONV(ConvolutionBackwardData); | |||
| ConvolutionBackwardData::ConvolutionBackwardData( | |||
| VarNode* filter, VarNode* diff, VarNode* src_for_shp, | |||
| @@ -426,7 +410,7 @@ MGB_IMPL_OPR_GRAD(ConvolutionBackwardData) { | |||
| #endif | |||
| /* ==================== ConvolutionBackwardFilter ==================== */ | |||
| IMPL_CONV(ConvolutionBackwardFilter, "conv_bwd_filter"); | |||
| IMPL_CONV(ConvolutionBackwardFilter); | |||
| ConvolutionBackwardFilter::ConvolutionBackwardFilter( | |||
| VarNode* src, VarNode* diff, VarNode* filter, const Param& param, | |||
| @@ -480,7 +464,7 @@ MGB_IMPL_OPR_GRAD(ConvolutionBackwardFilter) { | |||
| #endif | |||
| /* ==================== Convolution3DForward ==================== */ | |||
| IMPL_CONV(Convolution3DForward, "conv3d_fwd"); | |||
| IMPL_CONV(Convolution3DForward); | |||
| Convolution3DForward::Convolution3DForward(VarNode* src, VarNode* filter, | |||
| const Param& param, | |||
| @@ -553,7 +537,7 @@ size_t Convolution3DForward::get_workspace_size_bytes( | |||
| } | |||
| /* ==================== Convolution3DBackwardData ==================== */ | |||
| IMPL_CONV(Convolution3DBackwardData, "conv3d_bwd_data"); | |||
| IMPL_CONV(Convolution3DBackwardData); | |||
| Convolution3DBackwardData::Convolution3DBackwardData( | |||
| VarNode* filter, VarNode* diff, VarNode* src_for_shp, | |||
| @@ -631,7 +615,7 @@ MGB_IMPL_OPR_GRAD(Convolution3DBackwardData) { | |||
| #endif | |||
| /* ==================== Convolution3DBackwardFilter ==================== */ | |||
| IMPL_CONV(Convolution3DBackwardFilter, "conv3d_bwd_filter"); | |||
| IMPL_CONV(Convolution3DBackwardFilter); | |||
| Convolution3DBackwardFilter::Convolution3DBackwardFilter( | |||
| VarNode* src, VarNode* diff, VarNode* filter, const Param& param, | |||
| @@ -719,7 +703,7 @@ SymbolVar MaskPropagate::make(SymbolVar src, const Param& param, | |||
| } | |||
| /* ==================== ConvBiasForward ==================== */ | |||
| IMPL_CONV(ConvBiasForward, "conv_bias_fwd"); | |||
| IMPL_CONV(ConvBiasForward); | |||
| ConvBiasForward::ConvBiasForward(VarNode* src, VarNode* filter, | |||
| const Param& param, | |||
| @@ -1005,7 +989,7 @@ void ConvBiasForward::scn_do_execute_preprocess() { | |||
| /* ===================== LocalShareForward ==================== */ | |||
| IMPL_CONV(LocalShareForward, "local_share"); | |||
| IMPL_CONV(LocalShareForward); | |||
| LocalShareForward::LocalShareForward(VarNode* src, VarNode* filter, | |||
| const Param& param, | |||
| @@ -1073,7 +1057,7 @@ MGB_IMPL_OPR_GRAD(LocalShareForward) { | |||
| /* ===================== LocalShareBackwardData ==================== */ | |||
| IMPL_CONV(LocalShareBackwardData, "local_share_bwd_data"); | |||
| IMPL_CONV(LocalShareBackwardData); | |||
| LocalShareBackwardData::LocalShareBackwardData(VarNode* filter, VarNode* diff, | |||
| VarNode* src_for_shp, | |||
| @@ -1153,7 +1137,7 @@ MGB_IMPL_OPR_GRAD(LocalShareBackwardData) { | |||
| /* ==================== LocalShareBackwardFilter ==================== */ | |||
| IMPL_CONV(LocalShareBackwardFilter, "local_share_bwd_filter"); | |||
| IMPL_CONV(LocalShareBackwardFilter); | |||
| LocalShareBackwardFilter::LocalShareBackwardFilter( | |||
| VarNode* src, VarNode* diff, VarNode* filter, const Param& param, | |||
| @@ -1208,7 +1192,7 @@ MGB_IMPL_OPR_GRAD(LocalShareBackwardFilter) { | |||
| /* ===================== DeformableConvForward ==================== */ | |||
| IMPL_CONV(DeformableConvForward, "deformable_conv"); | |||
| IMPL_CONV(DeformableConvForward); | |||
| DeformableConvForward::DeformableConvForward(VarNode* src, VarNode* filter, | |||
| VarNode* offset, VarNode* mask, | |||
| @@ -1293,7 +1277,7 @@ MGB_IMPL_OPR_GRAD(DeformableConvForward) { | |||
| /* ==================== DeformableConvBackwardData ==================== */ | |||
| IMPL_CONV(DeformableConvBackwardData, "deformalbe_conv_backward_data"); | |||
| IMPL_CONV(DeformableConvBackwardData); | |||
| DeformableConvBackwardData::DeformableConvBackwardData( | |||
| VarNode* src, VarNode* filter, VarNode* offset, VarNode* mask, | |||
| @@ -1425,7 +1409,7 @@ void DeformableConvBackwardData::init_output_static_infer_desc() { | |||
| /* ==================== DeformableConvBackwardFilter ==================== */ | |||
| IMPL_CONV(DeformableConvBackwardFilter, "deformalbe_conv_backward_filter"); | |||
| IMPL_CONV(DeformableConvBackwardFilter); | |||
| DeformableConvBackwardFilter::DeformableConvBackwardFilter( | |||
| VarNode* src, VarNode* filter, VarNode* offset, VarNode* mask, | |||
| @@ -1484,7 +1468,7 @@ size_t DeformableConvBackwardFilter::get_workspace_size_bytes( | |||
| } | |||
| /* ==================== BatchConvBiasForward ==================== */ | |||
| IMPL_CONV(BatchConvBiasForward, "batch_conv_bias_fwd"); | |||
| IMPL_CONV(BatchConvBiasForward); | |||
| BatchConvBiasForward::BatchConvBiasForward(VarNode* src, VarNode* filter, | |||
| const Param& param, | |||
| @@ -36,15 +36,29 @@ using mgb::opr::intl::WorkspaceLimitGetter; | |||
| // timeout delta to be added with fastest known algorithm for new algos | |||
| constexpr double TIMEOUT_TOLERANCE = 2; | |||
| #define CACHE_KEY_VERSION "v3" | |||
// File-local helper (anonymous namespace) that builds a profile cache name.
| namespace { | |||
// Returns the concatenation of three parts:
//   1. the type name of the MGB operator that MegDNNOpr2MGBOpr maps Opr to,
//   2. CACHE_KEY_VERSION,
//   3. opr->get_algorithm_set_name().
// opr is the MegDNN operator instance; only get_algorithm_set_name() is
// called on it.
| template <typename Opr> | |||
| std::string profile_name(Opr* opr) { | |||
| std::string ret = | |||
| std::string(MegDNNOpr2MGBOpr<Opr>::MGBOpr::typeinfo()->name) + | |||
| CACHE_KEY_VERSION; | |||
| ret.append(opr->get_algorithm_set_name()); | |||
| return ret; | |||
| } | |||
| } | |||
| namespace mgb { | |||
| namespace opr { | |||
| template <typename Opr> | |||
| AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||
| ExeContext& ctx, bool enable_update) { | |||
| AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache(); | |||
| AlgoChooserProfileCache cache(ctx.mgb_opr()->comp_node(), | |||
| profile_name(ctx.megdnn_opr()).c_str()); | |||
| ConvTensorLayouts origin_layouts = ctx.layouts(); | |||
| TensorLayoutArray origin_layouts = ctx.layouts(); | |||
| typename Opr::Param origin_param = ctx.mgb_opr()->param(); | |||
| AlgoChooserProfileCache::Key cache_key{origin_layouts.data(), | |||
| origin_layouts.size(), &origin_param, | |||
| @@ -131,12 +145,12 @@ typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | |||
| "profiling result but not in algo_map; please " | |||
| "report this " | |||
| "bug; opr: %s{%s}, shapes: %s %s %s", | |||
| i.algo.c_str(), | |||
| ctx.mgb_opr()->cname(), | |||
| ctx.mgb_opr()->dyn_typeinfo()->name, | |||
| ctx.layouts()[0].TensorShape::to_string().c_str(), | |||
| ctx.layouts()[1].TensorShape::to_string().c_str(), | |||
| ctx.layouts()[2].TensorShape::to_string().c_str(), | |||
| i.algo.c_str()); | |||
| ctx.layouts()[2].TensorShape::to_string().c_str()); | |||
| return iter->second; | |||
| } | |||
| } | |||
| @@ -153,7 +167,7 @@ typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | |||
| } | |||
| template <typename Opr> | |||
| size_t AlgoChooser<Opr>::setup_algo(const ConvTensorLayouts& layouts, | |||
| size_t AlgoChooser<Opr>::setup_algo(const TensorLayoutArray& layouts, | |||
| Opr* megdnn_opr, const MGBOpr* mgb_opr, | |||
| bool allow_weight_preprocess) { | |||
| if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { | |||
| @@ -220,7 +234,7 @@ typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::get_algo( | |||
| AlgoChooser<megdnn::Opr>::choose_by_profile( \ | |||
| ExeContext& ctx, bool require_reproducible, bool enable_update); \ | |||
| template size_t AlgoChooser<megdnn::Opr>::setup_algo( \ | |||
| const ConvTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \ | |||
| const TensorLayoutArray& layouts, megdnn::Opr* megdnn_opr, \ | |||
| const MGBOpr* mgb_opr, bool allow_weight_preprocess); | |||
| MGB_FOREACH_FASTRUN_OPR(INST) | |||
| @@ -74,12 +74,8 @@ class Convolution { | |||
| mutable bool m_policy_accessed = false; | |||
| ExecutionPolicy m_policy; | |||
| std::unique_ptr<AlgoChooserProfileCache> m_profile_cache; | |||
| AlgoChooserHook m_algo_chooser; | |||
| virtual void init_profile_cache() = 0; | |||
| //! init output desc for conv backward data oprs; it handles both grad | |||
| //! usage and deconv usage | |||
| template <class MgbOpr, class MegDNNOpr> | |||
| @@ -159,7 +155,6 @@ class ConvolutionTestingPeer; | |||
| MGB_DEFINE_OPR_CLASS(ConvolutionForward, | |||
| intl::ConvolutionForwardBase, public mixin::Convolution) // { | |||
| void init_profile_cache() override; | |||
| void init_output_dtype() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| @@ -245,7 +240,6 @@ public: | |||
| const ExecutionPolicy& policy = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| void init_profile_cache() override; | |||
| std::pair<const void*, size_t> param_blob() const override; | |||
| static void check_winograd_param_valid( | |||
| @@ -268,7 +262,6 @@ MGB_DEFINE_OPR_CLASS(ConvolutionBackwardData, | |||
| void init_output_format() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_profile_cache() override; | |||
| void scn_do_execute() override; | |||
| NodeProp *do_make_node_prop() const override; | |||
| @@ -310,7 +303,6 @@ MGB_DEFINE_OPR_CLASS(ConvolutionBackwardFilter, | |||
| intl::MegDNNOprWrapperBwd<megdnn::ConvolutionBackwardFilter>, | |||
| public mixin::Convolution ) // { | |||
| void init_profile_cache() override final; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| @@ -360,7 +352,6 @@ MGB_DEFINE_OPR_CLASS(Convolution3DForward, | |||
| intl::MegDNNOprWrapperFwd<megdnn::Convolution3DForward>, | |||
| public mixin::Convolution) // { | |||
| void init_profile_cache() override; | |||
| void init_output_dtype() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| @@ -391,7 +382,6 @@ MGB_DEFINE_OPR_CLASS(Convolution3DBackwardData, | |||
| void init_output_static_infer_desc() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_profile_cache() override; | |||
| void scn_do_execute() override; | |||
| NodeProp *do_make_node_prop() const override; | |||
| @@ -433,8 +423,6 @@ MGB_DEFINE_OPR_CLASS(Convolution3DBackwardFilter, | |||
| intl::MegDNNOprWrapperBwd<megdnn::Convolution3DBackwardFilter>, | |||
| public mixin::Convolution) // { | |||
| void init_profile_cache() override final; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const override final; | |||
| @@ -455,7 +443,6 @@ MGB_DEFINE_OPR_CLASS(Convolution3DBackwardFilter, | |||
| MGB_DEFINE_OPR_CLASS(LocalShareForward, | |||
| intl::MegDNNOprWrapperFwd<megdnn::LocalShareForward>, | |||
| public mixin::Convolution) // { | |||
| void init_profile_cache() override final; | |||
| void init_output_dtype() override; | |||
| void init_output_format() override; | |||
| @@ -483,7 +470,6 @@ MGB_DEFINE_OPR_CLASS( | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_profile_cache() override; | |||
| void scn_do_execute() override; | |||
| NodeProp* do_make_node_prop() const override; | |||
| @@ -506,7 +492,6 @@ MGB_DEFINE_OPR_CLASS( | |||
| LocalShareBackwardFilter, | |||
| intl::MegDNNOprWrapperBwd<megdnn::LocalShareBackwardFilter>, | |||
| public mixin::Convolution) // { | |||
| void init_profile_cache() override final; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| @@ -542,7 +527,6 @@ MGB_DEFINE_OPR_CLASS(DeformableConvForward, | |||
| std::pair<const void*, size_t> param_blob() const override; | |||
| private: | |||
| void init_profile_cache() override; | |||
| void init_output_dtype() override; | |||
| void init_output_format() override; | |||
| size_t get_workspace_size_bytes( | |||
| @@ -589,7 +573,6 @@ private: | |||
| void add_input_layout_constraint() override { | |||
| mixin::megdnn_utils::add_input_layout_constraint_contig(*this); | |||
| } | |||
| void init_profile_cache() override; | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS( | |||
| @@ -612,7 +595,6 @@ public: | |||
| std::pair<const void*, size_t> param_blob() const override; | |||
| private: | |||
| void init_profile_cache() override; | |||
| size_t get_workspace_size_bytes(const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) | |||
| const override final; | |||
| @@ -668,7 +650,6 @@ public: | |||
| const ExecutionPolicy& policy = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| void init_profile_cache() override; | |||
| std::pair<const void*, size_t> param_blob() const override; | |||
| }; | |||
| using BatchConvBias = BatchConvBiasForward; | |||
| @@ -48,16 +48,16 @@ class AlgoChooser { | |||
| using ImplAlgo = typename Opr::AlgorithmInfo; | |||
| using MGBOpr = typename MegDNNOpr2MGBOpr<Opr>::MGBOpr; | |||
| using ConvTensorLayouts = std::array<TensorLayout, arity>; | |||
| using TensorLayoutArray = std::array<TensorLayout, arity>; | |||
| class ExeContext { | |||
| const ConvTensorLayouts& m_layouts; | |||
| const TensorLayoutArray& m_layouts; | |||
| Opr* m_megdnn_opr; | |||
| const MGBOpr* m_mgb_opr; | |||
| bool m_allow_weight_preprocess; | |||
| public: | |||
| ExeContext(const ConvTensorLayouts& layouts, Opr* megdnn_opr, | |||
| ExeContext(const TensorLayoutArray& layouts, Opr* megdnn_opr, | |||
| const MGBOpr* mgb_opr, bool allow_weight_preprocess) | |||
| : m_layouts{layouts}, | |||
| m_megdnn_opr{megdnn_opr}, | |||
| @@ -65,9 +65,9 @@ class AlgoChooser { | |||
| m_allow_weight_preprocess{allow_weight_preprocess} { | |||
| mgb_assert(m_layouts.size() == layouts.size()); | |||
| static_assert( | |||
| std::tuple_size<ConvTensorLayouts>::value == 3 || | |||
| std::tuple_size<ConvTensorLayouts>::value == 5 || | |||
| std::tuple_size<ConvTensorLayouts>::value == 8, | |||
| std::tuple_size<TensorLayoutArray>::value == 3 || | |||
| std::tuple_size<TensorLayoutArray>::value == 5 || | |||
| std::tuple_size<TensorLayoutArray>::value == 8, | |||
| "Convolution AlgoChooser assumes arity = 3 , 5 or 8 (for " | |||
| "deformable conv)"); | |||
| } | |||
| @@ -80,7 +80,7 @@ class AlgoChooser { | |||
| return m_layouts[idx]; | |||
| } | |||
| const ConvTensorLayouts& layouts() const { return m_layouts; } | |||
| const TensorLayoutArray& layouts() const { return m_layouts; } | |||
| ImplAlgo choose_by_heuristic(bool reproducible = false) const; | |||
| @@ -125,7 +125,7 @@ public: | |||
| /*! | |||
| * \brief setup algorithm and return workspace size | |||
| */ | |||
| static size_t setup_algo(const ConvTensorLayouts& layouts, Opr* megdnn_opr, | |||
| static size_t setup_algo(const TensorLayoutArray& layouts, Opr* megdnn_opr, | |||
| const MGBOpr* mgb_opr, | |||
| bool allow_weight_preprocess = false); | |||
| }; | |||