|
- #include <limits>
- #include <unordered_set>
-
- #include "megbrain/exception.h"
- #include "megbrain/rdnn/algo_chooser.h"
- #include "megbrain/utils/invoke.h"
-
- //! TODO: some megdnn::opr declarations have to be visible here when midout.h is
- //! produced; fix it if there is a more graceful way.
- #include "megdnn/opr_param_defs.h"
- #include "megdnn/oprs.h"
- #include "megdnn/oprs/base.h"
- #include "midout.h"
- MIDOUT_DECL(megbrain_opr_algo_chooser)
- #define MIDOUT_B(...) MIDOUT_BEGIN(megbrain_opr_algo_chooser, __VA_ARGS__) {
- #define MIDOUT_E \
- } \
- MIDOUT_END();
-
- using namespace megdnn;
- using namespace mgb;
-
- //! Evaluates \p _expr inside a generic lambda taking the pack `args`, and hands
- //! that lambda plus std::tuple_cat(__VA_ARGS__) to mgb::apply (declared outside
- //! this file; presumably std::apply-like). \p _expr is expected to spell out
- //! `args...` wherever the tuple elements are needed.
- #define APPLY(_expr, ...)                                \
-     mgb::apply(                                          \
-             [&](const auto&... args) { return _expr; },  \
-             std::tuple_cat(__VA_ARGS__))
-
- // Timeout delta that is added to the time of the fastest known algorithm when new algos are profiled.
- constexpr double TIMEOUT_TOLERANCE = 2;
-
- namespace {
-
- //! Compile-time map from a megdnn operator type to its name as a C string;
- //! only the specializations generated below are defined.
- template <class MegDNNOpr>
- struct MegDNNOpr2Typename;
-
- //! Specializes MegDNNOpr2Typename for megdnn::_OprName; `name` holds the
- //! stringified operator identifier.
- #define DEF_OPR_TYPENAME(_OprName)                   \
-     template <>                                      \
-     struct MegDNNOpr2Typename<megdnn::_OprName> {    \
-         static const char* name;                     \
-     };                                               \
-     const char* MegDNNOpr2Typename<megdnn::_OprName>::name = #_OprName;
-
- DNN_FOREACH_FASTRUN_OPR(DEF_OPR_TYPENAME)
- #undef DEF_OPR_TYPENAME
-
- //! Builds the profile-cache name for \p opr: the operator type name, followed
- //! by CACHE_KEY_VERSION, followed by the operator's algorithm set name.
- template <typename Opr>
- std::string profile_name(Opr* opr) {
-     std::string cache_name = ::MegDNNOpr2Typename<Opr>::name;
-     cache_name += CACHE_KEY_VERSION;
-     cache_name.append(opr->get_algorithm_set_name());
-     return cache_name;
- }
-
- /**
-  * \brief Renders the layouts as "(in0 , in1 ) -> (out0 )".
-  *
-  * The first \p arity_in entries of \p layouts form the input group and the
-  * next \p arity_out entries the output group. A group with zero entries is
-  * omitted, and \p delimiter is only emitted when both groups are present.
-  */
- template <typename Opr>
- std::string format_fixlayouts(
-         const typename rdnn::AlgoChooser<Opr>::FixedTensorLayouts& layouts,
-         size_t arity_in, size_t arity_out, const std::string& delimiter = " -> ") {
-     // renders layouts[first, first + count) as a parenthesised, comma separated
-     // list; every entry is followed by a single space
-     auto render_group = [&layouts](size_t first, size_t count) {
-         std::string text = "(";
-         for (size_t k = 0; k < count; ++k) {
-             if (k != 0) {
-                 text += ", ";
-             }
-             text += layouts[first + k].to_string();
-             text += " ";
-         }
-         text += ")";
-         return text;
-     };
-
-     const bool has_in = arity_in != 0;
-     const bool has_out = arity_out != 0;
-     std::string formatted;
-     if (has_in) {
-         formatted += render_group(0, arity_in);
-     }
-     if (has_in && has_out) {
-         formatted += delimiter;
-     }
-     if (has_out) {
-         formatted += render_group(arity_in, arity_out);
-     }
-     return formatted;
- }
-
- /**
-  * \brief Guards against circular dependence in the sub opr list.
-  *
-  * put() records a search item and asserts that it has not been recorded yet;
-  * remove() drops an item that was recorded before.
-  */
- class CircularDepsChecker {
-     //! Serialized bytes of a SearchItem plus a hash of them, used as set key.
-     struct SearchItemStorage {
-         std::string data_hold;
-         size_t hash = 0;
-
-         //! Concatenates the serialized opr type, every layout and the param.
-         SearchItemStorage(const Algorithm::SearchItem& item) {
-             Algorithm::serialize_write_pod(item.opr_type, data_hold);
-             for (const auto& ly : item.layouts) {
-                 data_hold += ly.serialize();
-             }
-             data_hold += item.param;
-         }
-
-         //! Fills `hash` from data_hold; it stays 0 until this is called.
-         SearchItemStorage& init_hash() {
-             hash = XXHash64CT::hash(data_hold.data(), data_hold.size(), 20201225);
-             return *this;
-         }
-
-         //! Equality is decided on the serialized bytes only.
-         bool operator==(const SearchItemStorage& rhs) const {
-             return data_hold == rhs.data_hold;
-         }
-
-         //! Hasher that simply returns the precomputed hash.
-         struct Hash {
-             size_t operator()(const SearchItemStorage& s) const { return s.hash; }
-         };
-     };
-     std::unordered_set<SearchItemStorage, SearchItemStorage::Hash> m_set;
-
-     //! Serializes \p key and computes its hash in one step.
-     static SearchItemStorage hashed_storage(const megdnn::Algorithm::SearchItem& key) {
-         SearchItemStorage storage(key);
-         storage.init_hash();
-         return storage;
-     }
-
- public:
-     //! Records \p key; asserts when an equal item is already recorded.
-     void put(const megdnn::Algorithm::SearchItem& key) {
-         SearchItemStorage key_storage = hashed_storage(key);
-         mgb_assert(
-                 m_set.find(key_storage) == m_set.end(),
-                 "Circular dependency during flatten search space");
-         auto ret = m_set.insert(std::move(key_storage));
-         mgb_assert(ret.second);
-     }
-
-     //! Forgets \p key; asserts when no equal item is recorded.
-     void remove(const megdnn::Algorithm::SearchItem& key) {
-         const SearchItemStorage key_storage = hashed_storage(key);
-         auto iter = m_set.find(key_storage);
-         mgb_assert(iter != m_set.end());
-         m_set.erase(iter);
-     }
- };
-
- ///////////////// OprTypeTrait /////////////////////////////
- //! Maps an Algorithm::OprType enumerator to the megdnn operator class (`Opr`).
- template <megdnn::Algorithm::OprType>
- struct OprFromOprTypeTrait;
-
- //! Maps a megdnn operator class back to its Algorithm::OprType (`opr_type`).
- template <typename Opr>
- struct OprTypeFromOprTrait;
-
- //! Defines both directions of the mapping for one (enumerator, class) pair.
- #define DEF_OPR_TYPE_TRAIT(_type_enum, _opr_cls)                           \
-     template <>                                                            \
-     struct OprFromOprTypeTrait<megdnn::Algorithm::OprType::_type_enum> {   \
-         using Opr = megdnn::_opr_cls;                                      \
-     };                                                                     \
-     template <>                                                            \
-     struct OprTypeFromOprTrait<megdnn::_opr_cls> {                         \
-         constexpr static megdnn::Algorithm::OprType opr_type =             \
-                 megdnn::Algorithm::OprType::_type_enum;                    \
-     }
-
- DEF_OPR_TYPE_TRAIT(MATRIX_MUL_FORWARD, MatrixMulForward);
- DEF_OPR_TYPE_TRAIT(BATCHED_MATRIX_MUL_FORWARD, BatchedMatrixMulForward);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION_FORWARD, ConvolutionForward);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION_BACKWARD_DATA, ConvolutionBackwardData);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION_BACKWARD_FILTER, ConvolutionBackwardFilter);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION3D_FORWARD, Convolution3DForward);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION3D_BACKWARD_DATA, Convolution3DBackwardData);
- DEF_OPR_TYPE_TRAIT(CONVOLUTION3D_BACKWARD_FILTER, Convolution3DBackwardFilter);
- DEF_OPR_TYPE_TRAIT(LOCAL_SHARE_FORWARD, LocalShareForward);
- DEF_OPR_TYPE_TRAIT(LOCAL_SHARE_BACKWARD_DATA, LocalShareBackwardData);
- DEF_OPR_TYPE_TRAIT(LOCAL_SHARE_BACKWARD_FILTER, LocalShareBackwardFilter);
- DEF_OPR_TYPE_TRAIT(DEFORMABLE_CONV_FORWARD, DeformableConvForward);
- DEF_OPR_TYPE_TRAIT(DEFORMABLE_CONV_BACKWARD_DATA, DeformableConvBackwardData);
- DEF_OPR_TYPE_TRAIT(DEFORMABLE_CONV_BACKWARD_FILTER, DeformableConvBackwardFilter);
- DEF_OPR_TYPE_TRAIT(BATCH_CONV_FORWARD, BatchConvBiasForward);
- DEF_OPR_TYPE_TRAIT(CONVBIAS_FORWARD, ConvBiasForward);
- DEF_OPR_TYPE_TRAIT(POOLING_FORWARD, PoolingForward);
- DEF_OPR_TYPE_TRAIT(POOLING_BACKWARD, PoolingBackward);
-
- #undef DEF_OPR_TYPE_TRAIT
-
- //! Expands \p _case_macro(OPR_TYPE, \p _stmt) once for every operator type
- //! listed here.
- // clang-format off
- #define FOREACH_OPR_TYPE_WITH_STMT(_case_macro, _stmt)      \
-     _case_macro(MATRIX_MUL_FORWARD, _stmt)                  \
-     _case_macro(BATCHED_MATRIX_MUL_FORWARD, _stmt)          \
-     _case_macro(CONVOLUTION_FORWARD, _stmt)                 \
-     _case_macro(CONVOLUTION_BACKWARD_DATA, _stmt)           \
-     _case_macro(CONVOLUTION_BACKWARD_FILTER, _stmt)         \
-     _case_macro(CONVOLUTION3D_FORWARD, _stmt)               \
-     _case_macro(CONVOLUTION3D_BACKWARD_DATA, _stmt)         \
-     _case_macro(CONVOLUTION3D_BACKWARD_FILTER, _stmt)       \
-     _case_macro(LOCAL_SHARE_FORWARD, _stmt)                 \
-     _case_macro(LOCAL_SHARE_BACKWARD_DATA, _stmt)           \
-     _case_macro(LOCAL_SHARE_BACKWARD_FILTER, _stmt)         \
-     _case_macro(DEFORMABLE_CONV_FORWARD, _stmt)             \
-     _case_macro(DEFORMABLE_CONV_BACKWARD_DATA, _stmt)       \
-     _case_macro(DEFORMABLE_CONV_BACKWARD_FILTER, _stmt)     \
-     _case_macro(BATCH_CONV_FORWARD, _stmt)                  \
-     _case_macro(CONVBIAS_FORWARD, _stmt)                    \
-     _case_macro(POOLING_FORWARD, _stmt)                     \
-     _case_macro(POOLING_BACKWARD, _stmt)
- // clang-format on
-
- //! One `case` label for \p _type_enum: binds the matching megdnn operator
- //! class to the alias `_Opr`, runs \p _body and then breaks out of the switch.
- #define _OPR_TYPE_CASE(_type_enum, _body)                                    \
-     case Algorithm::OprType::_type_enum: {                                   \
-         using _Opr =                                                         \
-                 typename OprFromOprTypeTrait<Algorithm::OprType::_type_enum>::Opr; \
-         _body;                                                               \
-         break;                                                               \
-     }
-
- //! Runs \p _body for every element of \p _items, dispatching on its opr_type.
- //! Inside \p _body the current element is `_item`, its index `_item_idx` and
- //! the operator class `_Opr`; an opr_type without a case throws MegBrainError.
- //! Because the switch is nested in a for loop, `continue` in \p _body moves on
- //! to the next element.
- #define FOREACH_OPR_TYPE_DISPATCH(_items, _body)                             \
-     for (size_t _item_idx = 0; _item_idx < _items.size(); _item_idx++) {     \
-         auto&& _item = _items[_item_idx];                                    \
-         switch (_item.opr_type) {                                            \
-             FOREACH_OPR_TYPE_WITH_STMT(_OPR_TYPE_CASE, _body)                \
-             default:                                                         \
-                 mgb_throw(MegBrainError, "unknown opr_type");                \
-         }                                                                    \
-     }
-
- //! Copies the fixed-size layout container into a TensorLayoutArray, keeping
- //! the element order.
- template <typename Opr>
- TensorLayoutArray to_layout_array(
-         const typename rdnn::AlgoChooser<Opr>::FixedTensorLayouts& layouts) {
-     TensorLayoutArray converted;
-     for (size_t pos = 0; pos < layouts.size(); ++pos) {
-         converted.push_back(layouts[pos]);
-     }
-     return converted;
- }
-
- //! Copies a TensorLayoutArray into the fixed-size layout container of \p Opr;
- //! asserts that the number of layouts matches the container size.
- template <typename Opr>
- typename rdnn::AlgoChooser<Opr>::FixedTensorLayouts to_fixed_layouts(
-         const TensorLayoutArray& layouts) {
-     using Fixed = typename rdnn::AlgoChooser<Opr>::FixedTensorLayouts;
-     Fixed ret;
-     mgb_assert(ret.size() == layouts.size());
-     size_t slot = 0;
-     for (const auto& src : layouts) {
-         ret[slot] = src;
-         ++slot;
-     }
-     return ret;
- }
-
- /**
-  * flatten search space in postorder traversal
-  * The subopr search constructs a search tree
-  *
-  *           A
-  *        /    \
-  *     B1B2     C
-  *    /    \
-  * D1D2D3   E
-  * We traverse the search tree in postorder:
-  * D1 -> D2 -> D3 -> E -> B1 -> B2 -> C -> A
-  *
-  * \param helper chooser helper of the operator at the root of the (sub)tree
-  * \param checker records the items currently being expanded; it asserts when
-  *        an item is entered again while it is still being expanded
-  * \return the search items of all sub oprs of every candidate algorithm,
-  *         followed by the item of \p helper itself
-  */
- template <typename Opr>
- std::vector<megdnn::Algorithm::SearchItem> flatten_search_space(
-         const typename rdnn::AlgoChooser<Opr>::AlgoChooserHelper& helper,
-         CircularDepsChecker& checker) {
-     auto&& search_item = megdnn::Algorithm::SearchItem{
-             OprTypeFromOprTrait<Opr>::opr_type, helper.param(),
-             to_layout_array<Opr>(helper.fastrun_layouts())};
-     checker.put(search_item);
-     std::vector<megdnn::Algorithm::SearchItem> ret;
-     // bind by reference: every candidate info owns strings, no need to copy it
-     for (const auto& algo_info : helper.get_all_candidates()) {
-         megdnn::Algorithm* algo = helper.get_algorithm_from_desc(algo_info.desc);
-         mgb_assert(algo, "Unknown algo description");
-         // search_item.layouts already holds the converted fastrun layouts, so
-         // the array is not rebuilt for every candidate
-         std::vector<megdnn::Algorithm::SearchItem>&& sub_items =
-                 algo->get_subopr_list(search_item.layouts, helper.megdnn_opr());
-
-         FOREACH_OPR_TYPE_DISPATCH(sub_items, {
-             auto&& megdnn_opr = opr::intl::create_megdnn_opr<_Opr>(helper.comp_node());
-             megdnn_opr->param() =
-                     Algorithm::deserialize_read_pod<typename _Opr::Param>(_item.param);
-             typename rdnn::AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
-                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
-                     _item.param, helper.comp_node(), helper.execution_policy(),
-                     helper.allow_weight_preprocess(), helper.desc());
-             // children first (postorder)
-             auto space = flatten_search_space<_Opr>(sub_helper, checker);
-             ret.insert(ret.end(), space.begin(), space.end());
-         });
-     }
-     // the item of this operator comes after all of its descendants
-     ret.push_back(search_item);
-     checker.remove(search_item);
-     return ret;
- }
-
- //! Serializes an algo desc into \p result through
- //! megdnn::Algorithm::serialize_write_pod. The fields are written in the order
- //! handle_type|algo_type|size_of_param|size_of_name|string_of_param|string_of_name
- static void serialize_write_pod(const Algorithm::Info::Desc& val, std::string& result) {
-     using Alg = megdnn::Algorithm;
-
-     // header: handle type, algo type and the byte lengths of the two strings
-     Alg::serialize_write_pod(val.handle_type, result);
-     Alg::serialize_write_pod(val.type, result);
-     const uint32_t param_bytes = val.param.size();
-     const uint32_t name_bytes = val.name.size();
-     Alg::serialize_write_pod<uint32_t>(param_bytes, result);
-     Alg::serialize_write_pod<uint32_t>(name_bytes, result);
-
-     // payload: the param string and the name string
-     Alg::serialize_write_pod(val.param, result);
-     Alg::serialize_write_pod(val.name, result);
- }
-
- /**
-  * \brief Reads back an algo desc written by serialize_write_pod above.
-  *
-  * Expected layout, starting at \p offset:
-  * handle_type|algo_type|size_of_param|size_of_name|string_of_param|string_of_name
-  *
-  * \param data buffer holding the serialized desc
-  * \param offset position of the first byte of the desc inside \p data
-  * \return the decoded desc; param/name stay empty when their size is 0
-  *
-  * Every read is bounds checked with mgb_assert, so a truncated or corrupted
-  * buffer is reported instead of being read past its end.
-  */
- static Algorithm::Info::Desc deserialize_read_pod(
-         const std::string& data, size_t offset = 0) {
-     Algorithm::Info::Desc ret;
- // reads one fixed-size field at `offset` and advances `offset` past it
- #define cb(_val, _type)                                                         \
-     mgb_assert(                                                                 \
-             offset <= data.size() && sizeof(_type) <= data.size() - offset,     \
-             "corrupted algo desc: need %zu bytes at offset %zu, but size is "   \
-             "%zu",                                                              \
-             sizeof(_type), offset, data.size());                                \
-     _val = megdnn::Algorithm::deserialize_read_pod<_type>(data.data(), offset); \
-     offset += sizeof(_val)
-
-     cb(ret.handle_type, megdnn::Handle::HandleType);
-     cb(ret.type, uint32_t);
-
-     uint32_t param_size = 0;
-     uint32_t name_size = 0;
-     cb(param_size, uint32_t);
-     cb(name_size, uint32_t);
- // keep the helper macro local to this function
- #undef cb
-
-     // offset <= data.size() holds here, so the subtractions can not wrap
-     mgb_assert(
-             param_size <= data.size() - offset &&
-                     name_size <= data.size() - offset - param_size,
-             "corrupted algo desc: param(%u) and name(%u) exceed the %zu bytes "
-             "left",
-             param_size, name_size, data.size() - offset);
-
-     if (param_size > 0) {
-         ret.param = std::string(data.data() + offset, param_size);
-         offset += param_size;
-     }
-     if (name_size > 0) {
-         ret.name = std::string(data.data() + offset, name_size);
-         offset += name_size;
-     }
-     return ret;
- }
-
- } // namespace
-
- namespace megdnn {
- namespace param {
- MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy) // presumably defines the bitwise operators for the Strategy enum class - TODO confirm against the macro definition
- } // namespace param
- } // namespace megdnn
-
- namespace mgb {
- namespace rdnn {
- /**
-  * \brief Rewrites the batch size stored in a set of layouts.
-  *
-  * This primary template is the fallback for operators without a
-  * specialization: on() leaves the layouts untouched.
-  */
- template <class OprT>
- class LayoutsModifier {
-     using FixedTensorLayouts = typename AlgoChooser<OprT>::FixedTensorLayouts;
-
- public:
-     //! no-op in the fallback
-     static void on(FixedTensorLayouts&, const typename OprT::Param&, size_t) {}
-
- private:
-     //! index of batch in tensor, 3 for CHWN4 e.g.
-     static size_t index_of_batch(const typename OprT::Param&) { return 0; }
-
-     //! indices contain batch in inputs and outputs, src(0) dst(2) for conv e.g.
-     static std::vector<size_t> sm_indices_contain_batch;
- };
- //! empty in the fallback
- template <class OprT>
- std::vector<size_t> LayoutsModifier<OprT>::sm_indices_contain_batch{};
-
- //! Specializes LayoutsModifier for \p _OprT: on() writes new_batch_size into
- //! dimension 0 of every layout whose index is listed in \p _batch_tensor_ids.
- #define DEFAULT_OPR_WITHOUT_INPUT_BROADCAST(_OprT, _batch_tensor_ids)            \
-     template <>                                                                  \
-     class LayoutsModifier<_OprT> {                                               \
-     public:                                                                      \
-         using FixedTensorLayouts = typename AlgoChooser<_OprT>::FixedTensorLayouts; \
-         static void on(                                                          \
-                 FixedTensorLayouts& layouts, const _OprT::Param& param,          \
-                 size_t new_batch_size) {                                         \
-             const size_t batch_axis = index_of_batch(param);                     \
-             for (const size_t tensor_id : sm_indices_contain_batch) {            \
-                 layouts.at(tensor_id)[batch_axis] = new_batch_size;              \
-             }                                                                    \
-         }                                                                        \
-                                                                                  \
-     private:                                                                     \
-         static size_t index_of_batch(const _OprT::Param&) { return 0; }          \
-         static std::vector<size_t> sm_indices_contain_batch;                     \
-     };                                                                           \
-     std::vector<size_t> LayoutsModifier<_OprT>::sm_indices_contain_batch =       \
-             _batch_tensor_ids;
-
- DEFAULT_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::Convolution3DForward, (std::initializer_list<size_t>{0, 2}))
- DEFAULT_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::Convolution3DBackwardData, (std::initializer_list<size_t>{1, 2}))
- DEFAULT_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::Convolution3DBackwardFilter, (std::initializer_list<size_t>{0, 1}))
- DEFAULT_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::BatchedMatrixMul, (std::initializer_list<size_t>{0, 1, 2}))
- #undef DEFAULT_OPR_WITHOUT_INPUT_BROADCAST
-
- //! Specializes LayoutsModifier for the conv-like \p _OprT: on() writes
- //! new_batch_size into the batch dimension of every layout whose index is
- //! listed in \p _batch_tensor_ids. The batch dimension is 3 when the param
- //! format is CHWN4 and 0 otherwise.
- #define CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(_OprT, _batch_tensor_ids)          \
-     template <>                                                                  \
-     class LayoutsModifier<_OprT> {                                               \
-     public:                                                                      \
-         using FixedTensorLayouts = typename AlgoChooser<_OprT>::FixedTensorLayouts; \
-         static void on(                                                          \
-                 FixedTensorLayouts& layouts, const _OprT::Param& param,          \
-                 size_t new_batch_size) {                                         \
-             const size_t batch_axis = index_of_batch(param);                     \
-             for (const size_t tensor_id : sm_indices_contain_batch) {            \
-                 layouts.at(tensor_id)[batch_axis] = new_batch_size;              \
-             }                                                                    \
-         }                                                                        \
-                                                                                  \
-     private:                                                                     \
-         static size_t index_of_batch(const _OprT::Param& param) {                \
-             return param.format == _OprT::Param::Format::CHWN4 ? 3 : 0;          \
-         }                                                                        \
-         static std::vector<size_t> sm_indices_contain_batch;                     \
-     };                                                                           \
-     std::vector<size_t> LayoutsModifier<_OprT>::sm_indices_contain_batch =       \
-             _batch_tensor_ids;
-
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::ConvolutionForward, (std::initializer_list<size_t>{0, 2}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::ConvolutionBackwardData, (std::initializer_list<size_t>{1, 2}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::ConvolutionBackwardFilter, (std::initializer_list<size_t>{0, 1}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::LocalShareForward, (std::initializer_list<size_t>{0, 2}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::LocalShareBackwardData, (std::initializer_list<size_t>{1, 2}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::LocalShareBackwardFilter, (std::initializer_list<size_t>{0, 1}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::DeformableConvForward, (std::initializer_list<size_t>{0, 2, 3, 4}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::DeformableConvBackwardData,
-         (std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::DeformableConvBackwardFilter,
-         (std::initializer_list<size_t>{0, 1, 2, 3}))
- CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST(
-         megdnn::BatchConvBiasForward, (std::initializer_list<size_t>{0, 1, 2, 3, 4}))
- #undef CONV_LIKE_OPR_WITHOUT_INPUT_BROADCAST
-
- /**
-  * \brief LayoutsModifier for ConvBiasForward.
-  *
-  * Layouts 0, 3 and 4 always get the new batch size. Layout 2 only gets it
-  * when check_bias_share_in_channel() returns false for it.
-  */
- template <>
- class LayoutsModifier<megdnn::ConvBiasForward> {
-     using Param = megdnn::ConvBiasForward::Param;
-
- public:
-     using FixedTensorLayouts =
-             typename AlgoChooser<megdnn::ConvBiasForward>::FixedTensorLayouts;
-
-     //! writes \p new_batch_size into the batch dimension of the affected layouts
-     static void on(
-             FixedTensorLayouts& layouts, const megdnn::ConvBiasForward::Param& param,
-             size_t new_batch_size) {
-         const size_t batch_axis = index_of_batch(param);
-         for (const size_t tensor_id : sm_indices_contain_batch) {
-             layouts.at(tensor_id)[batch_axis] = new_batch_size;
-         }
-         for (const size_t tensor_id : sm_indices_contain_batch_broadcast) {
-             if (check_bias_share_in_channel(layouts.at(tensor_id), param.format)) {
-                 // bias shared in channel: left as it is
-                 continue;
-             }
-             layouts.at(tensor_id)[batch_axis] = new_batch_size;
-         }
-     }
-
- private:
-     //! layouts that are always modified
-     static std::vector<size_t> sm_indices_contain_batch;
-     //! layouts that are modified unless the bias is shared in channel
-     static std::vector<size_t> sm_indices_contain_batch_broadcast;
-
-     //! batch dimension: 3 for the CHWN4 format, 0 otherwise
-     static size_t index_of_batch(const megdnn::ConvBiasForward::Param& param) {
-         return param.format == Param::Format::CHWN4 ? 3 : 0;
-     }
- };
- std::vector<size_t> LayoutsModifier<megdnn::ConvBiasForward>::sm_indices_contain_batch =
-         {0, 3, 4};
- std::vector<size_t>
-         LayoutsModifier<megdnn::ConvBiasForward>::sm_indices_contain_batch_broadcast = {
-                 2};
-
- /**
-  * \brief LayoutsModifier for MatrixMul.
-  *
-  * It is not known whether the batch size lives in dimension m or in
-  * dimension n, so both are overwritten with the new batch size: one dimension
-  * of layout 0 (selected by transposeA), one dimension of layout 1 (selected
-  * by transposeB) and both dimensions of layout 2.
-  */
- template <>
- class LayoutsModifier<megdnn::MatrixMul> {
- public:
-     using FixedTensorLayouts =
-             typename AlgoChooser<megdnn::MatrixMul>::FixedTensorLayouts;
-     static void on(
-             FixedTensorLayouts& layouts, const megdnn::MatrixMul::Param& param,
-             size_t new_batch_size) {
-         // FIXME Find a way to make mgb obtain batch size information from R or
-         // automatically
-         // layout 0: dimension 1 when A is transposed, dimension 0 otherwise
-         layouts.at(0)[param.transposeA ? 1 : 0] = new_batch_size;
-         // layout 1: dimension 0 when B is transposed, dimension 1 otherwise
-         layouts.at(1)[param.transposeB ? 0 : 1] = new_batch_size;
-         // layout 2: both dimensions
-         for (size_t axis = 0; axis < 2; ++axis) {
-             layouts.at(2)[axis] = new_batch_size;
-         }
-     }
- };
-
- ///////////////////////////// AlgoChooserHelper //////////////////////////
- /**
-  * \brief Builds the helper and derives its two layout sets from \p layouts.
-  *
-  * m_fastrun_layouts and m_incache_layouts both start as copies of \p layouts.
-  * When desc.shared_batch_size is non-zero, the batch size is set to 0 in
-  * m_incache_layouts and to the shared batch size in m_fastrun_layouts.
-  * When desc.no_profiling_on_shape_change is set, every shape and stride
-  * entry of m_incache_layouts is zeroed.
-  */
- template <typename Opr>
- AlgoChooser<Opr>::AlgoChooserHelper::AlgoChooserHelper(
-         const FixedTensorLayouts& layouts, Opr* megdnn_opr,
-         const std::string& param_str, const CompNode& cn,
-         const megdnn::param::ExecutionPolicy& execution_policy,
-         bool allow_weight_preprocess, const AlgoChooserDesc& desc)
-         : m_fastrun_layouts{layouts},
-           m_incache_layouts{layouts},
-           m_dnn_opr{megdnn_opr},
-           m_param{param_str},
-           m_cn{cn},
-           m_execution_policy{execution_policy},
-           m_allow_weight_preprocess{allow_weight_preprocess},
-           m_desc{desc} {
-     if (const auto fastrun_batch_size = desc.shared_batch_size) {
-         LayoutsModifier<Opr>::on(m_incache_layouts, m_dnn_opr->param(), 0);
-         LayoutsModifier<Opr>::on(
-                 m_fastrun_layouts, m_dnn_opr->param(), fastrun_batch_size);
-     }
-
-     if (m_desc.no_profiling_on_shape_change) {
-         for (auto&& cached_layout : m_incache_layouts) {
-             for (size_t axis = 0; axis < cached_layout.ndim; ++axis) {
-                 cached_layout[axis] = 0;
-                 cached_layout.stride[axis] = 0;
-             }
-         }
-     }
-
-     mgb_assert(m_fastrun_layouts.size() == layouts.size());
-
-     constexpr size_t layout_count = std::tuple_size<FixedTensorLayouts>::value;
-     static_assert(
-             layout_count == 2 || layout_count == 3 || layout_count == 4 ||
-                     layout_count == 5 || layout_count == 8,
-             "Pooling assumes arity = 2 or 4,Convolution AlgoChooser assumes "
-             "arity = 3 , 5 or 8 (for deformable conv)");
- }
-
- /**
-  * \brief Picks the algorithm megdnn's heuristic suggests, recursively for all
-  *        sub oprs.
-  *
-  * \param selected_strategy strategy from which the required and forbidden
-  *        algorithm attributes are extracted
-  * \return a policy whose algo is the heuristic choice for the fastrun layouts
-  *         and whose sub_policy has one heuristic policy per sub opr
-  */
- template <typename Opr>
- typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelper::
-         choose_by_heuristic(const ExecutionStrategy& selected_strategy) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("choose_by_heuristic")))
-     const auto ws_limit =
-             m_desc.get_workspace_limit(m_cn, m_execution_policy.workspace_limit);
-     const auto attr_pair = extract_algo_attribute(selected_strategy);
-
-     ImplExecutionPolicy policy;
-     policy.algo =
-             APPLY(m_dnn_opr->get_algorithm_info_heuristic(
-                           args..., ws_limit, attr_pair.first, attr_pair.second),
-                   m_fastrun_layouts)
-                     .desc;
-
-     Algorithm* algo = m_dnn_opr->get_algorithm_from_desc(policy.algo);
-     mgb_assert(algo, "Unknown algo description");
-     std::vector<Algorithm::SearchItem> sub_items =
-             algo->get_subopr_list(to_layout_array<Opr>(m_fastrun_layouts), m_dnn_opr);
-
-     // one heuristic sub policy per sub opr, in the order of the sub opr list
-     FOREACH_OPR_TYPE_DISPATCH(sub_items, {
-         auto&& sub_opr = opr::intl::create_megdnn_opr<_Opr>(m_cn);
-         sub_opr->param() =
-                 Algorithm::deserialize_read_pod<typename _Opr::Param>(_item.param);
-         typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
-                 to_fixed_layouts<_Opr>(_item.layouts), sub_opr.get(), _item.param,
-                 m_cn, m_execution_policy, m_allow_weight_preprocess, m_desc);
-         policy.sub_policy.push_back(sub_helper.choose_by_heuristic(selected_strategy));
-     });
-
-     return policy;
-     MIDOUT_E
- }
-
- /**
-  * \brief Picks an algorithm from the profiling cache, profiling first when
-  *        that is needed and allowed.
-  *
-  * \param selected_strategy strategy used to filter the algorithms
-  * \param enable_update when true and the cache has no usable entry, the
-  *        flattened search space is profiled before the cache is read again
-  * \return the cached policy if one can be constructed, otherwise the
-  *         heuristic policy
-  */
- template <typename Opr>
- typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelper::
-         choose_by_profile(
-                 const ExecutionStrategy& selected_strategy, bool enable_update) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("choose_by_profile")))
-     using Policy = typename AlgoChooser<Opr>::ImplExecutionPolicy;
-
-     // no_profiling_on_shape_change is usually false, no interface to change it easily
-     if (m_desc.no_profiling_on_shape_change) {
-         auto current = m_dnn_opr->execution_policy();
-         if (current.algo.valid()) {
-             return current;
-         }
-
-         if (is_matmul<Opr>()) {
-             mgb_log_warn(
-                     "choose algo by heuristic, which may cause performance "
-                     "regression.");
-             return choose_by_heuristic(selected_strategy);
-         }
-     }
-
-     // first attempt: read the cache quietly
-     {
-         Policy cached;
-         const bool retrive_from_cache = true;
-         const bool allow_log = false;
-         construct_execution_policy(
-                 selected_strategy, cached, retrive_from_cache, allow_log);
-         if (cached.algo.valid()) {
-             // construction succeeded, nothing else to do
-             return cached;
-         }
-     }
-
-     // if update enabled, do profiling and update cache
-     // enable_update = false only when using HEURISRIC_PROFILE strategy
-     if (enable_update) {
-         CircularDepsChecker circular_deps_checker;
-         auto&& search_items = flatten_search_space<Opr>(*this, circular_deps_checker);
-         FOREACH_OPR_TYPE_DISPATCH(search_items, {
-             auto&& item_opr = opr::intl::create_megdnn_opr<_Opr>(m_cn);
-             // skip different sub opr, for example:
-             // skip matmul algo when profiling convolution
-             if (m_dnn_opr->get_opr_type() != item_opr->get_opr_type())
-                 continue;
-             item_opr->param() =
-                     Algorithm::deserialize_read_pod<typename _Opr::Param>(_item.param);
-             typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
-                     to_fixed_layouts<_Opr>(_item.layouts), item_opr.get(),
-                     _item.param, m_cn, m_execution_policy, m_allow_weight_preprocess,
-                     m_desc);
-             sub_helper.profile(selected_strategy);
-         });
-     }
-
-     // second attempt: read the cache again with the default arguments of
-     // construct_execution_policy; fall back to the heuristic if it still
-     // yields nothing
-     Policy policy;
-     construct_execution_policy(selected_strategy, policy);
-     if (!policy.algo.valid()) {
-         return choose_by_heuristic(selected_strategy);
-     }
-     return policy;
-     MIDOUT_E
- }
-
- /**
-  * \brief Looks up the profiling cache for the current opr and layouts.
-  *
-  * \param selected_strategy strategy from which the required (first) and
-  *        forbidden (second) algorithm attributes are extracted
-  * \return {desc, cache result}:
-  *         - {invalid desc, result} when there is no cache entry or it is empty;
-  *         - {desc of the first entry that has all required attributes, none of
-  *           the forbidden ones and fits the workspace limit, result};
-  *         - a value-initialized pair when an entry was only rejected for its
-  *           workspace.
-  *         Otherwise an error is logged and mgb_trap() is called.
-  */
- template <typename Opr>
- std::pair<
-         typename AlgoChooser<Opr>::ImplAlgoDesc, Maybe<AlgoChooserProfileCache::Result>>
- AlgoChooser<Opr>::AlgoChooserHelper::get_profile_result_from_cache(
-         const ExecutionStrategy& selected_strategy) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_profile_result_from_cache")))
-     AlgoChooserProfileCache cache(m_cn, profile_name(m_dnn_opr).c_str());
-
-     typename Opr::Param origin_param = m_dnn_opr->param();
-     AlgoChooserProfileCache::Key cache_key{
-             m_incache_layouts.data(), m_incache_layouts.size(), &origin_param,
-             sizeof(origin_param)};
-     auto&& rst = cache.get(cache_key);
-     // no cache entry at all, or an entry holding an empty result vector
-     if (!rst.valid() || rst.val().empty())
-         return {{}, rst};
-
-     // non-empty cache result: filter it by attribute and workspace limit
-     auto&& prof = rst.val();
-     const size_t workspace_limit =
-             m_desc.get_workspace_limit(m_cn, m_execution_policy.workspace_limit);
-     const auto target_attr = extract_algo_attribute(selected_strategy);
-     bool skip_by_negative = false;
-     bool skip_by_workspace = false;
-     for (auto&& entry : prof) {
-         const auto attr_of_algo =
-                 static_cast<megdnn::Algorithm::Attribute>(entry.attribute);
-         // every positive attribute has to be present
-         if (!(target_attr.first == (attr_of_algo & target_attr.first)))
-             continue;
-         // no negative attribute may be present
-         if (static_cast<bool>(attr_of_algo & target_attr.second)) {
-             skip_by_negative = true;
-             continue;
-         }
-         if (!(entry.workspace <= workspace_limit)) {
-             skip_by_workspace = true;
-             continue;
-         }
-         // well-suited algorithm: correct attribute and within the workspace limit
-         Algorithm::Info::Desc algo_desc = deserialize_read_pod(entry.algo);
-         return {algo_desc, rst};
-     }
-
-     // failed to find an algorithm that satisfies the actual workspace limit
-     if (skip_by_workspace)
-         return {};
-
-     // failed to find an algorithm that satisfies the actual attribute
-     const std::string layouts_str = AlgoChooser::format_fixlayouts(m_fastrun_layouts);
-     if (!skip_by_negative) {
-         mgb_log_error(
-                 "opr: %s, layouts: %s, No usable algo. algos read from cache "
-                 "could not "
-                 "satisfy positive strategy(%s)",
-                 ::MegDNNOpr2Typename<Opr>::name, layouts_str.c_str(),
-                 Algorithm::attribute_str(target_attr.first).c_str());
-     } else {
-         mgb_log_error(
-                 "opr: %s, layouts: %s, No usable algo. There are available "
-                 "algos match "
-                 "positive strategy(%s), but filtered by negative stategy(%s).",
-                 ::MegDNNOpr2Typename<Opr>::name, layouts_str.c_str(),
-                 Algorithm::attribute_str(target_attr.first).c_str(),
-                 Algorithm::attribute_str(target_attr.second).c_str());
-     }
-
-     mgb_trap();
-     MIDOUT_E
- }
-
- /**
-  * \brief Completes \p policy for this opr and, recursively, for its sub oprs.
-  *
-  * \param selected_strategy strategy used to filter the algorithms
-  * \param policy in/out; when policy.algo is invalid on entry it is filled from
-  *        the profiling cache or from the heuristic, then one sub policy per
-  *        sub opr is appended. It is left or reset to an invalid policy when
-  *        the cache has no usable algorithm for this opr or one of its sub oprs.
-  * \param retrive_from_cache true: take the algorithm from the profiling
-  *        cache; false: take megdnn's heuristic algorithm
-  * \param allow_log whether a cache miss is reported with a debug log
-  */
- template <typename Opr>
- void AlgoChooser<Opr>::AlgoChooserHelper::construct_execution_policy(
-         const ExecutionStrategy& selected_strategy,
-         typename AlgoChooser<Opr>::ImplExecutionPolicy& policy, bool retrive_from_cache,
-         bool allow_log) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("construct_execution_policy")))
-     // policy.algo is always invalid when called from choose_by_profile
-     // policy.algo will be valid when called from profile
-     if (!policy.algo.valid()) {
-         if (!retrive_from_cache) {
-             // retrive_from_cache = false happens when using algo choose hook in
-             // megbrain graph; return heuristic algorithm in this case
-             auto workspace_limit = m_desc.get_workspace_limit(
-                     m_cn, m_execution_policy.workspace_limit);
-             const auto attr_pair = extract_algo_attribute(selected_strategy);
-             policy.algo = APPLY(m_dnn_opr->get_algorithm_info_heuristic(
-                                         args..., workspace_limit, attr_pair.first,
-                                         attr_pair.second),
-                                 m_fastrun_layouts)
-                                   .desc;
-             mgb_assert(
-                     policy.algo.valid(),
-                     "No algo found from heuristic with strategy %u and "
-                     "workspace limit %zu",
-                     static_cast<uint32_t>(selected_strategy), workspace_limit);
-         } else {
-             policy.algo = get_profile_result_from_cache(selected_strategy).first;
-             // nothing is found even with profiling
-             if (!policy.algo.valid()) {
-                 if (allow_log) {
-                     const auto target_attr = extract_algo_attribute(selected_strategy);
-                     const std::string layouts_str =
-                             AlgoChooser::format_fixlayouts(m_fastrun_layouts);
-                     const std::string msg = ssprintf(
-                             "(opr : %s, layouts %s, with attribute(%s) and "
-                             "without attribute(%s)",
-                             ::MegDNNOpr2Typename<Opr>::name, layouts_str.c_str(),
-                             Algorithm::attribute_str(target_attr.first).c_str(),
-                             Algorithm::attribute_str(target_attr.second).c_str());
-                     mgb_log_debug(
-                             "No algo get from cache for %s. This may caused by "
-                             "mismatch with model and cache file or imcomplete "
-                             "cache file. ex. profiling with version1, but "
-                             "inferencing on version2 or profiling modelA but "
-                             "inferencing modelB",
-                             msg.c_str());
-                 }
-                 return;
-             }
-         }
-     }
-
-     // look up the algorithm of this opr and ask it for its sub oprs
-     Algorithm* algo = m_dnn_opr->get_algorithm_from_desc(policy.algo);
-     mgb_assert(algo, "Unknown algo description");
-     std::vector<Algorithm::SearchItem> sub_items =
-             algo->get_subopr_list(to_layout_array<Opr>(m_fastrun_layouts), m_dnn_opr);
-
-     // construct the algorithm of every sub opr
-     FOREACH_OPR_TYPE_DISPATCH(sub_items, {
-         auto&& sub_opr = opr::intl::create_megdnn_opr<_Opr>(m_cn);
-         sub_opr->param() =
-                 Algorithm::deserialize_read_pod<typename _Opr::Param>(_item.param);
-         typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
-                 to_fixed_layouts<_Opr>(_item.layouts), sub_opr.get(), _item.param,
-                 m_cn, m_execution_policy, m_allow_weight_preprocess, m_desc);
-         policy.sub_policy.push_back({});
-         auto&& sub_policy = policy.sub_policy.back();
-         sub_helper.construct_execution_policy(
-                 selected_strategy, sub_policy, retrive_from_cache, allow_log);
-         if (!sub_policy.algo.valid()) {
-             // the sub opr could not be constructed: reset the whole policy
-             // (including policy.algo) and give up
-             policy = {};
-             return;
-         }
-     });
-     MIDOUT_E
- }
-
- /**
-  * \brief Workspace size in bytes the opr needs when run with \p policy.
-  *
-  * \param policy execution policy; it is assigned to the megdnn opr first
-  * \param layouts layouts to query with; when layouts.at(0).ndim is 0 the
-  *        fastrun layouts of this helper are used instead
-  * \return for oprs supporting weight preprocess, the larger one of the
-  *         preprocess workspace and the exec workspace (queried with a fake
-  *         preprocess filter if one is available); otherwise the exec workspace
-  */
- template <typename Opr>
- size_t AlgoChooser<Opr>::AlgoChooserHelper::get_workspace_size_bytes(
-         const ImplExecutionPolicy& policy, const FixedTensorLayouts& layouts) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_workspace_size_bytes")))
-     m_dnn_opr->execution_policy() = policy;
-     const FixedTensorLayouts& used_layouts =
-             layouts.at(0).ndim ? layouts : m_fastrun_layouts;
-     size_t workspace_bytes = 0;
-     if_constexpr<opr_supports_preprocess<Opr>()>(
-             [&](auto _) {
-                 auto&& dnn_opr = _(m_dnn_opr);
-                 auto fake_filter = this->construct_fake_preprocess_filter(used_layouts);
-                 PreprocessFilter<Opr>* filter_ptr =
-                         fake_filter.valid() ? &fake_filter.val() : nullptr;
-                 workspace_bytes = std::max(
-                         APPLY(dnn_opr->get_preprocess_workspace_in_bytes(args...),
-                               used_layouts),
-                         APPLY(dnn_opr->get_workspace_in_bytes(args..., filter_ptr),
-                               used_layouts));
-             },
-             /* else */
-             [&](auto _) {
-                 workspace_bytes = APPLY(
-                         _(m_dnn_opr)->get_workspace_in_bytes(args...), used_layouts);
-             });
-     return workspace_bytes;
-     MIDOUT_E
- }
-
- /**
-  * \brief All algorithms megdnn reports for the fastrun layouts, with the
-  *        heuristic choice moved to the front.
-  *
-  * Putting the heuristic algorithm first makes the profiling step run it
-  * before any other candidate. Asserts that the heuristic algorithm is known
-  * to the opr and that it is part of the candidate list.
-  */
- template <typename Opr>
- std::vector<typename AlgoChooser<Opr>::ImplAlgo> AlgoChooser<
-         Opr>::AlgoChooserHelper::get_all_candidates() const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_all_candidates")))
-     const auto heu = choose_by_heuristic(m_execution_policy.strategy);
-     auto ret = APPLY(m_dnn_opr->get_all_algorithms_info(args...), m_fastrun_layouts);
-
-     // locate the heuristic algorithm and swap it with the first candidate
-     size_t heu_pos = 0;
-     while (heu_pos < ret.size() && !(ret[heu_pos].desc == heu.algo)) {
-         ++heu_pos;
-     }
-     const bool found = heu_pos < ret.size();
-     if (found) {
-         std::swap(ret[heu_pos], ret[0]);
-     }
-
-     // make sure heuristic algorithm is valid
-     Algorithm* palgo = m_dnn_opr->get_algorithm_from_desc(heu.algo);
-     mgb_assert(palgo, "Unknown algo description");
-     mgb_assert(
-             found,
-             "algo %s got by heuristic not found in "
-             "candidate list",
-             palgo->name());
-     return ret;
-     MIDOUT_E
- }
-
- /**
-  * \brief Profiles one algorithm with TimedProfiler on the fastrun layouts.
-  *
-  * \param policy execution policy describing the algorithm to profile
-  * \param timeout passed on to TimedProfiler<Opr>::profile by reference
-  * \return the cache entry (serialized desc, attribute, time, workspace), or
-  *         None when the profiler yields no result or reports
-  *         std::numeric_limits<double>::max() as time
-  */
- template <typename Opr>
- Maybe<AlgoChooserProfileCache::ResultEntry> AlgoChooser<Opr>::AlgoChooserHelper::
-         profile_single_algo(const ImplExecutionPolicy& policy, double& timeout) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("profile_single_algo")))
-     // fill TimedProfiler<Opr>::param and run actual timed profiler
-     typename TimedProfiler<Opr>::Param param;
-     // force check copy size <= dest len-1 from gcc8 for safe
-     param.execution_policy =
-             TimedProfiler<Opr>::Param::ExecutionPolicyBlob::serialize(policy);
-     param.workspace = get_workspace_size_bytes(policy);
-
-     // dtypes: only default-format float/int/quantized layouts, or low-bit
-     // aligned quantized/lowbit layouts can be profiled
-     for (int i = 0; i < arity; ++i) {
-         auto&& src = m_fastrun_layouts[i];
-         const auto category = src.dtype.category();
-         bool cond_normal = src.format.is_default() &&
-                            (category == DTypeCategory::FLOAT ||
-                             category == DTypeCategory::INT ||
-                             category == DTypeCategory::QUANTIZED);
-         bool cond_low_bit = src.dtype.is_low_bit() && src.format.is_lowbit_aligned() &&
-                             (category == DTypeCategory::QUANTIZED ||
-                              category == DTypeCategory::LOWBIT);
-         MGB_MARK_USED_VAR(cond_normal);
-         MGB_MARK_USED_VAR(cond_low_bit);
-         mgb_assert(
-                 cond_normal || cond_low_bit, "unsupported layout in profiling: %s",
-                 src.to_string().c_str());
-         param.dtypes[i] = src.dtype.enumv();
-     }
-
-     param.comp_node_physical = m_cn.locator();
-     param.comp_node_logical = m_cn.locator_logical();
-
-     // shapes
-     mgb_assert(param.shapes.size() == m_fastrun_layouts.size());
-     for (size_t pos = 0; pos < param.shapes.size(); ++pos) {
-         param.shapes[pos] = m_fastrun_layouts[pos];
-     }
-     param.opr_param = m_dnn_opr->param();
-     param.allow_weight_preprocess = m_allow_weight_preprocess;
-
-     Algorithm* palgo = m_dnn_opr->get_algorithm_from_desc(policy.algo);
-     mgb_assert(palgo, "can not find algo when profile single algo");
-
-     auto rst = TimedProfiler<Opr>::profile(param, timeout);
-     // MIOpen conv profiles all available algos when a specific shape is
-     // provided for the first time, which probably adds to the result time.
-     // Therefore, a second profile execution is needed.
-     const bool is_miopen_algo = strncmp(palgo->name(), "MIOpen", 6) == 0;
-     if (is_miopen_algo) {
-         rst = TimedProfiler<Opr>::profile(param, timeout);
-     }
-
-     // no result at all, or dbl_max which the subprocess returns when the
-     // memory limit is not satisfied
-     if (!rst.valid() || rst.val().time == std::numeric_limits<double>::max())
-         return None;
-
-     std::string algo_desc;
-     serialize_write_pod(policy.algo, algo_desc);
-     return AlgoChooserProfileCache::ResultEntry{
-             algo_desc, static_cast<uint32_t>(palgo->attribute()), rst.val().time,
-             param.workspace};
-     MIDOUT_E
- }
-
- //! Profile the candidate algorithms of the current operator and store the
- //! timings in the profile cache.
- //!
- //! Nothing is done when get_profile_result_from_cache() already yields a
- //! valid algorithm for \p selected_strategy. Otherwise every candidate from
- //! get_all_candidates() is timed with profile_single_algo(), except those that
- //!   * already have an entry in the cached result,
- //!   * are named "NAIVE",
- //!   * carry an attribute rejected by the strategy (the second member of
- //!     extract_algo_attribute()),
- //!   * can not be turned into a valid execution policy, or
- //!   * need more memory (tensor data + workspace) than the workspace limit.
- //! Candidates that throw or yield no result are skipped with a debug log.
- //! New entries are merged with the previously cached ones and written back
- //! with AlgoChooserProfileCache::put(); an empty result is allowed and leaves
- //! the cache untouched.
- //!
- //! \param selected_strategy strategy used to filter the candidates and to
- //!        construct the execution policy of each candidate
- template <typename Opr>
- void AlgoChooser<Opr>::AlgoChooserHelper::profile(
-         const ExecutionStrategy& selected_strategy) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("profile")))
-     // Some sub oprs have been profiled before and sub oprs are not checked at
-     // the beginning of choose_by_profile, so consult the cache first to avoid
-     // profiling the same operator again in fastrun.
-     auto&& rst = get_profile_result_from_cache(selected_strategy);
-     // rst.first.valid() means a valid algorithm already exists for this opr
-     if (rst.first.valid())
-         return;
-     AlgoChooserProfileCache::Result prof_rst;
-
-     const auto target_attr = extract_algo_attribute(selected_strategy);
-     const std::string layouts_str = AlgoChooser::format_fixlayouts(m_fastrun_layouts);
-     // stays 0 until the first candidate has been profiled successfully
-     double cur_timeout = 0;
-
-     // bytes spanned by all tensors taking part in the profiling
-     size_t data_size = 0;
-     for (const auto& ly : m_fastrun_layouts)
-         data_size += ly.span().dist_byte();
-
-     // Queried once and reused for every candidate.
-     // NOTE(review): assumes the limit getter returns a stable value during
-     // one profile() call -- confirm against the AlgoChooserDesc provider.
-     const auto workspace_limit =
-             m_desc.get_workspace_limit(m_cn, m_execution_policy.workspace_limit);
-     RealTimer timer;
-
-     // serialized descs of the algorithms that already have a cached entry
-     std::unordered_set<std::string> rst_algos;
-     if (rst.second.valid()) {
-         for (const auto& cached : rst.second.val())
-             rst_algos.insert(cached.algo);
-     }
-
-     for (const auto& algo : get_all_candidates()) {
-         std::string desc;
-         serialize_write_pod(algo.desc, desc);
-         if (rst_algos.find(desc) != rst_algos.end()) {
-             continue;
-         }
-
-         // skip naive algo, can not using attribute to determine naive algo,
-         // thus compare the name
-         if (algo.desc.name.compare("NAIVE") == 0) {
-             continue;
-         }
-
-         ImplExecutionPolicy policy;
-         policy.algo = algo.desc;
-
-         //! check negative attribute : skip negative attribute
-         auto palgo = m_dnn_opr->get_algorithm_from_desc(policy.algo);
-         // same guard as profile_single_algo(): never dereference a null algo
-         mgb_assert(
-                 palgo, "can not find algo %s when profiling",
-                 algo.desc.name.c_str());
-         if (palgo->contain_attribute_any(target_attr.second)) {
-             mgb_log_debug(
-                     "skip algo %s, which matches the profile strategy required "
-                     "'not contain attribute(%s).'",
-                     algo.desc.name.c_str(),
-                     Algorithm::attribute_str(target_attr.second).c_str());
-             continue;
-         }
-
-         //! check workspace limit
-         construct_execution_policy(selected_strategy, policy);
-         // this will failed
-         // when construct matmul algorithm for convolution opr
-         if (!policy.algo.valid())
-             continue;
-         const size_t workspace_needed = get_workspace_size_bytes(policy);
-         if (data_size + workspace_needed > workspace_limit) {
-             continue;
-         }
-
-         const std::string msg = ssprintf(
-                 "profiling %s algorithm %s %s", ::MegDNNOpr2Typename<Opr>::name,
-                 algo.desc.name.c_str(), layouts_str.c_str());
-         Maybe<AlgoChooserProfileCache::ResultEntry> cur_rst;
-         timer.reset();
-         MGB_TRY { cur_rst = profile_single_algo(policy, cur_timeout); }
-         // megbrain catched exception
-         MGB_CATCH(std::exception & exc, {
-             mgb_log_debug("caught exception during %s: %s", msg.c_str(), exc.what());
-             continue;
-         })
-         // megbrain uncatched exception
-         MGB_CATCH(..., {
-             mgb_log_debug("caught exception during %s", msg.c_str());
-             continue;
-         })
-         if (!cur_rst.valid()) {
-             mgb_log_debug(
-                     "timeout when %s; timeout setting: %.3fsec", msg.c_str(),
-                     cur_timeout);
-             continue;
-         }
-
-         // Tighten the timeout for the remaining candidates: wall time of this
-         // run plus TIMEOUT_TOLERANCE, never larger than the current value.
-         const double budget = timer.get_secs() + TIMEOUT_TOLERANCE;
-         cur_timeout = cur_timeout ? std::min(cur_timeout, budget) : budget;
-
-         auto&& entry = cur_rst.val();
-         mgb_log_debug(
-                 "%s: workspace: %zu; time: %.3gsec", msg.c_str(), entry.workspace,
-                 entry.time);
-         prof_rst.push_back(entry);
-     }
-     // allowed to have empty profile result for current opr
-
-     // append some previous profiled results
-     if (rst.second.valid())
-         prof_rst.insert(
-                 prof_rst.end(), rst.second.val().begin(), rst.second.val().end());
-     if (!prof_rst.empty()) {
-         // the cache key is built from the in-cache layouts and the opr param
-         FixedTensorLayouts incache_layouts = m_incache_layouts;
-         typename Opr::Param origin_param = m_dnn_opr->param();
-         AlgoChooserProfileCache::Key cache_key{
-                 incache_layouts.data(), incache_layouts.size(), &origin_param,
-                 sizeof(origin_param)};
-
-         AlgoChooserProfileCache cache(m_cn, profile_name(m_dnn_opr).c_str());
-         cache.put(cache_key, prof_rst);
-     }
-     MIDOUT_E
- }
-
- //! Build a placeholder preprocessed filter whose tensors carry the deduced
- //! preprocessed layouts but null data pointers.
- //!
- //! \param layouts layouts to deduce from; when the first layout has ndim == 0
- //!        the helper's own fastrun layouts are used instead
- //! \return None when the opr does not support weight preprocess, when weight
- //!         preprocess is not allowed, or when no non-empty preprocessed layout
- //!         is deduced; otherwise the fake filter
- template <typename Opr>
- Maybe<PreprocessFilter<Opr>> AlgoChooser<Opr>::AlgoChooserHelper::
-         construct_fake_preprocess_filter(const FixedTensorLayouts& layouts) const {
-     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("construct_fake_preprocess_filter")))
-     Maybe<PreprocessFilter<Opr>> result = None;
-     // fall back to the fastrun layouts when the caller passed no real layout
-     const FixedTensorLayouts& src_layouts =
-             layouts.at(0).ndim ? layouts : m_fastrun_layouts;
-     if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
-         if (!m_allow_weight_preprocess)
-             return;
-         auto opr = _(m_dnn_opr);
-         auto filter_layouts =
-                 APPLY(opr->deduce_preprocessed_filter_layout(args...), src_layouts);
-
-         //! no preprocess layout, or only empty ones, means that no weight
-         //! preprocess is needed
-         bool has_non_empty = false;
-         for (auto&& ly : filter_layouts) {
-             if (!ly.is_empty()) {
-                 has_non_empty = true;
-             }
-         }
-         if (filter_layouts.empty() || !has_non_empty) {
-             return;
-         }
-
-         result = PreprocessFilter<Opr>{};
-         auto& fake = result.val();
-         fake.algorithm_id = nullptr;
-         const size_t nr_tensor = filter_layouts.size();
-         fake.tensors.resize(nr_tensor);
-         for (size_t idx = 0; idx < nr_tensor; ++idx) {
-             // only the layout matters; no storage is attached
-             fake.tensors[idx] = megdnn::TensorND(nullptr, filter_layouts[idx]);
-         }
-     });
-     return result;
-     MIDOUT_E
- }
-
- //! Translate an execution strategy plus the chooser description flags into a
- //! pair of algorithm attribute masks.
- //!
- //! \param strategy execution strategy requested for the operator
- //! \return pair whose second member is the set of attributes that profile()
- //!         rejects via contain_attribute_any(); the first member collects
- //!         REPRODUCIBLE when it is requested (presumably the attributes an
- //!         algorithm must have -- confirm against the callers)
- template <typename Opr>
- std::pair<AlgoAttribute, AlgoAttribute> AlgoChooser<Opr>::AlgoChooserHelper::
-         extract_algo_attribute(const ExecutionStrategy& strategy) const {
-     std::pair<AlgoAttribute, AlgoAttribute> ret =
-             std::make_pair(AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT);
-
-     //! from strategy
-     if (strategy & ExecutionStrategy::REPRODUCIBLE) {
-         ret.first |= AlgoAttribute::REPRODUCIBLE;
-     }
-     // The enumerator is spelled OPTIMIZED, exactly as get_policy() in this
-     // file uses it; the optimized strategy excludes NAIVE algorithms.
-     if (strategy & ExecutionStrategy::OPTIMIZED) {
-         ret.second |= AlgoAttribute::NAIVE;
-     }
-
-     //! from graph option
-     // FIXME: no_profiling_on_shape_change extract USABLE_DEPEND_ON_SHAPE
-     // attribute when fixed usable
-     if (m_desc.shared_batch_size) {
-         ret.second |= AlgoAttribute::USABLE_DEPEND_ON_SHAPE;
-     }
-
-     if (m_desc.binary_equal_between_batch) {
-         ret.first |= AlgoAttribute::REPRODUCIBLE;
-         ret.second |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
-     }
-
-     return ret;
- }
-
- //! Explicitly instantiate the AlgoChooserHelper members for every operator
- //! enumerated by DNN_FOREACH_FASTRUN_OPR: the constructor, the two choosers,
- //! the cache/policy/workspace queries and the profiling entry points.
- #define INST(_Opr) \
-     template AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::AlgoChooserHelper( \
-             const FixedTensorLayouts& layouts, megdnn::_Opr* megdnn_opr, \
-             const std::string& param_str, const CompNode& cn, \
-             const megdnn::param::ExecutionPolicy& execution_policy, \
-             bool allow_weight_preprocess, const AlgoChooserDesc& desc); \
-     template typename AlgoChooser<megdnn::_Opr>::ImplExecutionPolicy \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::choose_by_heuristic( \
-             const ExecutionStrategy& select_strategy) const; \
-     template typename AlgoChooser<megdnn::_Opr>::ImplExecutionPolicy \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::choose_by_profile( \
-             const ExecutionStrategy& select_strategy, bool enable_update) const; \
-     template typename std::pair< \
-             AlgoChooser<megdnn::_Opr>::ImplAlgoDesc, \
-             Maybe<AlgoChooserProfileCache::Result>> \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::get_profile_result_from_cache( \
-             const ExecutionStrategy& select_strategy) const; \
-     template void \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::construct_execution_policy( \
-             const ExecutionStrategy& select_strategy, \
-             typename AlgoChooser<megdnn::_Opr>::ImplExecutionPolicy& policy, \
-             bool retrive_from_cache, bool allow_log) const; \
-     template size_t \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::get_workspace_size_bytes( \
-             const typename AlgoChooser<megdnn::_Opr>::ImplExecutionPolicy& policy, \
-             const FixedTensorLayouts& layouts) const; \
-     template std::vector<typename AlgoChooser<megdnn::_Opr>::ImplAlgo> \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::get_all_candidates() const; \
-     template std::pair<AlgoAttribute, AlgoAttribute> \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::extract_algo_attribute( \
-             const ExecutionStrategy& strategy) const; \
-     template Maybe<AlgoChooserProfileCache::ResultEntry> \
-     AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::profile_single_algo( \
-             const typename AlgoChooser<megdnn::_Opr>::ImplExecutionPolicy& policy, \
-             double& timeout) const; \
-     template void AlgoChooser<megdnn::_Opr>::AlgoChooserHelper::profile( \
-             const ExecutionStrategy& selected_strategy) const;
-
- DNN_FOREACH_FASTRUN_OPR(INST)
- #undef INST
-
- //////////////////////////////// AlgoChooser /////////////////////////////
- //! Select the execution policy according to the strategy bits stored in the
- //! helper's execution policy.
- //!
- //!   * HEURISTIC | PROFILE: try choose_by_profile() without update first and
- //!     fall back to choose_by_heuristic() when no valid algo is returned;
- //!   * HEURISTIC only: choose_by_heuristic();
- //!   * PROFILE only (compiled in when MGB_ENABLE_FASTRUN is set):
- //!     choose_by_profile() with update enabled;
- //!   * anything else: throws InternalError.
- //!
- //! \param helper helper bound to the operator, its layouts and its policy
- template <typename Opr>
- typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
-         const AlgoChooserHelper& helper) {
-     auto opr_strategy = helper.execution_policy().strategy;
-     // render the set strategy bits as a space separated list for the log
-     auto strategy2str = [](auto strategy) {
-         std::string names;
-         if (strategy & ExecutionStrategy::HEURISTIC)
-             names.append("HEURISTIC ");
-         if (strategy & ExecutionStrategy::PROFILE)
-             names.append("PROFILE ");
-         if (strategy & ExecutionStrategy::REPRODUCIBLE)
-             names.append("REPRODUCIBLE ");
-         if (strategy & ExecutionStrategy::OPTIMIZED)
-             names.append("OPTIMIZED ");
-         return names;
-     };
-     mgb_log_debug("Use Stragegy :%s", strategy2str(opr_strategy).c_str());
-
-     if (opr_strategy & ExecutionStrategy::HEURISTIC) {
-         if (!(opr_strategy & ExecutionStrategy::PROFILE)) {
-             return helper.choose_by_heuristic(opr_strategy);
-         }
-         //! choose from the profile cache first, then fall back to the
-         //! heuristic when the cache gives no valid algo
-         ImplExecutionPolicy chosen = helper.choose_by_profile(opr_strategy, false);
-         if (!chosen.algo.valid()) {
-             chosen = helper.choose_by_heuristic(opr_strategy);
-         }
-         return chosen;
-     }
- #if MGB_ENABLE_FASTRUN
-     if (opr_strategy & ExecutionStrategy::PROFILE) {
-         return helper.choose_by_profile(opr_strategy, true);
-     }
- #endif
-     mgb_throw(InternalError, "bad ExecutionPolicy strategy");
- }
-
- //! Format the fixed layouts of this operator as text by forwarding to the
- //! file-local ::format_fixlayouts with the opr's arity_in / arity_out and its
- //! default delimiter.
- template <typename Opr>
- std::string AlgoChooser<Opr>::format_fixlayouts(const FixedTensorLayouts& layout) {
-     std::string formatted = ::format_fixlayouts<Opr>(layout, arity_in, arity_out);
-     return formatted;
- }
-
- //! Explicitly instantiate the static AlgoChooser entry points for every
- //! operator enumerated by DNN_FOREACH_FASTRUN_OPR. The operator type is always
- //! spelled megdnn::Opr, like in the AlgoChooserHelper instantiations, so that
- //! the instantiation does not rely on a using-directive for name lookup.
- #define INST(Opr) \
-     template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
-     AlgoChooser<megdnn::Opr>::get_policy(const AlgoChooserHelper& proxy); \
-     template std::string AlgoChooser<megdnn::Opr>::format_fixlayouts( \
-             const FixedTensorLayouts& layout);
-
- DNN_FOREACH_FASTRUN_OPR(INST)
- #undef INST
-
- } // namespace rdnn
- } // namespace mgb
-
- // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
|