feat(mgb/gopt): add interface to select reproducible algorithms

GitOrigin-RevId: f341bea40b
5 years ago · 12dc36a6ab
--- a/sdk/load-and-run/src/mgblar.cpp
+++ b/sdk/load-and-run/src/mgblar.cpp
@@ -14,6 +14,7 @@
 #include "./json_loader.h"
 #include "./npy.h"
 #include "megbrain/opr/dnn/convolution.h"
 #include "megbrain/utils/debug.h"
 #include "megbrain/serialization/serializer.h"
 #include "megbrain/serialization/extern_c_opr.h"
@@ -144,6 +145,10 @@ R"__usage__(
 R"__usage__(
  --fast-run-algo-policy <path>
    It will read the cache file before profile, and save new fastrun in cache file.
  --reproducible
    Enable choose algo which is reproducible. It mainly used for cudnn algos.
    See https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#reproducibility
    for more details.
  --wait-gdb
    Print PID and wait for a line from stdin before starting execution. Useful
    for waiting for gdb attach.
@@ -467,6 +472,7 @@ struct Args {
 #if MGB_ENABLE_FASTRUN
    bool use_fast_run = false;
 #endif
    bool reproducible = false;
    std::string fast_run_cache_path;
    bool copy_to_host = false;
    int nr_run = 10;
@@ -647,10 +653,24 @@ void run_test_st(Args &env) {
    }
    mgb::gopt::set_opr_algo_workspace_limit_inplace(vars, env.workspace_limit);
    using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
    S strategy = S::HEURISTIC;
 #if MGB_ENABLE_FASTRUN
    if (env.use_fast_run)
        mgb::gopt::enable_opr_algo_profiling_inplace(vars);
    if (env.use_fast_run) {
        if (env.reproducible) {
            strategy = S::PROFILE_REPRODUCIBLE;
        } else {
            strategy = S::PROFILE;
        }
    } else if (env.reproducible) {
        strategy = S::HEURISTIC_REPRODUCIBLE;
    }
 #else
    if (env.reproducible) {
        strategy = S::HEURISTIC_REPRODUCIBLE;
    }
 #endif
    mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
    if (!env.fast_run_cache_path.empty()) {
 #if MGB_ENABLE_FASTRUN
        if (!access(env.fast_run_cache_path.c_str(), F_OK)) {
@@ -1149,6 +1169,10 @@ Args Args::from_argv(int argc, char **argv) {
            ret.fast_run_cache_path = argv[i];
            continue;
        }
        if (!strcmp(argv[i], "--reproducible")) {
            ret.reproducible = true;
            continue;
        }
        if (!strcmp(argv[i], "--const-shape")) {
            ret.load_config.const_var_shape = true;
            continue;
--- a/src/gopt/impl/inference.cpp
+++ b/src/gopt/impl/inference.cpp
@@ -104,25 +104,21 @@ SymbolVarArray gopt::optimize_for_inference(
 }
 namespace {
 void modify_conv_policy(opr::mixin::Convolution& conv,
                        megdnn::param::ExecutionPolicy::Strategy strategy) {
 void modify_conv_strategy(
        opr::mixin::Convolution& conv,
        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
    auto policy = conv.execution_policy_transient();
    policy.strategy = strategy;
    conv.set_execution_policy(policy);
 }
 template <typename Opr>
 void inplace_conv_opr_profile_modifier(OperatorNodeBase& opr) {
    modify_conv_policy(
 void inplace_conv_opr_modifier(
        OperatorNodeBase& opr,
        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
    modify_conv_strategy(
            opr.cast_final_safe<Opr>(),
            opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE);
 }
 template <typename Opr>
 void inplace_conv_opr_profile_cache_modifier(OperatorNodeBase& opr) {
    modify_conv_policy(opr.cast_final_safe<Opr>(),
                       opr::mixin::Convolution::ExecutionPolicy::Strategy::
                               PROFILE_HEURISTIC);
            strategy);
 }
 void modify_conv_policy_workspace_limit(opr::mixin::Convolution& conv,
@@ -150,12 +146,20 @@ void inplace_conv_opr_workspace_limit_modifier(OperatorNodeBase& opr,
            cb(DeformableConvBackwardFilter), cb(DeformableConvBackwardData), \
            cb(BatchConvBiasForward),
 void gopt::enable_opr_algo_profiling_inplace(
        const VarNodeArrayView& dest_vars) {
 #if MGB_ENABLE_FASTRUN
    static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)> modifiers =
            {
 #define CONV(t) {opr::t::typeinfo(), &inplace_conv_opr_profile_modifier<opr::t>}
 void gopt::modify_opr_algo_strategy_inplace(
        const VarNodeArrayView& dest_vars,
        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
 #if !MGB_ENABLE_FASTRUN
    using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
    if (strategy == S::PROFILE || strategy == S::PROFILE_REPRODUCIBLE) {
        mgb_throw(MegBrainError, "fastrun is disabled at compile time");
    }
 #endif
    const ThinHashMap<Typeinfo*, std::function<void(OperatorNodeBase&)>>
            modifiers = {
 #define CONV(t)                                                       \
    {opr::t::typeinfo(), std::bind(inplace_conv_opr_modifier<opr::t>, \
                                   std::placeholders::_1, strategy)}
                    MGB_FOREACH_FASTRUN_OPR(CONV)
 #undef CONV
            };
@@ -171,34 +175,23 @@ void gopt::enable_opr_algo_profiling_inplace(
    for (auto i : dest_vars) {
        dep_iter.add(i);
    }
 #else
    mgb_throw(MegBrainError, "fastrun is disabled at compile time");
 #endif
 }
 void gopt::enable_opr_use_profiling_cache_inplace(
 void gopt::enable_opr_algo_profiling_inplace(
        const VarNodeArrayView& dest_vars) {
    static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)> modifiers =
            {
 #define CONV(t) \
    {opr::t::typeinfo(), &inplace_conv_opr_profile_cache_modifier<opr::t>}
                    MGB_FOREACH_FASTRUN_OPR(CONV)
 #undef CONV
            };
    auto on_opr = [&](OperatorNodeBase* opr) {
        auto iter = modifiers.find(opr->dyn_typeinfo());
        if (iter != modifiers.end()) {
            iter->second(*opr);
        }
    };
    modify_opr_algo_strategy_inplace(dest_vars,
                                     opr::mixin::Convolution::ExecutionPolicy::
                                             Strategy::PROFILE);
 }
    cg::DepOprIter dep_iter{on_opr};
    for (auto i : dest_vars) {
        dep_iter.add(i);
    }
 void gopt::enable_opr_use_profiling_cache_inplace(
        const VarNodeArrayView& dest_vars) {
    modify_opr_algo_strategy_inplace(dest_vars,
                                     opr::mixin::Convolution::ExecutionPolicy::
                                             Strategy::PROFILE_HEURISTIC);
 }
 void gopt::set_opr_algo_workspace_limit_inplace(
        const VarNodeArrayView& dest_vars, size_t workspace_limit) {
    static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>
--- a/src/gopt/include/megbrain/gopt/inference.h
+++ b/src/gopt/include/megbrain/gopt/inference.h
@@ -13,6 +13,7 @@
 #include "megbrain/gopt/framework.h"
 #include "megbrain/graph/cg.h"
 #include "megbrain/opr/dnn/convolution.h"
 namespace mgb {
 namespace gopt {
@@ -302,6 +303,17 @@ namespace gopt {
            const SymbolVarArray& dest_vars,
            const OptimizeForInferenceOptions& opt = {});
    /*!
     * \brief modify execution strategy for oprs with multiple
     *      algorithms
     *
     * This modifies the operators in place. It can be used to implement
     * the fast-run mode.
     *
     * \param dest_vars vars whose dependency operators are visited; matching
     *      operators get the new strategy
     * \param strategy execution strategy to store in the execution policy of
     *      each matching operator
     *
     * When fastrun is disabled at compile time (MGB_ENABLE_FASTRUN is off),
     * passing PROFILE or PROFILE_REPRODUCIBLE throws MegBrainError.
     */
    void modify_opr_algo_strategy_inplace(
            const VarNodeArrayView& dest_vars,
            opr::mixin::Convolution::ExecutionPolicy::Strategy strategy);
    /*!
     * \brief enable PROFILE execution strategy for oprs with multiple
     *      algorithms
@@ -315,7 +327,7 @@ namespace gopt {
    void enable_opr_algo_profiling_inplace(const VarNodeArrayView& dest_vars);
    /*!
     * \brief enable opr try profiling cache first, if failed, then try
     * \brief make oprs try the profiling cache first and, if that fails,
     * fall back to heuristic
     *
     * This would modify the operators inplace. It is usually used to enable
@@ -324,7 +336,8 @@ namespace gopt {
     * You may want to implement TimedFuncInvoker::ForkExecImpl and/or
     * PersistentCache for better performance in an SDK.
     */
    void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars);
    void enable_opr_use_profiling_cache_inplace(
            const VarNodeArrayView& dest_vars);
    /*!
     * \brief set workspace_limit for execution strategy for oprs with multiple