| @@ -185,7 +185,7 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order( | |||||
| } | } | ||||
| //! conv1x1 | //! conv1x1 | ||||
| im2col_prefer |= (FH == 1 && FW == 1); | im2col_prefer |= (FH == 1 && FW == 1); | ||||
| //! x86 8x8x16 not optmized, so it will use fallback im2col+matmul | |||||
| //! x86 8x8x16 not optimized, so it will use fallback im2col+matmul | |||||
| if (param.deduce_algo_data_type() == AlgoDataType::INT8X8X16) { | if (param.deduce_algo_data_type() == AlgoDataType::INT8X8X16) { | ||||
| im2col_prefer = true; | im2col_prefer = true; | ||||
| } | } | ||||
| @@ -40,7 +40,7 @@ def set_execution_strategy(option): | |||||
| * HEURISTIC uses heuristic to choose the fastest algorithm. | * HEURISTIC uses heuristic to choose the fastest algorithm. | ||||
| * PROFILE runs possible algorithms on real device to find the best one. | * PROFILE runs possible algorithms on real device to find the best one. | ||||
| * REPRODUCIBLE uses algorithms that are reproducible. | * REPRODUCIBLE uses algorithms that are reproducible. | ||||
| * OPTMIZED uses the algorithms that is optimized. | |||||
| * OPTIMIZED uses algorithms that are optimized. | |||||
| The default strategy is HEURISTIC; these options can be combined to | The default strategy is HEURISTIC; these options can be combined to | ||||
| form a combination option, e.g. PROFILE | REPRODUCIBLE | form a combination option, e.g. PROFILE | REPRODUCIBLE | ||||
| @@ -271,7 +271,7 @@ void init_graph_rt(py::module m) { | |||||
| {"HEURISTIC", [&]() { stg = _AlgoStrategy::HEURISTIC; }}, | {"HEURISTIC", [&]() { stg = _AlgoStrategy::HEURISTIC; }}, | ||||
| {"PROFILE", [&]() { stg = _AlgoStrategy::PROFILE; }}, | {"PROFILE", [&]() { stg = _AlgoStrategy::PROFILE; }}, | ||||
| {"REPRODUCIBLE", [&]() { stg = _AlgoStrategy::REPRODUCIBLE; }}, | {"REPRODUCIBLE", [&]() { stg = _AlgoStrategy::REPRODUCIBLE; }}, | ||||
| {"OPTMIZED", [&]() { stg = _AlgoStrategy::OPTMIZED; }}, | |||||
| {"OPTIMIZED", [&]() { stg = _AlgoStrategy::OPTIMIZED; }}, | |||||
| }; | }; | ||||
| auto it = m.find(strategy); | auto it = m.find(strategy); | ||||
| mgb_assert(it != m.end(), "Invalid strategy string!"); | mgb_assert(it != m.end(), "Invalid strategy string!"); | ||||
| @@ -709,7 +709,7 @@ void run_test_st(Args &env) { | |||||
| strategy = S::PROFILE; | strategy = S::PROFILE; | ||||
| } | } | ||||
| } else if (env.use_fast_run) { | } else if (env.use_fast_run) { | ||||
| strategy = S::PROFILE | S::OPTMIZED; | |||||
| strategy = S::PROFILE | S::OPTIMIZED; | |||||
| } else if (env.reproducible) { | } else if (env.reproducible) { | ||||
| strategy = S::HEURISTIC | S::REPRODUCIBLE; | strategy = S::HEURISTIC | S::REPRODUCIBLE; | ||||
| } | } | ||||
| @@ -1756,8 +1756,8 @@ TEST(TestGoptInference, FastProfileCache) { | |||||
| using S = opr::Convolution::ExecutionPolicy::Strategy; | using S = opr::Convolution::ExecutionPolicy::Strategy; | ||||
| ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); | ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); | ||||
| gopt::modify_opr_algo_strategy_inplace({z + 2.3f}, | gopt::modify_opr_algo_strategy_inplace({z + 2.3f}, | ||||
| S::PROFILE | S::OPTMIZED); | |||||
| ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy); | |||||
| S::PROFILE | S::OPTIMIZED); | |||||
| ASSERT_EQ(S::PROFILE | S::OPTIMIZED, conv.execution_policy().strategy); | |||||
| } | } | ||||
| TEST(TestGoptInference, AlgoWorkspaceLimit) { | TEST(TestGoptInference, AlgoWorkspaceLimit) { | ||||
| @@ -287,7 +287,7 @@ extract_algo_attribute_from_execution_strategy( | |||||
| if (strategy & ExecutionStrategy::REPRODUCIBLE) { | if (strategy & ExecutionStrategy::REPRODUCIBLE) { | ||||
| ret.first |= AlgoAttribute::REPRODUCIBLE; | ret.first |= AlgoAttribute::REPRODUCIBLE; | ||||
| } | } | ||||
| if (strategy & ExecutionStrategy::OPTMIZED) { | |||||
| if (strategy & ExecutionStrategy::OPTIMIZED) { | |||||
| ret.second |= AlgoAttribute::NAIVE; | ret.second |= AlgoAttribute::NAIVE; | ||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -357,7 +357,7 @@ TEST(TestOprDNN, ConvBiasExePolicy) { | |||||
| #if MGB_ENABLE_FASTRUN | #if MGB_ENABLE_FASTRUN | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | ||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) { | |||||
| #else | #else | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
| @@ -444,7 +444,7 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||||
| #if MGB_ENABLE_FASTRUN | #if MGB_ENABLE_FASTRUN | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | ||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) { | |||||
| #else | #else | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
| @@ -1717,7 +1717,7 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) { | |||||
| #if MGB_ENABLE_FASTRUN | #if MGB_ENABLE_FASTRUN | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | ||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) { | |||||
| #else | #else | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
| @@ -1828,7 +1828,7 @@ TEST(TestOprDNN, DeformableConvForward) { | |||||
| #if MGB_ENABLE_FASTRUN | #if MGB_ENABLE_FASTRUN | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | ||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) { | |||||
| #else | #else | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
| @@ -1997,7 +1997,7 @@ TEST(TestOprDNN, BatchConvBiasForward) { | |||||
| #if MGB_ENABLE_FASTRUN | #if MGB_ENABLE_FASTRUN | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | ||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||||
| S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTIMIZED}) { | |||||
| #else | #else | ||||
| for (auto strategy : | for (auto strategy : | ||||
| SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | ||||
| @@ -41,12 +41,13 @@ pdef('PersistentOutputStorage').add_fields( | |||||
| Doc('REPRODUCIBLE', | Doc('REPRODUCIBLE', | ||||
| 'when profile or heuristic algo selection it require the algos' | 'when profile or heuristic algo selection it require the algos' | ||||
| 'must be reproducible'), | 'must be reproducible'), | ||||
| Doc('OPTMIZED', | |||||
| Doc('OPTIMIZED', | |||||
| 'profile require algos are optmized to achieve fast-profile'), | 'profile require algos are optmized to achieve fast-profile'), | ||||
| default=('HEURISTIC',), | default=('HEURISTIC',), | ||||
| member_alias=[(('HEURISTIC', 'REPRODUCIBLE'), 'HEURISTIC_REPRODUCIBLE'), | member_alias=[(('HEURISTIC', 'REPRODUCIBLE'), 'HEURISTIC_REPRODUCIBLE'), | ||||
| (('PROFILE', 'REPRODUCIBLE'), 'PROFILE_REPRODUCIBLE'), | (('PROFILE', 'REPRODUCIBLE'), 'PROFILE_REPRODUCIBLE'), | ||||
| (('PROFILE', 'HEURISTIC'), 'PROFILE_HEURISTIC'), | (('PROFILE', 'HEURISTIC'), 'PROFILE_HEURISTIC'), | ||||
| (('OPTIMIZED',), 'OPTMIZED'), | |||||
| ]). | ]). | ||||
| add_fields('uint64', | add_fields('uint64', | ||||
| Doc('workspace_limit', 'workspace limit in bytes'), | Doc('workspace_limit', 'workspace limit in bytes'), | ||||