Merge pull request !216 from YuJianfeng/mastertags/v0.2.0-alpha
| @@ -58,6 +58,7 @@ | |||
| #include "pre_activate/ascend/ir_fission/add_memcpy_async.h" | |||
| #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" | |||
| #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" | |||
| #include "pre_activate/ascend/ir_fission/addn_fission.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "debug/anf_ir_utils.h" | |||
| @@ -177,6 +178,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap | |||
| ir_fusion_pm->AddPass(std::make_shared<MulAddFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<MulAddNFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<AddnFission>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>()); | |||
| } | |||
| @@ -0,0 +1,81 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/ir_fission/addn_fission.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| AnfNodePtr CreateNewAddn(const FuncGraphPtr &func_graph, const CNodePtr &origin_addn_cnode, size_t begin_index, | |||
| size_t offset) { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(origin_addn_cnode); | |||
| std::vector<AnfNodePtr> new_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))}; | |||
| for (size_t i = begin_index; i < begin_index + offset; ++i) { | |||
| new_addn_inputs.push_back(origin_addn_cnode->input(i)); | |||
| } | |||
| CNodePtr new_addn = func_graph->NewCNode(new_addn_inputs); | |||
| MS_EXCEPTION_IF_NULL(new_addn); | |||
| new_addn->set_scope(origin_addn_cnode->scope()); | |||
| new_addn->set_abstract(origin_addn_cnode->abstract()); | |||
| AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(offset)), new_addn); | |||
| return new_addn; | |||
| } | |||
| } // namespace | |||
| const BaseRef AddnFission::DefinePattern() const { | |||
| VarPtr Xs = std::make_shared<SeqVar>(); | |||
| return VectorRef({prim::kPrimAddN, Xs}); | |||
| } | |||
| const AnfNodePtr AddnFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| // The real input begins with index 1. | |||
| size_t origin_input_size = cnode->inputs().size() - 1; | |||
| if (origin_input_size <= inputs_divisor_) { | |||
| return nullptr; | |||
| } | |||
| CNodePtr new_cnode = cnode; | |||
| while (origin_input_size > inputs_divisor_) { | |||
| std::vector<AnfNodePtr> base_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))}; | |||
| size_t cur_input_index = 1; | |||
| // Divide the inputs of addn by 63. | |||
| while (origin_input_size - cur_input_index + 1 > inputs_divisor_) { | |||
| base_addn_inputs.push_back(CreateNewAddn(func_graph, new_cnode, cur_input_index, inputs_divisor_)); | |||
| cur_input_index += inputs_divisor_; | |||
| } | |||
| base_addn_inputs.push_back( | |||
| CreateNewAddn(func_graph, new_cnode, cur_input_index, origin_input_size - cur_input_index + 1)); | |||
| CNodePtr base_addn = func_graph->NewCNode(base_addn_inputs); | |||
| MS_EXCEPTION_IF_NULL(base_addn); | |||
| MS_EXCEPTION_IF_NULL(new_cnode); | |||
| base_addn->set_scope(new_cnode->scope()); | |||
| base_addn->set_abstract(new_cnode->abstract()); | |||
| AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(base_addn_inputs.size() - 1)), base_addn); | |||
| new_cnode = base_addn; | |||
| origin_input_size = base_addn->inputs().size() - 1; | |||
| } | |||
| return new_cnode; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,37 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ | |||
| #include "pre_activate/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| constexpr size_t kAddnInputsDivisor = 63; | |||
| class AddnFission : public PatternProcessPass { | |||
| public: | |||
| explicit AddnFission(bool multigraph = true) | |||
| : PatternProcessPass("addn_fission", multigraph), inputs_divisor_(kAddnInputsDivisor) {} | |||
| ~AddnFission() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| size_t inputs_divisor_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ | |||
| @@ -142,6 +142,7 @@ constexpr auto kAttrDynInputSizes = "dyn_input_sizes"; | |||
| constexpr auto kAttrSrcFormat = "src_format"; | |||
| constexpr auto kAttrOutputUsedNum = "output_used_num"; | |||
| constexpr auto kAttrHasBias = "has_bias"; | |||
| constexpr auto kAttrN = "N"; | |||
| // attr value | |||
| constexpr auto kValueTargetSwitch = "target_switch"; | |||
| @@ -0,0 +1,160 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/backend_common_test.h" | |||
| #include "common/py_func_graph_fetcher.h" | |||
| #define private public | |||
| #define protected public | |||
| #include "pre_activate/ascend/ir_fission/addn_fission.h" | |||
| #undef private | |||
| #undef protected | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class TestHWAddnFission : public BackendCommon { | |||
| public: | |||
| TestHWAddnFission() : get_py_fun_("gtest_input.pre_activate.addn_fission_test", true) {} | |||
| ~TestHWAddnFission() override = default; | |||
| UT::PyFuncGraphFetcher get_py_fun_; | |||
| }; | |||
| TEST_F(TestHWAddnFission, test_addn_fission_divided_by_2) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| AbstractBasePtrList args_spec_list; | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| args_spec_list.push_back(x_abstract); | |||
| } | |||
| auto kg = GetKernelGraph(g, args_spec_list); | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>(); | |||
| auto addn_fission = std::make_shared<opt::AddnFission>(); | |||
| addn_fission->inputs_divisor_ = 2; | |||
| pm->AddPass(addn_fission); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_2"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| auto kg_after = GetKernelGraph(g_after, args_spec_list); | |||
| EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph)); | |||
| } | |||
| TEST_F(TestHWAddnFission, test_addn_fission_divided_by_3) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| AbstractBasePtrList args_spec_list; | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| args_spec_list.push_back(x_abstract); | |||
| } | |||
| auto kg = GetKernelGraph(g, args_spec_list); | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>(); | |||
| auto addn_fission = std::make_shared<opt::AddnFission>(); | |||
| addn_fission->inputs_divisor_ = 3; | |||
| pm->AddPass(addn_fission); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_3"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| auto kg_after = GetKernelGraph(g_after, args_spec_list); | |||
| EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph)); | |||
| } | |||
| TEST_F(TestHWAddnFission, test_addn_fission_divided_by_4) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| AbstractBasePtrList args_spec_list; | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| args_spec_list.push_back(x_abstract); | |||
| } | |||
| auto kg = GetKernelGraph(g, args_spec_list); | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>(); | |||
| auto addn_fission = std::make_shared<opt::AddnFission>(); | |||
| addn_fission->inputs_divisor_ = 4; | |||
| pm->AddPass(addn_fission); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_4"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| auto kg_after = GetKernelGraph(g_after, args_spec_list); | |||
| EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph)); | |||
| } | |||
| TEST_F(TestHWAddnFission, test_addn_fission_divided_by_8) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| AbstractBasePtrList args_spec_list; | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| args_spec_list.push_back(x_abstract); | |||
| } | |||
| auto kg = GetKernelGraph(g, args_spec_list); | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>(); | |||
| auto addn_fission = std::make_shared<opt::AddnFission>(); | |||
| addn_fission->inputs_divisor_ = 8; | |||
| pm->AddPass(addn_fission); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_8"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| auto kg_after = GetKernelGraph(g_after, args_spec_list); | |||
| EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph)); | |||
| } | |||
| TEST_F(TestHWAddnFission, test_addn_fission_divided_by_9) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| AbstractBasePtrList args_spec_list; | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| args_spec_list.push_back(x_abstract); | |||
| } | |||
| auto kg = GetKernelGraph(g, args_spec_list); | |||
| auto optimizer = std::make_shared<opt::GraphOptimizer>(); | |||
| auto pm = std::make_shared<opt::PassManager>(); | |||
| auto addn_fission = std::make_shared<opt::AddnFission>(); | |||
| addn_fission->inputs_divisor_ = 9; | |||
| pm->AddPass(addn_fission); | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_9"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| auto kg_after = GetKernelGraph(g_after, args_spec_list); | |||
| EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph)); | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,80 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import Primitive | |||
| addn = P.AddN() | |||
| make_tuple = Primitive('make_tuple') | |||
class FnDict:
    """Decorator-style registry that stores functions under their own names.

    Use an instance as a decorator to register a function, then index the
    instance with the function's name to get it back.
    """

    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        """Register ``fn`` under ``fn.__name__`` and hand it back unchanged."""
        self.fnDict[fn.__name__] = fn
        # Return the function so that the decorated name stays bound to it
        # instead of being rebound to None.
        return fn

    def __getitem__(self, name):
        """Return the function registered under ``name``; KeyError if absent."""
        return self.fnDict[name]
def test_addn_fission(tag):
    """Return the graph function registered under ``tag`` for the AddN fission tests.

    ``before`` is a single AddN over nine inputs; each ``after_divided_by_N``
    function is the expected result of splitting it into groups of at most N inputs.
    """
    fns = FnDict()

    @fns
    def before(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))

    @fns
    def after_divided_by_2(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        # First level: pairs of the original inputs, the odd one out stays alone.
        sum_01 = addn((input0, input1))
        sum_23 = addn((input2, input3))
        sum_45 = addn((input4, input5))
        sum_67 = addn((input6, input7))
        sum_8 = addn((input8,))
        # Second level: pairs of the first-level results.
        sum_0123 = addn((sum_01, sum_23))
        sum_4567 = addn((sum_45, sum_67))
        rest_level2 = addn((sum_8,))
        # Third level.
        sum_0_to_7 = addn((sum_0123, sum_4567))
        rest_level3 = addn((rest_level2,))
        return addn((sum_0_to_7, rest_level3))

    @fns
    def after_divided_by_3(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        first = addn((input0, input1, input2))
        second = addn((input3, input4, input5))
        third = addn((input6, input7, input8))
        return addn((first, second, third))

    @fns
    def after_divided_by_4(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        first = addn((input0, input1, input2, input3))
        second = addn((input4, input5, input6, input7))
        rest = addn((input8,))
        return addn((first, second, rest))

    @fns
    def after_divided_by_8(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        first = addn((input0, input1, input2, input3, input4, input5, input6, input7))
        rest = addn((input8,))
        return addn((first, rest))

    @fns
    def after_divided_by_9(input0, input1, input2, input3, input4, input5, input6, input7, input8):
        # Nine inputs fit into one group, so the graph is expected to stay as it was.
        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))

    return fns[tag]