Merge pull request !1278 from Etone.Chan/NBFtags/v0.3.0-alpha
| @@ -63,10 +63,17 @@ | |||
| #include "pre_activate/ascend/format_type/merge_cast_to_op.h" | |||
| #include "pre_activate/ascend/format_type/check_consistency.h" | |||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | |||
| #include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" | |||
| #include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" | |||
| #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" | |||
| #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" | |||
| #include "pre_activate/ascend/enhancer/add_memcpy_async.h" | |||
| #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" | |||
| @@ -281,7 +288,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||
| AscendDataLayout(kernel_graph); | |||
| // mixed precision optimization | |||
| AscendMixPrecision(kernel_graph); | |||
| // buffer fusion | |||
| // other optimization | |||
| auto optimizer = std::make_shared<GraphOptimizer>(); | |||
| auto other_pm = std::make_shared<PassManager>("other_pm"); | |||
| @@ -291,7 +297,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||
| other_pm->AddPass(std::make_shared<BroadcastFusion>()); | |||
| other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | |||
| other_pm->AddPass(std::make_shared<RefreshParameterFormat>()); | |||
| other_pm->AddPass(std::make_shared<BufferFusion>()); | |||
| other_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | |||
| if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { | |||
| @@ -307,6 +312,8 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||
| DumpIR(file_path, kernel_graph, true); | |||
| DumpIRProto(kernel_graph, "after_hwopt"); | |||
| } | |||
| // buffer fusion | |||
| AscendBackendUBFusionOptimization(kernel_graph); | |||
| } | |||
| void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| @@ -330,10 +337,17 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGrap | |||
| fusion_id_allocator->Init(); | |||
| auto optimizer = std::make_shared<GraphOptimizer>(); | |||
| auto ub_fusion_pm = std::make_shared<PassManager>("ub_fusion_pm"); | |||
| ub_fusion_pm->AddPass(std::make_shared<ConvDoubleInFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<ConvSingleInFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<EltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<MatmulEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<DepthwiseConvEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<FusionTypeFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<TbeBufferFusion>()); | |||
| ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<ConvBnReduceFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<ReduceEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<SegmentEltwiseFusionPass>(fusion_id_allocator.get())); | |||
| ub_fusion_pm->AddPass(std::make_shared<UbPatternFusion>()); | |||
| optimizer->AddPassManager(ub_fusion_pm); | |||
| (void)optimizer->Optimize(kernel_graph); | |||
| kernel_graph->SetExecOrderByDefault(); | |||
| @@ -0,0 +1,82 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
| const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto add = relu_input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(add); | |||
| auto tuple_getitem = add->input(1); | |||
| if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { | |||
| auto getitem = tuple_getitem->cast<CNodePtr>(); | |||
| auto bnupdate = getitem->input(1); | |||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||
| auto input2 = out_getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||
| } | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||
| std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| } | |||
| void BnupdateEltwiseEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
| auto eltwise_input = cnode->input(1); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTensorAdd)) { | |||
| MatchBnupdateAddRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class BnupdateEltwiseEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit BnupdateEltwiseEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("BnupdateEltwiseEltwiseFusionPass", idAllocator) {} | |||
| ~BnupdateEltwiseEltwiseFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
| const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||
| @@ -1,96 +1,77 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
| const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto getitem = relu_input->cast<CNodePtr>(); | |||
| auto bnupdate = getitem->input(1); | |||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||
| auto input2 = out_getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||
| } | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| void BnupdateEltwiseFusionPass::MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
| auto eltwise_input = cnode->input(1); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { | |||
| MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| bool BnupdateEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto return_node = kernel_graph.get_return(); | |||
| MS_EXCEPTION_IF_NULL(return_node); | |||
| if (return_node->inputs().size() <= 1) { | |||
| return false; | |||
| } | |||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||
| FusedNodeRecord candidate_fusion; | |||
| MatchBnupdateOpNamePattern(kernel_graph, &candidate_fusion); | |||
| if (candidate_fusion.empty()) { | |||
| return false; | |||
| } | |||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||
| return true; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
| const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto getitem = relu_input->cast<CNodePtr>(); | |||
| auto bnupdate = getitem->input(1); | |||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||
| auto input2 = out_getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||
| } | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| void BnupdateEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
| auto eltwise_input = cnode->input(1); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { | |||
| MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,50 +1,48 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class BnupdateEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| BnupdateEltwiseFusionPass() : FusionBasePass("BnupdateEltwiseFusionPass") {} | |||
| explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} | |||
| ~BnupdateEltwiseFusionPass() override = default; | |||
| bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||
| private: | |||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| void MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class BnupdateEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} | |||
| ~BnupdateEltwiseFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||
| @@ -0,0 +1,64 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void ConvBnReduceFusionPass::MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto conv = cnode->input(1); | |||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); | |||
| std::unordered_set<AnfNodePtr> record{cnode, conv}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| void ConvBnReduceFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { | |||
| MatchConvBnreduce(cnode, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class ConvBnReduceFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit ConvBnReduceFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("ConvBnReduceFusionPass", idAllocator) {} | |||
| ~ConvBnReduceFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||
| @@ -0,0 +1,89 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| bool ConvDoubleInFusionPass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| if (!node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { | |||
| return false; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto user_nodes = manager->node_users()[node]; | |||
| return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE && | |||
| cnode->inputs().size() == ELTWISE_INPUT_SIZE; | |||
| } | |||
| void ConvDoubleInFusionPass::MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| std::unordered_set<AnfNodePtr> record{cnode}; | |||
| auto eltwise_input = cnode->input(1); | |||
| if (CheckDoubleInEltWiseNode(manager.get(), eltwise_input)) { | |||
| (void)record.insert(eltwise_input); | |||
| } else { | |||
| return; | |||
| } | |||
| auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(input_cnode); | |||
| auto double_in_eltwise_input = input_cnode->input(1); | |||
| if (!double_in_eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(double_in_eltwise_input) || | |||
| fusion_id_allocator->HasFusionIdAttr(double_in_eltwise_input)) { | |||
| return; | |||
| } | |||
| if (AnfAlgo::GetKernelType(double_in_eltwise_input) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(double_in_eltwise_input) == kernel::FusionType::CONVLUTION) { | |||
| (void)record.insert(double_in_eltwise_input); | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| void ConvDoubleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||
| MatchConvDoubleInEltwise(cnode, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class ConvDoubleInFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit ConvDoubleInFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("ConvDoubleInFusionPass", idAllocator) {} | |||
| ~ConvDoubleInFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||
| @@ -0,0 +1,77 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void ConvSingleInFusionPass::MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| std::unordered_set<AnfNodePtr> record{cnode}; | |||
| auto eltwise_input = cnode->input(1); | |||
| while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||
| (void)record.insert(eltwise_input); | |||
| auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(input_cnode); | |||
| eltwise_input = input_cnode->input(1); | |||
| if (record.size() == MAX_ELTWISE_NUM) { | |||
| break; | |||
| } | |||
| } | |||
| if (!eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || | |||
| fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { | |||
| return; | |||
| } | |||
| if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::CONVLUTION) { | |||
| (void)record.insert(eltwise_input); | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| void ConvSingleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||
| MatchConvSingleInEltwise(cnode, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,47 +1,48 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class FusionTypeFusionPass : public FusionBasePass { | |||
| public: | |||
| FusionTypeFusionPass() : FusionBasePass("FusionTypeFusionPass") {} | |||
| explicit FusionTypeFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("FusionTypeFusionPass", idAllocator) {} | |||
| ~FusionTypeFusionPass() override = default; | |||
| bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||
| private: | |||
| void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class ConvSingleInFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit ConvSingleInFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("ConvSingleInFusionPass", idAllocator) {} | |||
| ~ConvSingleInFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||
| @@ -1,107 +1,89 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, | |||
| const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion, bool is_order) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| if (is_order) { | |||
| // DepthwiseConvolution--->Elemwise | |||
| auto depthwise_conv = cnode->input(1); | |||
| MS_EXCEPTION_IF_NULL(depthwise_conv); | |||
| if (cnode->isa<CNode>() && IsPrimitiveCNode(depthwise_conv, prim::kPrimDepthwiseConv2dNative)) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[depthwise_conv].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), depthwise_conv); | |||
| std::unordered_set<AnfNodePtr> record{cnode, depthwise_conv}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } else { | |||
| // Elemwise-->DepthwiseConvolution | |||
| auto relu = cnode->input(1); | |||
| MS_EXCEPTION_IF_NULL(relu); | |||
| if (cnode->isa<CNode>() && (IsPrimitiveCNode(relu, prim::kPrimRelu) || IsPrimitiveCNode(relu, prim::kPrimReluV2))) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[relu].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu); | |||
| std::unordered_set<AnfNodePtr> record{cnode, relu}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| } | |||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseOpNamePattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
| auto eltwise_input = cnode->input(1); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||
| if (eltwise_input->isa<CNode>() && | |||
| AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { | |||
| MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); | |||
| } | |||
| } | |||
| } else if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimDepthwiseConv2dNative->name()) { | |||
| MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, false); | |||
| } | |||
| } | |||
| } | |||
| bool DepthwiseConvEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto return_node = kernel_graph.get_return(); | |||
| MS_EXCEPTION_IF_NULL(return_node); | |||
| if (return_node->inputs().size() <= 1) { | |||
| return false; | |||
| } | |||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||
| FusedNodeRecord candidate_fusion; | |||
| MatchDepthwiseOpNamePattern(kernel_graph, &candidate_fusion); | |||
| if (candidate_fusion.empty()) { | |||
| return false; | |||
| } | |||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||
| return true; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, | |||
| const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion, bool is_order) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| if (is_order) { | |||
| // DepthwiseConvolution--->Elemwise | |||
| auto depthwise_conv = cnode->input(1); | |||
| MS_EXCEPTION_IF_NULL(depthwise_conv); | |||
| if (cnode->isa<CNode>() && IsPrimitiveCNode(depthwise_conv, prim::kPrimDepthwiseConv2dNative)) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[depthwise_conv].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), depthwise_conv); | |||
| std::unordered_set<AnfNodePtr> record{cnode, depthwise_conv}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } else { | |||
| // Elemwise-->DepthwiseConvolution | |||
| auto relu = cnode->input(1); | |||
| MS_EXCEPTION_IF_NULL(relu); | |||
| if (cnode->isa<CNode>() && (IsPrimitiveCNode(relu, prim::kPrimRelu) || IsPrimitiveCNode(relu, prim::kPrimReluV2))) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[relu].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu); | |||
| std::unordered_set<AnfNodePtr> record{cnode, relu}; | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| } | |||
| } | |||
| void DepthwiseConvEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
| auto eltwise_input = cnode->input(1); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||
| if (eltwise_input->isa<CNode>() && | |||
| AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { | |||
| MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); | |||
| } | |||
| } | |||
| } else if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimDepthwiseConv2dNative->name()) { | |||
| MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, false); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,50 +1,48 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class DepthwiseConvEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| DepthwiseConvEltwiseFusionPass() : FusionBasePass("DepthwiseConvEltwiseFusionPass") {} | |||
| explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} | |||
| ~DepthwiseConvEltwiseFusionPass() override = default; | |||
| bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||
| private: | |||
| void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion, bool is_order); | |||
| void MatchDepthwiseOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class DepthwiseConvEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} | |||
| ~DepthwiseConvEltwiseFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion, bool is_order); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| void EltwiseFusionPass::MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| std::unordered_set<AnfNodePtr> record{cnode}; | |||
| auto eltwise_input = cnode->input(1); | |||
| while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||
| (void)record.insert(eltwise_input); | |||
| if (record.size() == MAX_ELTWISE_SIZE) { | |||
| break; | |||
| } | |||
| auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(input_cnode); | |||
| eltwise_input = input_cnode->input(1); | |||
| } | |||
| if (record.size() < MIN_ELTWISE_SIZE) { | |||
| return; | |||
| } | |||
| candidate_fusion->push_back(record); | |||
| SetRecordFusionId(record); | |||
| } | |||
| void EltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||
| MatchEltwise(cnode, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class EltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit EltwiseFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("EltwiseFusionPass", idAllocator) {} | |||
| ~EltwiseFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Decides whether `node` may join an elementwise fusion chain. | |||
| // Returns true only when node is a real CNode kernel that carries no fusion id yet, is a TBE kernel whose | |||
| // fusion type is ELEMWISE, has exactly ELTWISE_USE users and exactly ELTWISE_INPUT_SIZE entries in inputs(). | |||
| // A null manager or node is reported through MS_EXCEPTION_IF_NULL. | |||
| bool FusionBasePass::CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   // node is dereferenced right below, so a null pointer must be rejected first. | |||
|   MS_EXCEPTION_IF_NULL(node); | |||
|   if (!node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { | |||
|     return false; | |||
|   } | |||
|   auto cnode = node->cast<CNodePtr>(); | |||
|   MS_EXCEPTION_IF_NULL(cnode); | |||
|   // Bind by reference: only the size is needed, so copying the whole user set is wasted work. | |||
|   const auto &user_nodes = manager->node_users()[node]; | |||
|   return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && | |||
|          AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE && | |||
|          cnode->inputs().size() == ELTWISE_INPUT_SIZE; | |||
| } | |||
| // Allocates one new fusion id and assigns it to every node of `record`, so the whole set shares that id. | |||
| void FusionBasePass::SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record) { | |||
|   // The allocator is a raw pointer supplied through the constructor; fail loudly instead of dereferencing null. | |||
|   MS_EXCEPTION_IF_NULL(fusion_id_allocator); | |||
|   auto id = fusion_id_allocator->AllocateFusionId(); | |||
|   // Iterate by const reference so no node pointer is copied per element. | |||
|   for (const auto &node : record) { | |||
|     fusion_id_allocator->SetFusionId(node, id); | |||
|   } | |||
| } | |||
| // Runs MatchSingleFusionPattern over `kernel_graph` and reports whether it produced any fusion candidate. | |||
| // Returns false without matching when the return node has no real input (inputs().size() <= 1). | |||
| bool FusionBasePass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||
|   auto graph_manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(graph_manager); | |||
|   auto ret_node = kernel_graph.get_return(); | |||
|   MS_EXCEPTION_IF_NULL(ret_node); | |||
|   if (ret_node->inputs().size() > 1) { | |||
|     MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||
|     FusedNodeRecord matched_groups; | |||
|     MatchSingleFusionPattern(kernel_graph, &matched_groups); | |||
|     if (!matched_groups.empty()) { | |||
|       MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||
|       return true; | |||
|     } | |||
|   } | |||
|   return false; | |||
| } | |||
| // Pass entry point: casts `graph` to a session::KernelGraph and returns the result of MatchUBFusionPattern. | |||
| // Throws via MS_EXCEPTION_IF_NULL when graph is null or the cast yields null. | |||
| bool FusionBasePass::Run(const FuncGraphPtr &graph) { | |||
|   MS_EXCEPTION_IF_NULL(graph); | |||
|   auto as_kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||
|   MS_EXCEPTION_IF_NULL(as_kernel_graph); | |||
|   const session::KernelGraph &graph_ref = *as_kernel_graph; | |||
|   return MatchUBFusionPattern(graph_ref); | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,50 +1,57 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| // Base class of the UB fusion passes. Concrete passes implement MatchUBFusionPattern(); SetRecordFusionId() | |||
| // is the shared helper that tags a set of nodes through fusion_id_allocator. | |||
| class FusionBasePass : public Pass { | |||
|  public: | |||
|   // Builds a pass without an allocator; fusion_id_allocator stays nullptr. | |||
|   explicit FusionBasePass(const std::string &name) : Pass(name) {} | |||
|   // Builds a pass that uses the caller-provided allocator (stored as a raw pointer, not owned here). | |||
|   FusionBasePass(const std::string &name, FusionIdAllocator *idAllocator) | |||
|       : Pass(name), fusion_id_allocator(idAllocator) {} | |||
|   ~FusionBasePass() override = default; | |||
|   bool Run(const FuncGraphPtr &graph) override; | |||
|  protected: | |||
|   // Returns true when at least one fusion pattern was matched on kernel_graph. | |||
|   virtual bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) = 0; | |||
|   void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record); | |||
|   // Default-initialised so the single-argument constructor never leaves an indeterminate pointer behind. | |||
|   FusionIdAllocator *fusion_id_allocator = nullptr; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Size limits shared by the elementwise-based fusion passes. | |||
| constexpr int8_t MAX_ELTWISE_NUM = 3; | |||
| constexpr int8_t MIN_ELTWISE_SIZE = 2; | |||
| // inputs().size() that CheckEltWiseNode requires of an elementwise node. | |||
| constexpr int8_t ELTWISE_INPUT_SIZE = 2; | |||
| // Number of users that CheckEltWiseNode requires of an elementwise node. | |||
| constexpr int8_t ELTWISE_USE = 1; | |||
| constexpr int8_t MAX_ELTWISE_SIZE = 6; | |||
| // One entry per fusion candidate; each entry is the set of nodes to be fused together. | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| // Common base of the buffer-fusion passes. Subclasses supply MatchSingleFusionPattern(); the base provides the | |||
| // Run()/MatchUBFusionPattern() driver plus the helpers SetRecordFusionId() and CheckEltWiseNode(). | |||
| class FusionBasePass : public Pass { | |||
|  public: | |||
|   // id_allocator is kept as a raw, non-owning pointer. | |||
|   FusionBasePass(const std::string &name, FusionIdAllocator *id_allocator) | |||
|       : Pass(name), fusion_id_allocator(id_allocator) {} | |||
|   ~FusionBasePass() override = default; | |||
|   bool Run(const FuncGraphPtr &graph) override; | |||
|   // Returns true when the subclass matcher produced at least one candidate. | |||
|   bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph); | |||
|  protected: | |||
|   // Appends every matched node set to candidate_fusion. | |||
|   virtual void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
|                                         FusedNodeRecord *candidate_fusion) = 0; | |||
|   // Gives all nodes of record one newly allocated fusion id. | |||
|   void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record); | |||
|   // Tells whether node qualifies as a link of an elementwise chain. | |||
|   bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); | |||
|   FusionIdAllocator *fusion_id_allocator; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Records `cnode` together with its producer `relu_input` as one fusion candidate: stores relu_input's current | |||
| // user count in the kAttrOutputUsedNum attribute, appends the pair to candidate_fusion and tags both nodes with a | |||
| // newly allocated fusion id. Does nothing when relu_input already carries a fusion id. | |||
| void MatmulEltwiseFusionPass::MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
|                                                  const session::KernelGraph &kernel_graph, | |||
|                                                  FusedNodeRecord *candidate_fusion) { | |||
|   MS_EXCEPTION_IF_NULL(cnode); | |||
|   MS_EXCEPTION_IF_NULL(relu_input); | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   auto manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   // A producer that already belongs to a fusion group must not be re-tagged: overwriting its id would pull it | |||
|   // out of the earlier group (e.g. when two elementwise nodes consume the same producer). | |||
|   if (fusion_id_allocator->HasFusionIdAttr(relu_input)) { | |||
|     return; | |||
|   } | |||
|   std::vector<int> output_used_num{SizeToInt(manager->node_users()[relu_input].size())}; | |||
|   AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu_input); | |||
|   std::unordered_set<AnfNodePtr> record{cnode, relu_input}; | |||
|   candidate_fusion->push_back(record); | |||
|   SetRecordFusionId(record); | |||
| } | |||
| // Walks the graph in TopoSort order from the return node. For every real CNode kernel that has no fusion id, is | |||
| // not the Return primitive and is a TBE ELEMWISE kernel, checks whether its first data input is a MatMul CNode | |||
| // and, if so, hands the pair to MatchMatmulEltwise. | |||
| void MatmulEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
|                                                        FusedNodeRecord *candidate_fusion) { | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
|   for (auto &node : node_list) { | |||
|     if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
|         AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
|       continue; | |||
|     } | |||
|     auto cnode = node->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(cnode); | |||
|     // input(1) is read below; skip kernels without a data input instead of indexing past inputs(). | |||
|     if (cnode->inputs().size() <= 1) { | |||
|       continue; | |||
|     } | |||
|     if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
|         AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||
|       auto eltwise_input = cnode->input(1); | |||
|       // eltwise_input is dereferenced by isa<CNode>() right away. | |||
|       MS_EXCEPTION_IF_NULL(eltwise_input); | |||
|       if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimMatMul)) { | |||
|         MatchMatmulEltwise(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||
|       } | |||
|     } | |||
|   } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| // Buffer-fusion pass registered under the name "MatmulEltwiseFusionPass". | |||
| class MatmulEltwiseFusionPass : public FusionBasePass { | |||
|  public: | |||
|   // id_allocator is forwarded unchanged to FusionBasePass. | |||
|   explicit MatmulEltwiseFusionPass(FusionIdAllocator *id_allocator) | |||
|       : FusionBasePass("MatmulEltwiseFusionPass", id_allocator) {} | |||
|   ~MatmulEltwiseFusionPass() override = default; | |||
|   // Appends every matched node set of kernel_graph to candidate_fusion. | |||
|   void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
|                                 FusedNodeRecord *candidate_fusion) override; | |||
|  private: | |||
|   // Records the pair (cnode, relu_input) as one fusion candidate. | |||
|   void MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||
|                           const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||
| @@ -1,38 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Allocates one new fusion id and assigns it to every node of `record`, so the whole set shares that id. | |||
| void FusionBasePass::SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record) { | |||
|   // The allocator is held as a raw pointer; fail loudly instead of dereferencing null. | |||
|   MS_EXCEPTION_IF_NULL(fusion_id_allocator); | |||
|   auto id = fusion_id_allocator->AllocateFusionId(); | |||
|   // Iterate by const reference so no node pointer is copied per element. | |||
|   for (const auto &node : record) { | |||
|     fusion_id_allocator->SetFusionId(node, id); | |||
|   } | |||
| } | |||
| // Pass entry point: casts `graph` to a session::KernelGraph and returns the result of MatchUBFusionPattern. | |||
| // Throws via MS_EXCEPTION_IF_NULL when graph is null or the cast yields null. | |||
| bool FusionBasePass::Run(const FuncGraphPtr &graph) { | |||
|   MS_EXCEPTION_IF_NULL(graph); | |||
|   auto as_kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||
|   MS_EXCEPTION_IF_NULL(as_kernel_graph); | |||
|   const session::KernelGraph &graph_ref = *as_kernel_graph; | |||
|   return MatchUBFusionPattern(graph_ref); | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,245 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" | |||
| #include <tuple> | |||
| #include <unordered_set> | |||
| #include <unordered_map> | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <algorithm> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| // File-local limits used by the fusion-type matcher below. | |||
| constexpr int8_t MAX_PATTERN_SIZE = 7; | |||
| constexpr int8_t MIN_PATTERN_SIZE = 2; | |||
| constexpr int8_t ELTWISE_INPUT_SIZE = 2; | |||
| constexpr int8_t ELTWISE_USE = 1; | |||
| constexpr int8_t MULTI_ELTWISE_USE = 2; | |||
| constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4; | |||
| constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3; | |||
| constexpr auto kOpAttrFusionId = "fusion_id"; | |||
| // Returns true when `node` is a TBE kernel of fusion type ELEMWISE and either has at most ELTWISE_USE users or | |||
| // the record is still empty (i.e. node would be the first member of the pattern). | |||
| bool CheckEltWiseNode(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *record, const CNodePtr &node) { | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   MS_EXCEPTION_IF_NULL(record); | |||
|   auto consumers = manager->node_users()[node]; | |||
|   if (AnfAlgo::GetKernelType(node) != KernelType::TBE_KERNEL) { | |||
|     return false; | |||
|   } | |||
|   if (AnfAlgo::GetFusionType(node) != kernel::FusionType::ELEMWISE) { | |||
|     return false; | |||
|   } | |||
|   return consumers.size() <= ELTWISE_USE || record->size() == 0; | |||
| } | |||
| // Shared chain walker for the predecessor and successor sides of a fusion pattern. | |||
| // Starting at `node`, follows input(1) links while CheckEltWiseNode accepts the current node; every accepted node | |||
| // is added to record and visited_set and its real inputs are queued on todo. At most max_size nodes are accepted. | |||
| // Returns (false, node) when the walk cannot continue past that node, otherwise (true, node reached). | |||
| std::tuple<bool, CNodePtr> FindPredAndSuccEltWiseNodes(const int8_t &max_size, FuncGraphManager *manager, | |||
|                                                        std::unordered_set<AnfNodePtr> *visited_set, | |||
|                                                        std::deque<AnfNodePtr> *todo, | |||
|                                                        std::unordered_set<AnfNodePtr> *record, const CNodePtr &node) { | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   MS_EXCEPTION_IF_NULL(visited_set); | |||
|   MS_EXCEPTION_IF_NULL(todo); | |||
|   MS_EXCEPTION_IF_NULL(record); | |||
|   MS_EXCEPTION_IF_NULL(node); | |||
|   CNodePtr cursor = node; | |||
|   if (cursor->inputs().size() < ELTWISE_INPUT_SIZE) { | |||
|     return std::make_tuple(false, cursor); | |||
|   } | |||
|   auto &user_map = manager->node_users(); | |||
|   // depth counts accepted nodes; it is only advanced when the walk continues. | |||
|   for (int8_t depth = 1; CheckEltWiseNode(manager, record, cursor); ++depth) { | |||
|     (void)record->insert(cursor); | |||
|     (void)visited_set->insert(cursor); | |||
|     (void)todo->insert(todo->end(), cursor->inputs().begin() + 1, cursor->inputs().end()); | |||
|     auto first_input = cursor->input(1); | |||
|     MS_EXCEPTION_IF_NULL(first_input); | |||
|     if (!first_input->isa<CNode>()) { | |||
|       return std::make_tuple(false, cursor); | |||
|     } | |||
|     cursor = first_input->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(cursor); | |||
|     // The next node must be a real kernel with enough inputs, a single user, and not seen before. | |||
|     if (!AnfAlgo::IsRealKernel(cursor)) { | |||
|       return std::make_tuple(false, cursor); | |||
|     } | |||
|     if (cursor->inputs().size() < ELTWISE_INPUT_SIZE) { | |||
|       return std::make_tuple(false, cursor); | |||
|     } | |||
|     if (user_map[(cursor)].size() >= MULTI_ELTWISE_USE) { | |||
|       return std::make_tuple(false, cursor); | |||
|     } | |||
|     if (visited_set->find(cursor) != visited_set->end()) { | |||
|       return std::make_tuple(false, cursor); | |||
|     } | |||
|     if (depth >= max_size) { | |||
|       break; | |||
|     } | |||
|   } | |||
|   return std::make_tuple(true, cursor); | |||
| } | |||
| // Adds `node` to the pattern and steps to its first real input. | |||
| // Returns (false, node) when node has MULTI_ELTWISE_USE or more users, has no real input, or its first input is | |||
| // not a CNode. Otherwise returns (ok, input), where ok tells whether that input is a real kernel with fewer than | |||
| // MULTI_ELTWISE_USE users that has not been visited yet. | |||
| std::tuple<bool, CNodePtr> MatchGeneralPattern(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *record, | |||
|                                                std::unordered_set<AnfNodePtr> *visited_set, | |||
|                                                std::deque<AnfNodePtr> *todo, const CNodePtr &node) { | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   MS_EXCEPTION_IF_NULL(record); | |||
|   MS_EXCEPTION_IF_NULL(visited_set); | |||
|   MS_EXCEPTION_IF_NULL(node); | |||
|   MS_EXCEPTION_IF_NULL(todo); | |||
|   CNodePtr current = node; | |||
|   auto &user_map = manager->node_users(); | |||
|   if (user_map[(current)].size() >= MULTI_ELTWISE_USE) { | |||
|     return std::make_tuple(false, current); | |||
|   } | |||
|   (void)record->insert(node); | |||
|   (void)visited_set->insert(node); | |||
|   (void)todo->insert(todo->end(), current->inputs().begin() + 1, current->inputs().end()); | |||
|   if (node->inputs().size() < 2) { | |||
|     return std::make_tuple(false, current); | |||
|   } | |||
|   // only check the first real input, will check all | |||
|   auto first_input = node->input(1); | |||
|   MS_EXCEPTION_IF_NULL(first_input); | |||
|   if (!first_input->isa<CNode>()) { | |||
|     return std::make_tuple(false, current); | |||
|   } | |||
|   current = first_input->cast<CNodePtr>(); | |||
|   MS_EXCEPTION_IF_NULL(current); | |||
|   const bool usable = AnfAlgo::IsRealKernel(current) && user_map[(current)].size() < MULTI_ELTWISE_USE && | |||
|                       visited_set->find(current) == visited_set->end(); | |||
|   return std::make_tuple(usable, current); | |||
| } | |||
| // Grows one fusion pattern starting from `node` and returns the node at which matching stopped. | |||
| // Step 1 walks an elementwise chain from node. Step 2 dispatches on the fusion type of the node reached: | |||
| // COMMREDUCE/SEGMENT go through MatchGeneralPattern and then a second elementwise walk; ELEMWISE stops; | |||
| // CONVLUTION is added to record; every remaining type is only marked visited and has its inputs queued. | |||
| CNodePtr FindFusionAnfNode(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *visited_set, | |||
|                            std::unordered_set<AnfNodePtr> *record, std::deque<AnfNodePtr> *todo, const CNodePtr &node) { | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   MS_EXCEPTION_IF_NULL(visited_set); | |||
|   MS_EXCEPTION_IF_NULL(record); | |||
|   MS_EXCEPTION_IF_NULL(node); | |||
|   MS_EXCEPTION_IF_NULL(todo); | |||
|   // find fusion pattern predecessor nodes | |||
|   auto walk = FindPredAndSuccEltWiseNodes(MAX_MULTI_ELTWISE_SIZE, manager, visited_set, todo, record, node); | |||
|   auto reached = std::get<1>(walk); | |||
|   auto reached_user_count = manager->node_users()[reached].size(); | |||
|   if (!std::get<0>(walk)) { | |||
|     return reached; | |||
|   } | |||
|   if (record->size() > 1 && reached_user_count > 1) { | |||
|     return reached; | |||
|   } | |||
|   if (record->size() >= MAX_MULTI_ELTWISE_SIZE) { | |||
|     return reached; | |||
|   } | |||
|   if (AnfAlgo::GetKernelType(reached) != KernelType::TBE_KERNEL) { | |||
|     return reached; | |||
|   } | |||
|   // key of fusion precessor | |||
|   auto reached_fusion_type = AnfAlgo::GetFusionType(reached); | |||
|   if (reached_fusion_type == kernel::FusionType::ELEMWISE) { | |||
|     return reached; | |||
|   } | |||
|   const bool is_general = reached_fusion_type == kernel::FusionType::COMMREDUCE || | |||
|                           reached_fusion_type == kernel::FusionType::SEGMENT; | |||
|   if (!is_general) { | |||
|     // A convolution joins the pattern; it then shares the handling of every other type. | |||
|     if (reached_fusion_type == kernel::FusionType::CONVLUTION) { | |||
|       (void)record->insert(reached); | |||
|     } | |||
|     (void)visited_set->insert(reached); | |||
|     if (reached != nullptr) { | |||
|       (void)todo->insert(todo->end(), reached->inputs().begin() + 1, reached->inputs().end()); | |||
|     } | |||
|     return reached; | |||
|   } | |||
|   walk = MatchGeneralPattern(manager, record, visited_set, todo, reached); | |||
|   reached = std::get<1>(walk); | |||
|   if (!std::get<0>(walk)) { | |||
|     return reached; | |||
|   } | |||
|   // find fusion pattern successor nodes | |||
|   walk = FindPredAndSuccEltWiseNodes(MAX_PURE_BUFFER_SUCC_SIZE, manager, visited_set, todo, record, reached); | |||
|   return std::get<1>(walk); | |||
| } | |||
| } // namespace | |||
| // Worklist traversal that starts at the first input of the return node. For each real CNode kernel popped from | |||
| // the worklist it asks FindFusionAnfNode to grow a pattern; patterns whose size lies within | |||
| // [MIN_PATTERN_SIZE, MAX_PATTERN_SIZE] are appended to candidate_fusion and tagged with a fusion id. | |||
| void FusionTypeFusionPass::MatchFusionTypePattern(const session::KernelGraph &kernel_graph, | |||
|                                                   FusedNodeRecord *candidate_fusion) { | |||
|   auto graph_manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(graph_manager); | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   auto ret_node = kernel_graph.get_return(); | |||
|   MS_EXCEPTION_IF_NULL(ret_node); | |||
|   if (ret_node->inputs().size() <= 1) { | |||
|     return; | |||
|   } | |||
|   std::deque<AnfNodePtr> worklist; | |||
|   worklist.push_back(ret_node->input(1)); | |||
|   std::unordered_set<AnfNodePtr> seen; | |||
|   while (!worklist.empty()) { | |||
|     auto current = worklist.front(); | |||
|     MS_EXCEPTION_IF_NULL(current); | |||
|     worklist.pop_front(); | |||
|     if (seen.count(current) != 0 || fusion_id_allocator->HasFusionIdAttr(current)) { | |||
|       continue; | |||
|     } | |||
|     // Only fuse real cnode | |||
|     if (!AnfAlgo::IsRealCNodeKernel(current)) { | |||
|       auto passthrough = current->cast<CNodePtr>(); | |||
|       if (passthrough != nullptr) { | |||
|         (void)worklist.insert(worklist.end(), passthrough->inputs().begin() + 1, passthrough->inputs().end()); | |||
|       } | |||
|       continue; | |||
|     } | |||
|     auto anchor = current->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(anchor); | |||
|     std::unordered_set<AnfNodePtr> group; | |||
|     // anchor is replaced by the node at which pattern growth stopped | |||
|     anchor = FindFusionAnfNode(graph_manager.get(), &seen, &group, &worklist, anchor); | |||
|     const auto group_size = group.size(); | |||
|     if (group_size >= MIN_PATTERN_SIZE && group_size <= MAX_PATTERN_SIZE) { | |||
|       candidate_fusion->push_back(group); | |||
|       SetRecordFusionId(group); | |||
|     } | |||
|     if (group.count(anchor) == 0) { | |||
|       worklist.push_back(anchor); | |||
|     } | |||
|     // no node matched | |||
|     if (group.empty()) { | |||
|       (void)seen.insert(current); | |||
|     } | |||
|     (void)worklist.insert(worklist.end(), anchor->inputs().begin() + 1, anchor->inputs().end()); | |||
|   } | |||
| } | |||
| // Runs MatchFusionTypePattern over `kernel_graph` and reports whether it produced any fusion candidate. | |||
| // Returns false without matching when the return node has no real input (inputs().size() <= 1). | |||
| bool FusionTypeFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||
|   auto graph_manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(graph_manager); | |||
|   auto ret_node = kernel_graph.get_return(); | |||
|   MS_EXCEPTION_IF_NULL(ret_node); | |||
|   if (ret_node->inputs().size() > 1) { | |||
|     MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||
|     FusedNodeRecord matched_groups; | |||
|     MatchFusionTypePattern(kernel_graph, &matched_groups); | |||
|     if (!matched_groups.empty()) { | |||
|       MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||
|       return true; | |||
|     } | |||
|   } | |||
|   return false; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Tries to build a fusion candidate that ends in the elementwise node `cnode`. | |||
| // 1. Walks input(1) links from cnode while CheckEltWiseNode accepts the node, until the record holds | |||
| //    MAX_ELTWISE_NUM nodes. | |||
| // 2. The node reached must be a real TBE kernel of fusion type COMMREDUCE without a fusion id; otherwise nothing | |||
| //    is recorded. | |||
| // 3. Continues from the reduce node's own first input to collect up to MAX_ELTWISE_NUM further elementwise nodes. | |||
| // The resulting set is appended to candidate_fusion and tagged with a new fusion id. | |||
| void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
|                                                  FusedNodeRecord *candidate_fusion) { | |||
|   MS_EXCEPTION_IF_NULL(cnode); | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   auto manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   std::unordered_set<AnfNodePtr> record{cnode}; | |||
|   auto eltwise_input = cnode->input(1); | |||
|   while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||
|     (void)record.insert(eltwise_input); | |||
|     auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(input_cnode); | |||
|     eltwise_input = input_cnode->input(1); | |||
|     if (record.size() == MAX_ELTWISE_NUM) { | |||
|       break; | |||
|     } | |||
|   } | |||
|   // eltwise_input is dereferenced by isa<CNode>() right below. | |||
|   MS_EXCEPTION_IF_NULL(eltwise_input); | |||
|   if (!eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || | |||
|       fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { | |||
|     return; | |||
|   } | |||
|   if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && | |||
|       AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::COMMREDUCE) { | |||
|     (void)record.insert(eltwise_input); | |||
|     auto reduce_cnode = eltwise_input->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(reduce_cnode); | |||
|     auto previous_size = record.size(); | |||
|     // Continue upstream of the reduce node. Restarting at cnode->input(1) would only revisit nodes that are | |||
|     // already in the record, so the chain feeding the reduce node would never be collected. | |||
|     if (reduce_cnode->inputs().size() > 1) { | |||
|       auto previous_eltwise_input = reduce_cnode->input(1); | |||
|       while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { | |||
|         (void)record.insert(previous_eltwise_input); | |||
|         auto previous_node = previous_eltwise_input->cast<CNodePtr>(); | |||
|         MS_EXCEPTION_IF_NULL(previous_node); | |||
|         previous_eltwise_input = previous_node->input(1); | |||
|         if (record.size() - previous_size == MAX_ELTWISE_NUM) { | |||
|           break; | |||
|         } | |||
|       } | |||
|     } | |||
|     candidate_fusion->push_back(record); | |||
|     SetRecordFusionId(record); | |||
|   } | |||
| } | |||
| // Scans the graph in TopoSort order from the return node and calls MatchReduceEltwise on every real CNode kernel | |||
| // that has no fusion id, is not the Return primitive, and is a TBE ELEMWISE kernel whose inputs().size() equals | |||
| // ELTWISE_INPUT_SIZE. | |||
| void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
|                                                        FusedNodeRecord *candidate_fusion) { | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||
|   for (auto &candidate : sorted_nodes) { | |||
|     const bool skip = !AnfAlgo::IsRealCNodeKernel(candidate) || fusion_id_allocator->HasFusionIdAttr(candidate) || | |||
|                       AnfAlgo::CheckPrimitiveType(candidate, prim::kPrimReturn); | |||
|     if (skip) { | |||
|       continue; | |||
|     } | |||
|     auto kernel_node = candidate->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||
|     if (AnfAlgo::GetKernelType(kernel_node) != KernelType::TBE_KERNEL) { | |||
|       continue; | |||
|     } | |||
|     if (AnfAlgo::GetFusionType(kernel_node) != kernel::FusionType::ELEMWISE) { | |||
|       continue; | |||
|     } | |||
|     if (kernel_node->inputs().size() != ELTWISE_INPUT_SIZE) { | |||
|       continue; | |||
|     } | |||
|     MatchReduceEltwise(kernel_node, kernel_graph, candidate_fusion); | |||
|   } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| // Buffer-fusion pass registered under the name "ReduceEltwiseFusionPass". | |||
| class ReduceEltwiseFusionPass : public FusionBasePass { | |||
|  public: | |||
|   // id_allocator is forwarded unchanged to FusionBasePass. | |||
|   explicit ReduceEltwiseFusionPass(FusionIdAllocator *id_allocator) | |||
|       : FusionBasePass("ReduceEltwiseFusionPass", id_allocator) {} | |||
|   ~ReduceEltwiseFusionPass() override = default; | |||
|   // Appends every matched node set of kernel_graph to candidate_fusion. | |||
|   void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
|                                 FusedNodeRecord *candidate_fusion) override; | |||
|  private: | |||
|   // Tries to grow one candidate around the elementwise node cnode. | |||
|   void MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
|                           FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" | |||
| #include <vector> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Tries to build a fusion candidate that ends in the elementwise node `cnode`. | |||
| // 1. Walks input(1) links from cnode while CheckEltWiseNode accepts the node, until the record holds | |||
| //    MAX_ELTWISE_NUM nodes. | |||
| // 2. The node reached must be a real TBE kernel of fusion type SEGMENT without a fusion id; otherwise nothing | |||
| //    is recorded. | |||
| // 3. Continues from the segment node's own first input to collect up to MAX_ELTWISE_NUM further elementwise nodes. | |||
| // The resulting set is appended to candidate_fusion and tagged with a new fusion id. | |||
| void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
|                                                    FusedNodeRecord *candidate_fusion) { | |||
|   MS_EXCEPTION_IF_NULL(cnode); | |||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
|   auto manager = kernel_graph.manager(); | |||
|   MS_EXCEPTION_IF_NULL(manager); | |||
|   std::unordered_set<AnfNodePtr> record{cnode}; | |||
|   auto eltwise_input = cnode->input(1); | |||
|   while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||
|     (void)record.insert(eltwise_input); | |||
|     auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(input_cnode); | |||
|     eltwise_input = input_cnode->input(1); | |||
|     if (record.size() == MAX_ELTWISE_NUM) { | |||
|       break; | |||
|     } | |||
|   } | |||
|   // eltwise_input is dereferenced by isa<CNode>() right below. | |||
|   MS_EXCEPTION_IF_NULL(eltwise_input); | |||
|   if (!eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || | |||
|       fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { | |||
|     return; | |||
|   } | |||
|   if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && | |||
|       AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::SEGMENT) { | |||
|     (void)record.insert(eltwise_input); | |||
|     auto segment_cnode = eltwise_input->cast<CNodePtr>(); | |||
|     MS_EXCEPTION_IF_NULL(segment_cnode); | |||
|     auto previous_size = record.size(); | |||
|     // Continue upstream of the segment node. Restarting at cnode->input(1) would only revisit nodes that are | |||
|     // already in the record, so the chain feeding the segment node would never be collected. | |||
|     if (segment_cnode->inputs().size() > 1) { | |||
|       auto previous_eltwise_input = segment_cnode->input(1); | |||
|       while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { | |||
|         (void)record.insert(previous_eltwise_input); | |||
|         auto previous_node = previous_eltwise_input->cast<CNodePtr>(); | |||
|         MS_EXCEPTION_IF_NULL(previous_node); | |||
|         previous_eltwise_input = previous_node->input(1); | |||
|         if (record.size() - previous_size == MAX_ELTWISE_NUM) { | |||
|           break; | |||
|         } | |||
|       } | |||
|     } | |||
|     candidate_fusion->push_back(record); | |||
|     SetRecordFusionId(record); | |||
|   } | |||
| } | |||
| void SegmentEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||
| MatchSegmentEltwise(cnode, kernel_graph, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class SegmentEltwiseFusionPass : public FusionBasePass { | |||
| public: | |||
| explicit SegmentEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||
| : FusionBasePass("SegmentEltwiseFusionPass", idAllocator) {} | |||
| ~SegmentEltwiseFusionPass() override = default; | |||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||
| private: | |||
| void MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| FusedNodeRecord *candidate_fusion); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||
| @@ -1,435 +1,435 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <utility> | |||
| #include <unordered_set> | |||
| #include <unordered_map> | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "device/kernel_info.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
// File-local tuning constants for buffer-fusion pattern handling.
// NOTE(review): only kOpAttrFusionId is referenced by the code visible in this file; the
// numeric limits look like leftovers of earlier pattern matching - confirm before removing.
constexpr int8_t MAX_PATTERN_SIZE = 7;
constexpr int8_t MIN_PATTERN_SIZE = 2;
constexpr int8_t ELTWISE_INPUT_SIZE = 2;
constexpr int8_t ELTWISE_USE = 1;
constexpr int8_t MULTI_ELTWISE_USE = 2;
constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4;
constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3;
// Name of the node attribute that carries the id of the fusion scope a node belongs to.
constexpr auto kOpAttrFusionId = "fusion_id";
| #ifdef DEBUG | |||
| std::string GetFusionTypeName(const kernel::FusionType &type) { | |||
| switch (type) { | |||
| case kernel::FusionType::COMMREDUCE: | |||
| return "COMMREDUCE"; | |||
| case kernel::FusionType::SEGMENT: | |||
| return "SEGMENT"; | |||
| case kernel::FusionType::ELEMWISE: | |||
| return "ELEMWISE"; | |||
| case kernel::FusionType::CONVLUTION: | |||
| return "CONVLUTION"; | |||
| case kernel::FusionType::OPAQUE: | |||
| return "OPAQUE"; | |||
| default: | |||
| return "OPAQUE"; | |||
| } | |||
| } | |||
| void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { | |||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; | |||
| for (auto &node : info.input_nodes) { | |||
| MS_LOG(INFO) << "=== Input: " << node->DebugString(); | |||
| } | |||
| for (auto &node : info.output_nodes) { | |||
| MS_LOG(INFO) << "=== Output: " << node->DebugString(); | |||
| } | |||
| for (auto &node : info.compute_nodes) { | |||
| MS_LOG(INFO) << "=== Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) | |||
| << ")"; | |||
| } | |||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; | |||
| } | |||
| #endif | |||
| CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::vector<AnfNodePtr> &outputs_list, | |||
| const std::vector<AnfNodePtr> &anf_nodes, session::KernelGraph *kernel_graph) { | |||
| MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| std::string fusion_op_name = "FusionOp"; | |||
| for (auto node : anf_nodes) { | |||
| fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); | |||
| } | |||
| auto fusion_op = std::make_shared<Primitive>(fusion_op_name); | |||
| MS_EXCEPTION_IF_NULL(fusion_op); | |||
| std::vector<std::string> input_names; | |||
| for (uint8_t i = 0; i < inputs_list.size(); i++) { | |||
| input_names.emplace_back("input" + std::to_string(i)); | |||
| } | |||
| std::vector<std::string> output_names; | |||
| for (uint8_t i = 0; i < outputs_list.size(); i++) { | |||
| output_names.emplace_back("output" + std::to_string(i)); | |||
| } | |||
| ValuePtr input_names_v = MakeValue(input_names); | |||
| ValuePtr output_names_v = MakeValue(output_names); | |||
| fusion_op->set_attr("input_names", input_names_v); | |||
| fusion_op->set_attr("output_names", output_names_v); | |||
| std::vector<AnfNodePtr> fusion_inputs_list = inputs_list; | |||
| auto value_node = std::make_shared<ValueNode>(fusion_op); | |||
| (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); | |||
| auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); | |||
| if (buffer_fusion_kernel == nullptr) { | |||
| MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; | |||
| } | |||
| buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); | |||
| return buffer_fusion_kernel; | |||
| } | |||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||
| const std::vector<AnfNodePtr> &outputs_list) { | |||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||
| // inputs format and data type | |||
| std::vector<std::string> inputs_format; | |||
| std::vector<TypeId> inputs_data_type; | |||
| for (const auto &input : inputs_list) { | |||
| auto real_input = AnfAlgo::VisitKernel(input, 0); | |||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); | |||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); | |||
| } | |||
| // outputs format and data type | |||
| std::vector<std::string> outputs_format; | |||
| std::vector<TypeId> outputs_data_type; | |||
| for (const auto &output : outputs_list) { | |||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| } else { | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||
| } | |||
| } | |||
| builder.SetInputsFormat(inputs_format); | |||
| builder.SetInputsDeviceType(inputs_data_type); | |||
| builder.SetOutputsFormat(outputs_format); | |||
| builder.SetOutputsDeviceType(outputs_data_type); | |||
| builder.SetKernelType(KernelType::TBE_KERNEL); | |||
| return builder.Build(); | |||
| } | |||
| AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, | |||
| size_t output_index) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| std::vector<AnfNodePtr> tuple_getitem_inputs_list; | |||
| auto value = std::make_shared<ValueNode>(prim::kPrimTupleGetItem); | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| auto idx = NewValueNode(SizeToInt(output_index)); | |||
| MS_EXCEPTION_IF_NULL(idx); | |||
| int temp = SizeToInt(output_index); | |||
| auto imm = std::make_shared<Int32Imm>(temp); | |||
| auto abstract_scalar = std::make_shared<abstract::AbstractScalar>(imm); | |||
| idx->set_abstract(abstract_scalar); | |||
| tuple_getitem_inputs_list.push_back(value); | |||
| tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); | |||
| tuple_getitem_inputs_list.push_back(idx); | |||
| auto tuple_item = kernel_graph->NewCNode(tuple_getitem_inputs_list); | |||
| MS_EXCEPTION_IF_NULL(tuple_item); | |||
| AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, | |||
| {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, | |||
| tuple_item.get()); | |||
| return tuple_item; | |||
| } | |||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||
| const AnfNodePtr &replace_item) { | |||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||
| output_item); | |||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||
| *itr = replace_item; | |||
| } | |||
| } | |||
| } | |||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||
| buffer_fusion_kernel); | |||
| } else { // multiple output | |||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||
| tuple_item); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||
| MS_EXCEPTION_IF_NULL(node1); | |||
| MS_EXCEPTION_IF_NULL(node2); | |||
| auto getitem1 = node1->cast<CNodePtr>(); | |||
| auto getitem2 = node2->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(getitem1); | |||
| MS_EXCEPTION_IF_NULL(getitem2); | |||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||
| return output_idx1 < output_idx2; | |||
| } | |||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||
| for (auto use_node : manager->node_users()[node]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||
| break; | |||
| } | |||
| } | |||
| } else { | |||
| int prev_idx = 0; | |||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||
| std::back_inserter(tuple_getitem_nodes), | |||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||
| for (auto getitem : tuple_getitem_nodes) { | |||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||
| auto input2 = getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||
| } | |||
| prev_idx = output_idx + 1; | |||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||
| const AnfNodePtr &fusion_kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||
| auto output = outputs_list[idx]; | |||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||
| auto output_cnode = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||
| auto input2 = output_cnode->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } else { | |||
| session::AnfWithOutIndex out_pair(output, 0); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace | |||
| void TbeBufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| buffer_fusion_info.second.kernel_build_info = | |||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||
| } | |||
| } | |||
| bool TbeBufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| bool change = false; | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | |||
| buffer_fusion_infos.clear(); | |||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | |||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||
| mindspore::kernel::FusionScopeInfo fusion_scope_info; | |||
| fusion_scope_info.scope_id = buffer_fusion_info.first; | |||
| fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; | |||
| fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; | |||
| fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; | |||
| fusion_scope_infos.push_back(fusion_scope_info); | |||
| #ifdef DEBUG | |||
| DumpFusionScopeInfo(fusion_scope_info); | |||
| #endif | |||
| } | |||
| auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | |||
| std::vector<int32_t> fusion_ids; | |||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||
| MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() | |||
| << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() | |||
| << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); | |||
| fusion_ids.push_back(buffer_fusion_info.first); | |||
| } | |||
| // Replace fusion op from return to head | |||
| std::sort(fusion_ids.begin(), fusion_ids.end()); | |||
| for (auto &fusion_id : fusion_ids) { | |||
| // Get kernel mod when supporting tbe | |||
| if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { | |||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | |||
| continue; | |||
| } | |||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||
| } | |||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | |||
| return change; | |||
| } | |||
| bool TbeBufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||
| session::KernelGraph *kernel_graph) const { | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | |||
| buffer_fusion_info.anf_nodes, kernel_graph); | |||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | |||
| // Set abstract of fusion_op node | |||
| std::vector<TypeId> types; | |||
| std::vector<std::vector<size_t>> shapes; | |||
| for (const auto &out_node : buffer_fusion_info.outputs_list) { | |||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { | |||
| types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); | |||
| shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); | |||
| } | |||
| } | |||
| if (types.empty() || shapes.empty()) { | |||
| MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; | |||
| return false; | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | |||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | |||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||
| return true; | |||
| } | |||
| bool TbeBufferFusion::Run(const FuncGraphPtr &graph) { | |||
| bool changed = false; | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| changed = FuseBufferFusionPattern(kernel_graph.get()); | |||
| // clear fusion_id attr | |||
| for (auto &node : graph->nodes()) { | |||
| if (node != nullptr && node->isa<CNode>()) { | |||
| AnfAlgo::EraseNodeAttr(kAttrFusionId, node); | |||
| } | |||
| } | |||
| return changed; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <utility> | |||
| #include <unordered_set> | |||
| #include <unordered_map> | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "operator/ops.h" | |||
| #include "device/kernel_info.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
// File-local tuning constants for buffer-fusion pattern handling.
// NOTE(review): only kOpAttrFusionId is referenced by the code visible in this part of the
// file; the numeric limits may be used further down - confirm before touching them.
constexpr int8_t MAX_PATTERN_SIZE = 7;
constexpr int8_t MIN_PATTERN_SIZE = 2;
constexpr int8_t ELTWISE_INPUT_SIZE = 2;
constexpr int8_t ELTWISE_USE = 1;
constexpr int8_t MULTI_ELTWISE_USE = 2;
constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4;
constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3;
// Name of the node attribute that carries the id of the fusion scope a node belongs to.
constexpr auto kOpAttrFusionId = "fusion_id";
| #ifdef DEBUG | |||
| std::string GetFusionTypeName(const kernel::FusionType &type) { | |||
| switch (type) { | |||
| case kernel::FusionType::COMMREDUCE: | |||
| return "COMMREDUCE"; | |||
| case kernel::FusionType::SEGMENT: | |||
| return "SEGMENT"; | |||
| case kernel::FusionType::ELEMWISE: | |||
| return "ELEMWISE"; | |||
| case kernel::FusionType::CONVLUTION: | |||
| return "CONVLUTION"; | |||
| case kernel::FusionType::OPAQUE: | |||
| return "OPAQUE"; | |||
| default: | |||
| return "OPAQUE"; | |||
| } | |||
| } | |||
| void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { | |||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; | |||
| for (auto &node : info.input_nodes) { | |||
| MS_LOG(INFO) << "=== Input: " << node->DebugString(); | |||
| } | |||
| for (auto &node : info.output_nodes) { | |||
| MS_LOG(INFO) << "=== Output: " << node->DebugString(); | |||
| } | |||
| for (auto &node : info.compute_nodes) { | |||
| MS_LOG(INFO) << "=== Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) | |||
| << ")"; | |||
| } | |||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; | |||
| } | |||
| #endif | |||
| CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::vector<AnfNodePtr> &outputs_list, | |||
| const std::vector<AnfNodePtr> &anf_nodes, session::KernelGraph *kernel_graph) { | |||
| MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| std::string fusion_op_name = "FusionOp"; | |||
| for (auto node : anf_nodes) { | |||
| fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); | |||
| } | |||
| auto fusion_op = std::make_shared<Primitive>(fusion_op_name); | |||
| MS_EXCEPTION_IF_NULL(fusion_op); | |||
| std::vector<std::string> input_names; | |||
| for (uint8_t i = 0; i < inputs_list.size(); i++) { | |||
| input_names.emplace_back("input" + std::to_string(i)); | |||
| } | |||
| std::vector<std::string> output_names; | |||
| for (uint8_t i = 0; i < outputs_list.size(); i++) { | |||
| output_names.emplace_back("output" + std::to_string(i)); | |||
| } | |||
| ValuePtr input_names_v = MakeValue(input_names); | |||
| ValuePtr output_names_v = MakeValue(output_names); | |||
| fusion_op->set_attr("input_names", input_names_v); | |||
| fusion_op->set_attr("output_names", output_names_v); | |||
| std::vector<AnfNodePtr> fusion_inputs_list = inputs_list; | |||
| auto value_node = std::make_shared<ValueNode>(fusion_op); | |||
| (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); | |||
| auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); | |||
| if (buffer_fusion_kernel == nullptr) { | |||
| MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; | |||
| } | |||
| buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); | |||
| return buffer_fusion_kernel; | |||
| } | |||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||
| const std::vector<AnfNodePtr> &outputs_list) { | |||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||
| // inputs format and data type | |||
| std::vector<std::string> inputs_format; | |||
| std::vector<TypeId> inputs_data_type; | |||
| for (const auto &input : inputs_list) { | |||
| auto real_input = AnfAlgo::VisitKernel(input, 0); | |||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); | |||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); | |||
| } | |||
| // outputs format and data type | |||
| std::vector<std::string> outputs_format; | |||
| std::vector<TypeId> outputs_data_type; | |||
| for (const auto &output : outputs_list) { | |||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| } else { | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||
| } | |||
| } | |||
| builder.SetInputsFormat(inputs_format); | |||
| builder.SetInputsDeviceType(inputs_data_type); | |||
| builder.SetOutputsFormat(outputs_format); | |||
| builder.SetOutputsDeviceType(outputs_data_type); | |||
| builder.SetKernelType(KernelType::TBE_KERNEL); | |||
| return builder.Build(); | |||
| } | |||
| AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, | |||
| size_t output_index) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| std::vector<AnfNodePtr> tuple_getitem_inputs_list; | |||
| auto value = std::make_shared<ValueNode>(prim::kPrimTupleGetItem); | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| auto idx = NewValueNode(SizeToInt(output_index)); | |||
| MS_EXCEPTION_IF_NULL(idx); | |||
| int temp = SizeToInt(output_index); | |||
| auto imm = std::make_shared<Int32Imm>(temp); | |||
| auto abstract_scalar = std::make_shared<abstract::AbstractScalar>(imm); | |||
| idx->set_abstract(abstract_scalar); | |||
| tuple_getitem_inputs_list.push_back(value); | |||
| tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); | |||
| tuple_getitem_inputs_list.push_back(idx); | |||
| auto tuple_item = kernel_graph->NewCNode(tuple_getitem_inputs_list); | |||
| MS_EXCEPTION_IF_NULL(tuple_item); | |||
| AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, | |||
| {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, | |||
| tuple_item.get()); | |||
| return tuple_item; | |||
| } | |||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||
| const AnfNodePtr &replace_item) { | |||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||
| output_item); | |||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||
| *itr = replace_item; | |||
| } | |||
| } | |||
| } | |||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||
| buffer_fusion_kernel); | |||
| } else { // multiple output | |||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||
| tuple_item); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||
| MS_EXCEPTION_IF_NULL(node1); | |||
| MS_EXCEPTION_IF_NULL(node2); | |||
| auto getitem1 = node1->cast<CNodePtr>(); | |||
| auto getitem2 = node2->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(getitem1); | |||
| MS_EXCEPTION_IF_NULL(getitem2); | |||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||
| return output_idx1 < output_idx2; | |||
| } | |||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||
| for (auto use_node : manager->node_users()[node]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||
| break; | |||
| } | |||
| } | |||
| } else { | |||
| int prev_idx = 0; | |||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||
| std::back_inserter(tuple_getitem_nodes), | |||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||
| for (auto getitem : tuple_getitem_nodes) { | |||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||
| auto input2 = getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||
| } | |||
| prev_idx = output_idx + 1; | |||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||
| const AnfNodePtr &fusion_kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||
| auto output = outputs_list[idx]; | |||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||
| auto output_cnode = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||
| auto input2 = output_cnode->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } else { | |||
| session::AnfWithOutIndex out_pair(output, 0); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace | |||
| void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| buffer_fusion_info.second.kernel_build_info = | |||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||
| } | |||
| } | |||
| bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| bool change = false; | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | |||
| buffer_fusion_infos.clear(); | |||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | |||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||
| mindspore::kernel::FusionScopeInfo fusion_scope_info; | |||
| fusion_scope_info.scope_id = buffer_fusion_info.first; | |||
| fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; | |||
| fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; | |||
| fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; | |||
| fusion_scope_infos.push_back(fusion_scope_info); | |||
| #ifdef DEBUG | |||
| DumpFusionScopeInfo(fusion_scope_info); | |||
| #endif | |||
| } | |||
| auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | |||
| std::vector<int32_t> fusion_ids; | |||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||
| MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() | |||
| << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() | |||
| << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); | |||
| fusion_ids.push_back(buffer_fusion_info.first); | |||
| } | |||
| // Replace fusion op from return to head | |||
| std::sort(fusion_ids.begin(), fusion_ids.end()); | |||
| for (auto &fusion_id : fusion_ids) { | |||
| // Get kernel mod when supporting tbe | |||
| if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { | |||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | |||
| continue; | |||
| } | |||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||
| } | |||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | |||
| return change; | |||
| } | |||
| bool UbPatternFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||
| session::KernelGraph *kernel_graph) const { | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | |||
| buffer_fusion_info.anf_nodes, kernel_graph); | |||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | |||
| // Set abstract of fusion_op node | |||
| std::vector<TypeId> types; | |||
| std::vector<std::vector<size_t>> shapes; | |||
| for (const auto &out_node : buffer_fusion_info.outputs_list) { | |||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { | |||
| types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); | |||
| shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); | |||
| } | |||
| } | |||
| if (types.empty() || shapes.empty()) { | |||
| MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; | |||
| return false; | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | |||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | |||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||
| return true; | |||
| } | |||
| bool UbPatternFusion::Run(const FuncGraphPtr &graph) { | |||
| bool changed = false; | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| changed = FuseBufferFusionPattern(kernel_graph.get()); | |||
| // clear fusion_id attr | |||
| for (auto &node : graph->nodes()) { | |||
| if (node != nullptr && node->isa<CNode>()) { | |||
| AnfAlgo::EraseNodeAttr(kAttrFusionId, node); | |||
| } | |||
| } | |||
| return changed; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,50 +1,50 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class TbeBufferFusion : public Pass { | |||
| public: | |||
| TbeBufferFusion() : Pass("TbeBufferFusion") {} | |||
| ~TbeBufferFusion() override = default; | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | |||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| #include "pre_activate/common/fusion_id_allocator.h" | |||
| #include "device/kernel_info.h" | |||
| #include "kernel/kernel.h" | |||
| #include "session/kernel_graph.h" | |||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||
| class UbPatternFusion : public Pass { | |||
| public: | |||
| UbPatternFusion() : Pass("TbeBufferFusion") {} | |||
| ~UbPatternFusion() override = default; | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | |||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||