Merge pull request !1278 from Etone.Chan/NBFtags/v0.3.0-alpha
| @@ -63,10 +63,17 @@ | |||||
| #include "pre_activate/ascend/format_type/merge_cast_to_op.h" | #include "pre_activate/ascend/format_type/merge_cast_to_op.h" | ||||
| #include "pre_activate/ascend/format_type/check_consistency.h" | #include "pre_activate/ascend/format_type/check_consistency.h" | ||||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | ||||
| #include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" | |||||
| #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" | #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" | ||||
| #include "pre_activate/ascend/enhancer/add_memcpy_async.h" | #include "pre_activate/ascend/enhancer/add_memcpy_async.h" | ||||
| #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" | #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" | ||||
| @@ -281,7 +288,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||||
| AscendDataLayout(kernel_graph); | AscendDataLayout(kernel_graph); | ||||
| // mixed precision optimization | // mixed precision optimization | ||||
| AscendMixPrecision(kernel_graph); | AscendMixPrecision(kernel_graph); | ||||
| // buffer fusion | |||||
| // other optimization | // other optimization | ||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | auto optimizer = std::make_shared<GraphOptimizer>(); | ||||
| auto other_pm = std::make_shared<PassManager>("other_pm"); | auto other_pm = std::make_shared<PassManager>("other_pm"); | ||||
| @@ -291,7 +297,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||||
| other_pm->AddPass(std::make_shared<BroadcastFusion>()); | other_pm->AddPass(std::make_shared<BroadcastFusion>()); | ||||
| other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | ||||
| other_pm->AddPass(std::make_shared<RefreshParameterFormat>()); | other_pm->AddPass(std::make_shared<RefreshParameterFormat>()); | ||||
| other_pm->AddPass(std::make_shared<BufferFusion>()); | |||||
| other_pm->AddPass(std::make_shared<GetitemTuple>()); | other_pm->AddPass(std::make_shared<GetitemTuple>()); | ||||
| other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | ||||
| if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { | if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { | ||||
| @@ -307,6 +312,8 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||||
| DumpIR(file_path, kernel_graph, true); | DumpIR(file_path, kernel_graph, true); | ||||
| DumpIRProto(kernel_graph, "after_hwopt"); | DumpIRProto(kernel_graph, "after_hwopt"); | ||||
| } | } | ||||
| // buffer fusion | |||||
| AscendBackendUBFusionOptimization(kernel_graph); | |||||
| } | } | ||||
| void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | ||||
| @@ -330,10 +337,17 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGrap | |||||
| fusion_id_allocator->Init(); | fusion_id_allocator->Init(); | ||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | auto optimizer = std::make_shared<GraphOptimizer>(); | ||||
| auto ub_fusion_pm = std::make_shared<PassManager>("ub_fusion_pm"); | auto ub_fusion_pm = std::make_shared<PassManager>("ub_fusion_pm"); | ||||
| ub_fusion_pm->AddPass(std::make_shared<ConvDoubleInFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<ConvSingleInFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<EltwiseFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<MatmulEltwiseFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseFusionPass>(fusion_id_allocator.get())); | ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseFusionPass>(fusion_id_allocator.get())); | ||||
| ub_fusion_pm->AddPass(std::make_shared<DepthwiseConvEltwiseFusionPass>(fusion_id_allocator.get())); | ub_fusion_pm->AddPass(std::make_shared<DepthwiseConvEltwiseFusionPass>(fusion_id_allocator.get())); | ||||
| ub_fusion_pm->AddPass(std::make_shared<FusionTypeFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<TbeBufferFusion>()); | |||||
| ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseEltwiseFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<ConvBnReduceFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<ReduceEltwiseFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<SegmentEltwiseFusionPass>(fusion_id_allocator.get())); | |||||
| ub_fusion_pm->AddPass(std::make_shared<UbPatternFusion>()); | |||||
| optimizer->AddPassManager(ub_fusion_pm); | optimizer->AddPassManager(ub_fusion_pm); | ||||
| (void)optimizer->Optimize(kernel_graph); | (void)optimizer->Optimize(kernel_graph); | ||||
| kernel_graph->SetExecOrderByDefault(); | kernel_graph->SetExecOrderByDefault(); | ||||
| @@ -0,0 +1,82 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||||
| const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto add = relu_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(add); | |||||
| auto tuple_getitem = add->input(1); | |||||
| if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { | |||||
| auto getitem = tuple_getitem->cast<CNodePtr>(); | |||||
| auto bnupdate = getitem->input(1); | |||||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||||
| auto input2 = out_getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||||
| } | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate}; | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| } | |||||
| void BnupdateEltwiseEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||||
| auto eltwise_input = cnode->input(1); | |||||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTensorAdd)) { | |||||
| MatchBnupdateAddRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class BnupdateEltwiseEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit BnupdateEltwiseEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("BnupdateEltwiseEltwiseFusionPass", idAllocator) {} | |||||
| ~BnupdateEltwiseEltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||||
| const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -1,96 +1,77 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||||
| const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto getitem = relu_input->cast<CNodePtr>(); | |||||
| auto bnupdate = getitem->input(1); | |||||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||||
| auto input2 = out_getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||||
| } | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| void BnupdateEltwiseFusionPass::MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||||
| auto eltwise_input = cnode->input(1); | |||||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { | |||||
| MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| bool BnupdateEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto return_node = kernel_graph.get_return(); | |||||
| MS_EXCEPTION_IF_NULL(return_node); | |||||
| if (return_node->inputs().size() <= 1) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||||
| FusedNodeRecord candidate_fusion; | |||||
| MatchBnupdateOpNamePattern(kernel_graph, &candidate_fusion); | |||||
| if (candidate_fusion.empty()) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||||
| return true; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||||
| const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto getitem = relu_input->cast<CNodePtr>(); | |||||
| auto bnupdate = getitem->input(1); | |||||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||||
| auto input2 = out_getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||||
| } | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| void BnupdateEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||||
| auto eltwise_input = cnode->input(1); | |||||
| if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { | |||||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { | |||||
| MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,50 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class BnupdateEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| BnupdateEltwiseFusionPass() : FusionBasePass("BnupdateEltwiseFusionPass") {} | |||||
| explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} | |||||
| ~BnupdateEltwiseFusionPass() override = default; | |||||
| bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||||
| private: | |||||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| void MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class BnupdateEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} | |||||
| ~BnupdateEltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,64 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void ConvBnReduceFusionPass::MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto conv = cnode->input(1); | |||||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())}; | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, conv}; | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| void ConvBnReduceFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { | |||||
| MatchConvBnreduce(cnode, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class ConvBnReduceFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit ConvBnReduceFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("ConvBnReduceFusionPass", idAllocator) {} | |||||
| ~ConvBnReduceFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,89 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| // Decides whether `node` can serve as the element-wise member of the conv + element-wise pattern. | |||||
| // Returns true only when `node` is a real CNode kernel without a fusion id, is a TBE kernel of fusion type | |||||
| // ELEMWISE, has exactly ELTWISE_USE users and exactly ELTWISE_INPUT_SIZE entries in inputs(). | |||||
| // Both `manager` and `node` are validated with MS_EXCEPTION_IF_NULL. | |||||
| bool ConvDoubleInFusionPass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   // `node` is produced by CNode::input() at the call site; validate it before the first dereference. | |||||
|   MS_EXCEPTION_IF_NULL(node); | |||||
|   if (!node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { | |||||
|     return false; | |||||
|   } | |||||
|   auto cnode = node->cast<CNodePtr>(); | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   // Only the number of users matters, so read it in place rather than copying the user container. | |||||
|   const auto user_count = manager->node_users()[node].size(); | |||||
|   return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && | |||||
|          AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_count == ELTWISE_USE && | |||||
|          cnode->inputs().size() == ELTWISE_INPUT_SIZE; | |||||
| } | |||||
| // Tries to build a three-node fusion record ending at `cnode`: | |||||
| //   cnode  <-  cnode->input(1) accepted by CheckDoubleInEltWiseNode  <-  that node's input(1), a TBE CONVLUTION kernel. | |||||
| // On success the record is appended to `candidate_fusion` and passed to SetRecordFusionId; otherwise nothing changes. | |||||
| void ConvDoubleInFusionPass::MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
|                                                       FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   std::unordered_set<AnfNodePtr> record{cnode}; | |||||
|   auto eltwise_input = cnode->input(1); | |||||
|   // Validate the input before it is dereferenced, as the depthwise pass does for its input(1). | |||||
|   MS_EXCEPTION_IF_NULL(eltwise_input); | |||||
|   if (!CheckDoubleInEltWiseNode(manager.get(), eltwise_input)) { | |||||
|     return; | |||||
|   } | |||||
|   (void)record.insert(eltwise_input); | |||||
|   auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||||
|   MS_EXCEPTION_IF_NULL(input_cnode); | |||||
|   auto double_in_eltwise_input = input_cnode->input(1); | |||||
|   MS_EXCEPTION_IF_NULL(double_in_eltwise_input); | |||||
|   // The head of the chain has to be a real kernel that no earlier pass has already claimed. | |||||
|   if (!double_in_eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(double_in_eltwise_input) || | |||||
|       fusion_id_allocator->HasFusionIdAttr(double_in_eltwise_input)) { | |||||
|     return; | |||||
|   } | |||||
|   if (AnfAlgo::GetKernelType(double_in_eltwise_input) != KernelType::TBE_KERNEL || | |||||
|       AnfAlgo::GetFusionType(double_in_eltwise_input) != kernel::FusionType::CONVLUTION) { | |||||
|     return; | |||||
|   } | |||||
|   (void)record.insert(double_in_eltwise_input); | |||||
|   candidate_fusion->push_back(record); | |||||
|   SetRecordFusionId(record); | |||||
| } | |||||
| // Walks the graph in TopoSort order starting from the return node and calls MatchConvDoubleInEltwise on every | |||||
| // real, not-yet-fused TBE ELEMWISE kernel whose inputs() holds exactly ELTWISE_INPUT_SIZE entries. | |||||
| void ConvDoubleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
|                                                       FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||||
|   for (auto &node : sorted_nodes) { | |||||
|     const bool is_candidate = AnfAlgo::IsRealCNodeKernel(node) && !fusion_id_allocator->HasFusionIdAttr(node) && | |||||
|                               !AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn); | |||||
|     if (!is_candidate) { | |||||
|       continue; | |||||
|     } | |||||
|     auto kernel_node = node->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||||
|     if (AnfAlgo::GetKernelType(kernel_node) != KernelType::TBE_KERNEL) { | |||||
|       continue; | |||||
|     } | |||||
|     if (AnfAlgo::GetFusionType(kernel_node) != kernel::FusionType::ELEMWISE) { | |||||
|       continue; | |||||
|     } | |||||
|     if (kernel_node->inputs().size() != ELTWISE_INPUT_SIZE) { | |||||
|       continue; | |||||
|     } | |||||
|     MatchConvDoubleInEltwise(kernel_node, kernel_graph, candidate_fusion); | |||||
|   } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| // Buffer-fusion pass built on FusionBasePass and registered under the name "ConvDoubleInFusionPass". | |||||
| class ConvDoubleInFusionPass : public FusionBasePass { | |||||
|  public: | |||||
|   // Hands the pass name and the caller-supplied fusion-id allocator to the base class. | |||||
|   explicit ConvDoubleInFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("ConvDoubleInFusionPass", idAllocator) {} | |||||
|   ~ConvDoubleInFusionPass() override = default; | |||||
|   // Override of the base-class matching hook; matched node sets are appended to candidate_fusion. | |||||
|   void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
|  private: | |||||
|   // Attempts to extend `cnode` backwards into an element-wise + convolution record. | |||||
|   void MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
|   // Predicate used by MatchConvDoubleInEltwise to accept or reject the intermediate element-wise node. | |||||
|   bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,77 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| // Starting at `cnode`, follows input(1) links while CheckEltWiseNode accepts the node, collecting each accepted node, | |||||
| // and stops early once the record holds MAX_ELTWISE_NUM entries. The node reached at the end of the walk must be a | |||||
| // real, not-yet-fused TBE kernel of fusion type CONVLUTION; only then is the record published. | |||||
| void ConvSingleInFusionPass::MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
|                                                       FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   std::unordered_set<AnfNodePtr> record{cnode}; | |||||
|   auto cursor = cnode->input(1); | |||||
|   while (CheckEltWiseNode(manager.get(), cursor)) { | |||||
|     (void)record.insert(cursor); | |||||
|     auto eltwise_cnode = cursor->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(eltwise_cnode); | |||||
|     // Advance before the size test so that `cursor` already names the node examined after the loop. | |||||
|     cursor = eltwise_cnode->input(1); | |||||
|     if (record.size() == MAX_ELTWISE_NUM) { | |||||
|       break; | |||||
|     } | |||||
|   } | |||||
|   const bool usable_kernel = cursor->isa<CNode>() && AnfAlgo::IsRealCNodeKernel(cursor) && | |||||
|                              !fusion_id_allocator->HasFusionIdAttr(cursor); | |||||
|   if (!usable_kernel) { | |||||
|     return; | |||||
|   } | |||||
|   if (AnfAlgo::GetKernelType(cursor) != KernelType::TBE_KERNEL || | |||||
|       AnfAlgo::GetFusionType(cursor) != kernel::FusionType::CONVLUTION) { | |||||
|     return; | |||||
|   } | |||||
|   (void)record.insert(cursor); | |||||
|   candidate_fusion->push_back(record); | |||||
|   SetRecordFusionId(record); | |||||
| } | |||||
| // Walks the graph in TopoSort order starting from the return node and calls MatchConvSingleInEltwise on every | |||||
| // real, not-yet-fused TBE ELEMWISE kernel whose inputs() holds exactly ELTWISE_INPUT_SIZE entries. | |||||
| void ConvSingleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
|                                                       FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||||
|   for (auto &node : sorted_nodes) { | |||||
|     const bool is_candidate = AnfAlgo::IsRealCNodeKernel(node) && !fusion_id_allocator->HasFusionIdAttr(node) && | |||||
|                               !AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn); | |||||
|     if (!is_candidate) { | |||||
|       continue; | |||||
|     } | |||||
|     auto kernel_node = node->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||||
|     if (AnfAlgo::GetKernelType(kernel_node) != KernelType::TBE_KERNEL) { | |||||
|       continue; | |||||
|     } | |||||
|     if (AnfAlgo::GetFusionType(kernel_node) != kernel::FusionType::ELEMWISE) { | |||||
|       continue; | |||||
|     } | |||||
|     if (kernel_node->inputs().size() != ELTWISE_INPUT_SIZE) { | |||||
|       continue; | |||||
|     } | |||||
|     MatchConvSingleInEltwise(kernel_node, kernel_graph, candidate_fusion); | |||||
|   } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,47 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| // Buffer-fusion pass built on FusionBasePass and registered under the name "FusionTypeFusionPass". | |||||
| class FusionTypeFusionPass : public FusionBasePass { | |||||
|  public: | |||||
|   // Constructs the pass with its name only; no fusion-id allocator is supplied to the base class. | |||||
|   FusionTypeFusionPass() : FusionBasePass("FusionTypeFusionPass") {} | |||||
|   // Constructs the pass with its name and the caller-supplied fusion-id allocator. | |||||
|   explicit FusionTypeFusionPass(FusionIdAllocator *idAllocator) | |||||
|       : FusionBasePass("FusionTypeFusionPass", idAllocator) {} | |||||
|   ~FusionTypeFusionPass() override = default; | |||||
|   // Override of the base-class matching hook; the definition lives outside this header. | |||||
|   bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||||
|  private: | |||||
|   // Helper that fills candidate_fusion; the definition lives outside this header. | |||||
|   void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| // Buffer-fusion pass built on FusionBasePass and registered under the name "ConvSingleInFusionPass". | |||||
| class ConvSingleInFusionPass : public FusionBasePass { | |||||
|  public: | |||||
|   // Hands the pass name and the caller-supplied fusion-id allocator to the base class. | |||||
|   explicit ConvSingleInFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("ConvSingleInFusionPass", idAllocator) {} | |||||
|   ~ConvSingleInFusionPass() override = default; | |||||
|   // Override of the base-class matching hook; matched node sets are appended to candidate_fusion. | |||||
|   void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
|  private: | |||||
|   // Attempts to extend `cnode` backwards through element-wise nodes to a convolution kernel. | |||||
|   void MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ | |||||
| @@ -1,107 +1,89 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| // Pairs `cnode` with the node at cnode->input(1) when that node has the primitive expected for the direction: | |||||
| //   is_order == true : DepthwiseConvolution--->Elemwise, input(1) must be DepthwiseConv2dNative | |||||
| //   is_order == false: Elemwise-->DepthwiseConvolution, input(1) must be Relu or ReluV2 | |||||
| // On a match the input node gets the kAttrOutputUsedNum attribute (its current user count) and the pair is | |||||
| // appended to `candidate_fusion` and passed to SetRecordFusionId. | |||||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, | |||||
|                                                             const session::KernelGraph &kernel_graph, | |||||
|                                                             FusedNodeRecord *candidate_fusion, bool is_order) { | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   // Both directions inspect the same input; only the accepted primitive differs, so one code path serves both. | |||||
|   auto producer = cnode->input(1); | |||||
|   MS_EXCEPTION_IF_NULL(producer); | |||||
|   // The former `cnode->isa<CNode>()` test was always true for a non-null CNodePtr and has been dropped. | |||||
|   const bool matched = is_order ? IsPrimitiveCNode(producer, prim::kPrimDepthwiseConv2dNative) | |||||
|                                 : (IsPrimitiveCNode(producer, prim::kPrimRelu) || | |||||
|                                    IsPrimitiveCNode(producer, prim::kPrimReluV2)); | |||||
|   if (!matched) { | |||||
|     return; | |||||
|   } | |||||
|   std::vector<int> output_used_num{SizeToInt(manager->node_users()[producer].size())}; | |||||
|   AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), producer); | |||||
|   std::unordered_set<AnfNodePtr> record{cnode, producer}; | |||||
|   candidate_fusion->push_back(record); | |||||
|   SetRecordFusionId(record); | |||||
| } | |||||
| // Walks the graph in TopoSort order starting from the return node. For each real, not-yet-fused kernel: | |||||
| //   - a TBE ELEMWISE kernel that is ReluV2 or Relu and whose input(1) is a DepthwiseConv2dNative CNode is matched | |||||
| //     with is_order == true; | |||||
| //   - any other kernel whose name equals DepthwiseConv2dNative is matched with is_order == false. | |||||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseOpNamePattern(const session::KernelGraph &kernel_graph, | |||||
|                                                                  FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||||
|   for (auto &node : sorted_nodes) { | |||||
|     const bool is_candidate = AnfAlgo::IsRealCNodeKernel(node) && !fusion_id_allocator->HasFusionIdAttr(node) && | |||||
|                               !AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn); | |||||
|     if (!is_candidate) { | |||||
|       continue; | |||||
|     } | |||||
|     auto kernel_node = node->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||||
|     const bool is_tbe_eltwise = AnfAlgo::GetKernelType(kernel_node) == KernelType::TBE_KERNEL && | |||||
|                                 AnfAlgo::GetFusionType(kernel_node) == kernel::FusionType::ELEMWISE; | |||||
|     if (!is_tbe_eltwise) { | |||||
|       if (AnfAlgo::GetCNodeName(kernel_node) == prim::kPrimDepthwiseConv2dNative->name()) { | |||||
|         MatchDepthwiseConvRelu(kernel_node, kernel_graph, candidate_fusion, false); | |||||
|       } | |||||
|       continue; | |||||
|     } | |||||
|     auto producer = kernel_node->input(1); | |||||
|     const bool is_relu = AnfAlgo::GetCNodeName(kernel_node) == kReluV2OpName || | |||||
|                          AnfAlgo::CheckPrimitiveType(kernel_node, prim::kPrimRelu); | |||||
|     if (is_relu && producer->isa<CNode>() && | |||||
|         AnfAlgo::CheckPrimitiveType(producer, prim::kPrimDepthwiseConv2dNative)) { | |||||
|       MatchDepthwiseConvRelu(kernel_node, kernel_graph, candidate_fusion, true); | |||||
|     } | |||||
|   } | |||||
| } | |||||
| // Runs MatchDepthwiseOpNamePattern over the graph and reports whether it produced any candidate record. | |||||
| // Returns false without matching when the return node has at most one entry in inputs(). | |||||
| bool DepthwiseConvEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   auto return_node = kernel_graph.get_return(); | |||||
|   MS_EXCEPTION_IF_NULL(return_node); | |||||
|   const bool has_output = return_node->inputs().size() > 1; | |||||
|   if (!has_output) { | |||||
|     return false; | |||||
|   } | |||||
|   MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||||
|   FusedNodeRecord candidate_fusion; | |||||
|   MatchDepthwiseOpNamePattern(kernel_graph, &candidate_fusion); | |||||
|   const bool found = !candidate_fusion.empty(); | |||||
|   if (found) { | |||||
|     MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||||
|   } | |||||
|   return found; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| // Pairs `cnode` with the node at cnode->input(1) when that node has the primitive expected for the direction: | |||||
| //   is_order == true : DepthwiseConvolution--->Elemwise, input(1) must be DepthwiseConv2dNative | |||||
| //   is_order == false: Elemwise-->DepthwiseConvolution, input(1) must be Relu or ReluV2 | |||||
| // On a match the input node gets the kAttrOutputUsedNum attribute (its current user count) and the pair is | |||||
| // appended to `candidate_fusion` and passed to SetRecordFusionId. | |||||
| void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, | |||||
|                                                             const session::KernelGraph &kernel_graph, | |||||
|                                                             FusedNodeRecord *candidate_fusion, bool is_order) { | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   // Both directions inspect the same input; only the accepted primitive differs, so one code path serves both. | |||||
|   auto producer = cnode->input(1); | |||||
|   MS_EXCEPTION_IF_NULL(producer); | |||||
|   // The former `cnode->isa<CNode>()` test was always true for a non-null CNodePtr and has been dropped. | |||||
|   const bool matched = is_order ? IsPrimitiveCNode(producer, prim::kPrimDepthwiseConv2dNative) | |||||
|                                 : (IsPrimitiveCNode(producer, prim::kPrimRelu) || | |||||
|                                    IsPrimitiveCNode(producer, prim::kPrimReluV2)); | |||||
|   if (!matched) { | |||||
|     return; | |||||
|   } | |||||
|   std::vector<int> output_used_num{SizeToInt(manager->node_users()[producer].size())}; | |||||
|   AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), producer); | |||||
|   std::unordered_set<AnfNodePtr> record{cnode, producer}; | |||||
|   candidate_fusion->push_back(record); | |||||
|   SetRecordFusionId(record); | |||||
| } | |||||
| // Walks the graph in TopoSort order starting from the return node. For each real, not-yet-fused kernel: | |||||
| //   - a TBE ELEMWISE kernel that is ReluV2 or Relu and whose input(1) is a DepthwiseConv2dNative CNode is matched | |||||
| //     with is_order == true; | |||||
| //   - any other kernel whose name equals DepthwiseConv2dNative is matched with is_order == false. | |||||
| void DepthwiseConvEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
|                                                               FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||||
|   for (auto &node : sorted_nodes) { | |||||
|     const bool is_candidate = AnfAlgo::IsRealCNodeKernel(node) && !fusion_id_allocator->HasFusionIdAttr(node) && | |||||
|                               !AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn); | |||||
|     if (!is_candidate) { | |||||
|       continue; | |||||
|     } | |||||
|     auto kernel_node = node->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||||
|     const bool is_tbe_eltwise = AnfAlgo::GetKernelType(kernel_node) == KernelType::TBE_KERNEL && | |||||
|                                 AnfAlgo::GetFusionType(kernel_node) == kernel::FusionType::ELEMWISE; | |||||
|     if (!is_tbe_eltwise) { | |||||
|       if (AnfAlgo::GetCNodeName(kernel_node) == prim::kPrimDepthwiseConv2dNative->name()) { | |||||
|         MatchDepthwiseConvRelu(kernel_node, kernel_graph, candidate_fusion, false); | |||||
|       } | |||||
|       continue; | |||||
|     } | |||||
|     auto producer = kernel_node->input(1); | |||||
|     const bool is_relu = AnfAlgo::GetCNodeName(kernel_node) == kReluV2OpName || | |||||
|                          AnfAlgo::CheckPrimitiveType(kernel_node, prim::kPrimRelu); | |||||
|     if (is_relu && producer->isa<CNode>() && | |||||
|         AnfAlgo::CheckPrimitiveType(producer, prim::kPrimDepthwiseConv2dNative)) { | |||||
|       MatchDepthwiseConvRelu(kernel_node, kernel_graph, candidate_fusion, true); | |||||
|     } | |||||
|   } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,50 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| // Buffer-fusion pass built on FusionBasePass and registered under the name "DepthwiseConvEltwiseFusionPass". | |||||
| class DepthwiseConvEltwiseFusionPass : public FusionBasePass { | |||||
|  public: | |||||
|   // Constructs the pass with its name only; no fusion-id allocator is supplied to the base class. | |||||
|   DepthwiseConvEltwiseFusionPass() : FusionBasePass("DepthwiseConvEltwiseFusionPass") {} | |||||
|   // Constructs the pass with its name and the caller-supplied fusion-id allocator. | |||||
|   explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} | |||||
|   ~DepthwiseConvEltwiseFusionPass() override = default; | |||||
|   // Override of the base-class hook; reports whether any candidate fusion record was found in kernel_graph. | |||||
|   bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; | |||||
|  private: | |||||
|   // Pairs cnode with its input(1); is_order selects which of the two node orders is being matched. | |||||
|   void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion, bool is_order); | |||||
|   // Scans every kernel in the graph and calls MatchDepthwiseConvRelu on the ones that qualify. | |||||
|   void MatchDepthwiseOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| // Buffer-fusion pass built on FusionBasePass and registered under the name "DepthwiseConvEltwiseFusionPass". | |||||
| class DepthwiseConvEltwiseFusionPass : public FusionBasePass { | |||||
|  public: | |||||
|   // Hands the pass name and the caller-supplied fusion-id allocator to the base class. | |||||
|   explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} | |||||
|   ~DepthwiseConvEltwiseFusionPass() override = default; | |||||
|   // Override of the base-class matching hook; matched node sets are appended to candidate_fusion. | |||||
|   void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
|  private: | |||||
|   // Pairs cnode with its input(1); is_order selects which of the two node orders is being matched. | |||||
|   void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion, bool is_order); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,72 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| // Collects a chain of element-wise nodes that ends at `cnode`: follows input(1) links while CheckEltWiseNode | |||||
| // accepts the node, stopping once the record holds MAX_ELTWISE_SIZE entries. The record is published to | |||||
| // `candidate_fusion` and passed to SetRecordFusionId only when it holds at least MIN_ELTWISE_SIZE nodes. | |||||
| void EltwiseFusionPass::MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
|                                      FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(cnode); | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   auto manager = kernel_graph.manager(); | |||||
|   MS_EXCEPTION_IF_NULL(manager); | |||||
|   std::unordered_set<AnfNodePtr> record{cnode}; | |||||
|   for (auto cursor = cnode->input(1); CheckEltWiseNode(manager.get(), cursor);) { | |||||
|     (void)record.insert(cursor); | |||||
|     // The size limit is tested before stepping to the next input, so a full record ends the walk at once. | |||||
|     if (record.size() == MAX_ELTWISE_SIZE) { | |||||
|       break; | |||||
|     } | |||||
|     auto upstream = cursor->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(upstream); | |||||
|     cursor = upstream->input(1); | |||||
|   } | |||||
|   if (record.size() >= MIN_ELTWISE_SIZE) { | |||||
|     candidate_fusion->push_back(record); | |||||
|     SetRecordFusionId(record); | |||||
|   } | |||||
| } | |||||
| // Walks the graph in TopoSort order starting from the return node and calls MatchEltwise on every real, | |||||
| // not-yet-fused TBE ELEMWISE kernel whose inputs() holds exactly ELTWISE_INPUT_SIZE entries. | |||||
| void EltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
|                                                  FusedNodeRecord *candidate_fusion) { | |||||
|   MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
|   std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return()); | |||||
|   for (auto &node : sorted_nodes) { | |||||
|     const bool is_candidate = AnfAlgo::IsRealCNodeKernel(node) && !fusion_id_allocator->HasFusionIdAttr(node) && | |||||
|                               !AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn); | |||||
|     if (!is_candidate) { | |||||
|       continue; | |||||
|     } | |||||
|     auto kernel_node = node->cast<CNodePtr>(); | |||||
|     MS_EXCEPTION_IF_NULL(kernel_node); | |||||
|     if (AnfAlgo::GetKernelType(kernel_node) != KernelType::TBE_KERNEL) { | |||||
|       continue; | |||||
|     } | |||||
|     if (AnfAlgo::GetFusionType(kernel_node) != kernel::FusionType::ELEMWISE) { | |||||
|       continue; | |||||
|     } | |||||
|     if (kernel_node->inputs().size() != ELTWISE_INPUT_SIZE) { | |||||
|       continue; | |||||
|     } | |||||
|     MatchEltwise(kernel_node, kernel_graph, candidate_fusion); | |||||
|   } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class EltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit EltwiseFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("EltwiseFusionPass", idAllocator) {} | |||||
| ~EltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,71 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| bool FusionBasePass::CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| if (!node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { | |||||
| return false; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| auto user_nodes = manager->node_users()[node]; | |||||
| return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE && | |||||
| cnode->inputs().size() == ELTWISE_INPUT_SIZE; | |||||
| } | |||||
| void FusionBasePass::SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record) { | |||||
| auto id = fusion_id_allocator->AllocateFusionId(); | |||||
| for (auto node : record) { | |||||
| fusion_id_allocator->SetFusionId(node, id); | |||||
| } | |||||
| } | |||||
| bool FusionBasePass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto return_node = kernel_graph.get_return(); | |||||
| MS_EXCEPTION_IF_NULL(return_node); | |||||
| if (return_node->inputs().size() <= 1) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||||
| FusedNodeRecord candidate_fusion; | |||||
| MatchSingleFusionPattern(kernel_graph, &candidate_fusion); | |||||
| if (candidate_fusion.empty()) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||||
| return true; | |||||
| } | |||||
| bool FusionBasePass::Run(const FuncGraphPtr &graph) { | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| return MatchUBFusionPattern(*kernel_graph); | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,50 +1,57 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class FusionBasePass : public Pass { | |||||
| public: | |||||
| explicit FusionBasePass(const std::string &name) : Pass(name) {} | |||||
| FusionBasePass(const std::string &name, FusionIdAllocator *idAllocator) | |||||
| : Pass(name), fusion_id_allocator(idAllocator) {} | |||||
| ~FusionBasePass() override = default; | |||||
| bool Run(const FuncGraphPtr &graph) override; | |||||
| protected: | |||||
| virtual bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) = 0; | |||||
| void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record); | |||||
| FusionIdAllocator *fusion_id_allocator; | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| const int8_t MAX_ELTWISE_NUM = 3; | |||||
| const int8_t MIN_ELTWISE_SIZE = 2; | |||||
| const int8_t ELTWISE_INPUT_SIZE = 2; | |||||
| const int8_t ELTWISE_USE = 1; | |||||
| const int8_t MAX_ELTWISE_SIZE = 6; | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class FusionBasePass : public Pass { | |||||
| public: | |||||
| FusionBasePass(const std::string &name, FusionIdAllocator *idAllocator) | |||||
| : Pass(name), fusion_id_allocator(idAllocator) {} | |||||
| ~FusionBasePass() override = default; | |||||
| bool Run(const FuncGraphPtr &graph) override; | |||||
| bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph); | |||||
| protected: | |||||
| virtual void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) = 0; | |||||
| void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record); | |||||
| bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); | |||||
| FusionIdAllocator *fusion_id_allocator; | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ | |||||
| @@ -0,0 +1,65 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void MatmulEltwiseFusionPass::MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, | |||||
| const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[relu_input].size())}; | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu_input); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, relu_input}; | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| void MatmulEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { | |||||
| auto eltwise_input = cnode->input(1); | |||||
| if (eltwise_input->isa<CNode>() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimMatMul)) { | |||||
| MatchMatmulEltwise(cnode, eltwise_input, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class MatmulEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit MatmulEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("MatmulEltwiseFusionPass", idAllocator) {} | |||||
| ~MatmulEltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -1,38 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void FusionBasePass::SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record) { | |||||
| auto id = fusion_id_allocator->AllocateFusionId(); | |||||
| for (auto node : record) { | |||||
| fusion_id_allocator->SetFusionId(node, id); | |||||
| } | |||||
| } | |||||
| bool FusionBasePass::Run(const FuncGraphPtr &graph) { | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| return MatchUBFusionPattern(*kernel_graph); | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,245 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" | |||||
| #include <tuple> | |||||
| #include <unordered_set> | |||||
| #include <unordered_map> | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <algorithm> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| namespace { | |||||
// Size and use-count limits applied while matching fusion-type patterns in this file.
constexpr int8_t MAX_PATTERN_SIZE = 7;
constexpr int8_t MIN_PATTERN_SIZE = 2;
constexpr int8_t ELTWISE_INPUT_SIZE = 2;
constexpr int8_t ELTWISE_USE = 1;
constexpr int8_t MULTI_ELTWISE_USE = 2;
constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4;
constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3;
constexpr const char *kOpAttrFusionId = "fusion_id";
| bool CheckEltWiseNode(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *record, const CNodePtr &node) { | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| MS_EXCEPTION_IF_NULL(record); | |||||
| auto user_nodes = manager->node_users()[node]; | |||||
| return (AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && | |||||
| (user_nodes.size() <= ELTWISE_USE || record->size() == 0)); | |||||
| } | |||||
| // Common method to check for predecessors and successors in a fusion pattern | |||||
| std::tuple<bool, CNodePtr> FindPredAndSuccEltWiseNodes(const int8_t &max_size, FuncGraphManager *manager, | |||||
| std::unordered_set<AnfNodePtr> *visited_set, | |||||
| std::deque<AnfNodePtr> *todo, | |||||
| std::unordered_set<AnfNodePtr> *record, const CNodePtr &node) { | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| MS_EXCEPTION_IF_NULL(visited_set); | |||||
| MS_EXCEPTION_IF_NULL(todo); | |||||
| MS_EXCEPTION_IF_NULL(record); | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| CNodePtr new_node = node; | |||||
| if (new_node->inputs().size() < ELTWISE_INPUT_SIZE) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| int8_t index = 1; | |||||
| auto &users = manager->node_users(); | |||||
| while (CheckEltWiseNode(manager, record, new_node)) { | |||||
| (void)record->insert(new_node); | |||||
| (void)visited_set->insert(new_node); | |||||
| (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); | |||||
| auto cnode = new_node->input(1); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (!cnode->isa<CNode>()) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| new_node = cnode->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(new_node); | |||||
| if (!AnfAlgo::IsRealKernel(new_node) || new_node->inputs().size() < ELTWISE_INPUT_SIZE || | |||||
| users[(new_node)].size() >= MULTI_ELTWISE_USE || visited_set->find(new_node) != visited_set->end()) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| if (index >= max_size) { | |||||
| break; | |||||
| } | |||||
| index++; | |||||
| } | |||||
| return std::make_tuple(true, new_node); | |||||
| } | |||||
| std::tuple<bool, CNodePtr> MatchGeneralPattern(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *record, | |||||
| std::unordered_set<AnfNodePtr> *visited_set, | |||||
| std::deque<AnfNodePtr> *todo, const CNodePtr &node) { | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| MS_EXCEPTION_IF_NULL(record); | |||||
| MS_EXCEPTION_IF_NULL(visited_set); | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| MS_EXCEPTION_IF_NULL(todo); | |||||
| CNodePtr new_node = node; | |||||
| auto &users = manager->node_users(); | |||||
| if (users[(new_node)].size() >= MULTI_ELTWISE_USE) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| (void)record->insert(node); | |||||
| (void)visited_set->insert(node); | |||||
| (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); | |||||
| if (node->inputs().size() < 2) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| // only check the first real input, will check all | |||||
| auto cnode = node->input(1); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (!cnode->isa<CNode>()) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| new_node = cnode->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(new_node); | |||||
| if (!AnfAlgo::IsRealKernel(new_node) || users[(new_node)].size() >= MULTI_ELTWISE_USE || | |||||
| visited_set->find(new_node) != visited_set->end()) { | |||||
| return std::make_tuple(false, new_node); | |||||
| } | |||||
| return std::make_tuple(true, new_node); | |||||
| } | |||||
| CNodePtr FindFusionAnfNode(FuncGraphManager *manager, std::unordered_set<AnfNodePtr> *visited_set, | |||||
| std::unordered_set<AnfNodePtr> *record, std::deque<AnfNodePtr> *todo, const CNodePtr &node) { | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| MS_EXCEPTION_IF_NULL(visited_set); | |||||
| MS_EXCEPTION_IF_NULL(record); | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| MS_EXCEPTION_IF_NULL(todo); | |||||
| // find fusion pattern predecessor nodes | |||||
| auto ret = FindPredAndSuccEltWiseNodes(MAX_MULTI_ELTWISE_SIZE, manager, visited_set, todo, record, node); | |||||
| auto new_node = std::get<1>(ret); | |||||
| auto node_use_size = manager->node_users()[new_node].size(); | |||||
| if (!std::get<0>(ret) || (record->size() > 1 && node_use_size > 1) || record->size() >= MAX_MULTI_ELTWISE_SIZE || | |||||
| AnfAlgo::GetKernelType(new_node) != KernelType::TBE_KERNEL) { | |||||
| return new_node; | |||||
| } | |||||
| // key of fusion precessor | |||||
| auto node_fusion_type = AnfAlgo::GetFusionType(new_node); | |||||
| switch (node_fusion_type) { | |||||
| case kernel::FusionType::COMMREDUCE: | |||||
| case kernel::FusionType::SEGMENT: | |||||
| ret = MatchGeneralPattern(manager, record, visited_set, todo, new_node); | |||||
| new_node = std::get<1>(ret); | |||||
| if (!std::get<0>(ret)) { | |||||
| return new_node; | |||||
| } | |||||
| break; | |||||
| case kernel::FusionType::ELEMWISE: | |||||
| return new_node; | |||||
| // -fallthrough to default and return | |||||
| case kernel::FusionType::CONVLUTION: | |||||
| (void)record->insert(new_node); | |||||
| default: | |||||
| (void)visited_set->insert(new_node); | |||||
| if (new_node != nullptr) { | |||||
| (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); | |||||
| } | |||||
| return new_node; | |||||
| } | |||||
| // find fusion pattern successor nodes | |||||
| ret = FindPredAndSuccEltWiseNodes(MAX_PURE_BUFFER_SUCC_SIZE, manager, visited_set, todo, record, new_node); | |||||
| return std::get<1>(ret); | |||||
| } | |||||
| } // namespace | |||||
| void FusionTypeFusionPass::MatchFusionTypePattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto return_node = kernel_graph.get_return(); | |||||
| MS_EXCEPTION_IF_NULL(return_node); | |||||
| if (return_node->inputs().size() <= 1) { | |||||
| return; | |||||
| } | |||||
| std::deque<AnfNodePtr> todo; | |||||
| todo.push_back(return_node->input(1)); | |||||
| std::unordered_set<AnfNodePtr> visited_set; | |||||
| while (!todo.empty()) { | |||||
| auto node = todo.front(); | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| todo.pop_front(); | |||||
| std::unordered_set<AnfNodePtr> record; | |||||
| if (visited_set.find(node) != visited_set.end() || fusion_id_allocator->HasFusionIdAttr(node)) { | |||||
| continue; | |||||
| } | |||||
| // Only fuse real cnode | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node)) { | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| if (cnode != nullptr) { | |||||
| (void)todo.insert(todo.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); | |||||
| } | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| // cnode maybe updated | |||||
| cnode = FindFusionAnfNode(manager.get(), &visited_set, &record, &todo, cnode); | |||||
| if (record.size() >= MIN_PATTERN_SIZE && record.size() <= MAX_PATTERN_SIZE) { | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| if (record.find(cnode) == record.end()) { | |||||
| todo.push_back(cnode); | |||||
| } | |||||
| // no node matched | |||||
| if (record.size() == 0) { | |||||
| (void)visited_set.insert(node); | |||||
| } | |||||
| (void)todo.insert(todo.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); | |||||
| } | |||||
| } | |||||
| bool FusionTypeFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto return_node = kernel_graph.get_return(); | |||||
| MS_EXCEPTION_IF_NULL(return_node); | |||||
| if (return_node->inputs().size() <= 1) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; | |||||
| FusedNodeRecord candidate_fusion; | |||||
| MatchFusionTypePattern(kernel_graph, &candidate_fusion); | |||||
| if (candidate_fusion.empty()) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; | |||||
| return true; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,88 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| std::unordered_set<AnfNodePtr> record{cnode}; | |||||
| auto eltwise_input = cnode->input(1); | |||||
| while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||||
| (void)record.insert(eltwise_input); | |||||
| auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(input_cnode); | |||||
| eltwise_input = input_cnode->input(1); | |||||
| if (record.size() == MAX_ELTWISE_NUM) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || | |||||
| fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { | |||||
| return; | |||||
| } | |||||
| if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::COMMREDUCE) { | |||||
| (void)record.insert(eltwise_input); | |||||
| auto previous_eltwise_input = cnode->input(1); | |||||
| auto previous_size = record.size(); | |||||
| while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { | |||||
| (void)record.insert(previous_eltwise_input); | |||||
| auto previous_node = previous_eltwise_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(previous_node); | |||||
| previous_eltwise_input = previous_node->input(1); | |||||
| if (record.size() - previous_size == MAX_ELTWISE_NUM) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||||
| MatchReduceEltwise(cnode, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class ReduceEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit ReduceEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("ReduceEltwiseFusionPass", idAllocator) {} | |||||
| ~ReduceEltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -0,0 +1,88 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include <unordered_set> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| std::unordered_set<AnfNodePtr> record{cnode}; | |||||
| auto eltwise_input = cnode->input(1); | |||||
| while (CheckEltWiseNode(manager.get(), eltwise_input)) { | |||||
| (void)record.insert(eltwise_input); | |||||
| auto input_cnode = eltwise_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(input_cnode); | |||||
| eltwise_input = input_cnode->input(1); | |||||
| if (record.size() == MAX_ELTWISE_NUM) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!eltwise_input->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || | |||||
| fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { | |||||
| return; | |||||
| } | |||||
| if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::SEGMENT) { | |||||
| (void)record.insert(eltwise_input); | |||||
| auto previous_eltwise_input = cnode->input(1); | |||||
| auto previous_size = record.size(); | |||||
| while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { | |||||
| (void)record.insert(previous_eltwise_input); | |||||
| auto previous_node = previous_eltwise_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(previous_node); | |||||
| previous_eltwise_input = previous_node->input(1); | |||||
| if (record.size() - previous_size == MAX_ELTWISE_NUM) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| candidate_fusion->push_back(record); | |||||
| SetRecordFusionId(record); | |||||
| } | |||||
| } | |||||
| void SegmentEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || | |||||
| AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && | |||||
| AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { | |||||
| MatchSegmentEltwise(cnode, kernel_graph, candidate_fusion); | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class SegmentEltwiseFusionPass : public FusionBasePass { | |||||
| public: | |||||
| explicit SegmentEltwiseFusionPass(FusionIdAllocator *idAllocator) | |||||
| : FusionBasePass("SegmentEltwiseFusionPass", idAllocator) {} | |||||
| ~SegmentEltwiseFusionPass() override = default; | |||||
| void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; | |||||
| private: | |||||
| void MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| FusedNodeRecord *candidate_fusion); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_ | |||||
| @@ -1,435 +1,435 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" | |||||
| #include <vector> | |||||
| #include <tuple> | |||||
| #include <utility> | |||||
| #include <unordered_set> | |||||
| #include <unordered_map> | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| namespace { | |||||
// Numeric limits for buffer-fusion pattern matching.
// NOTE(review): none of the numeric limits below is referenced by the functions visible in this file;
// presumably leftovers from the pattern matcher. Confirm before removing.
constexpr int8_t MAX_PATTERN_SIZE = 7;
constexpr int8_t MIN_PATTERN_SIZE = 2;
constexpr int8_t ELTWISE_INPUT_SIZE = 2;
constexpr int8_t ELTWISE_USE = 1;
constexpr int8_t MULTI_ELTWISE_USE = 2;
constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4;
constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3;
// Node attribute key that holds the id of the fusion scope a node belongs to.
constexpr const char *kOpAttrFusionId = "fusion_id";
| #ifdef DEBUG | |||||
| std::string GetFusionTypeName(const kernel::FusionType &type) { | |||||
| switch (type) { | |||||
| case kernel::FusionType::COMMREDUCE: | |||||
| return "COMMREDUCE"; | |||||
| case kernel::FusionType::SEGMENT: | |||||
| return "SEGMENT"; | |||||
| case kernel::FusionType::ELEMWISE: | |||||
| return "ELEMWISE"; | |||||
| case kernel::FusionType::CONVLUTION: | |||||
| return "CONVLUTION"; | |||||
| case kernel::FusionType::OPAQUE: | |||||
| return "OPAQUE"; | |||||
| default: | |||||
| return "OPAQUE"; | |||||
| } | |||||
| } | |||||
| void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { | |||||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; | |||||
| for (auto &node : info.input_nodes) { | |||||
| MS_LOG(INFO) << "=== Input: " << node->DebugString(); | |||||
| } | |||||
| for (auto &node : info.output_nodes) { | |||||
| MS_LOG(INFO) << "=== Output: " << node->DebugString(); | |||||
| } | |||||
| for (auto &node : info.compute_nodes) { | |||||
| MS_LOG(INFO) << "=== Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) | |||||
| << ")"; | |||||
| } | |||||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; | |||||
| } | |||||
| #endif | |||||
| CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::vector<AnfNodePtr> &outputs_list, | |||||
| const std::vector<AnfNodePtr> &anf_nodes, session::KernelGraph *kernel_graph) { | |||||
| MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| std::string fusion_op_name = "FusionOp"; | |||||
| for (auto node : anf_nodes) { | |||||
| fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); | |||||
| } | |||||
| auto fusion_op = std::make_shared<Primitive>(fusion_op_name); | |||||
| MS_EXCEPTION_IF_NULL(fusion_op); | |||||
| std::vector<std::string> input_names; | |||||
| for (uint8_t i = 0; i < inputs_list.size(); i++) { | |||||
| input_names.emplace_back("input" + std::to_string(i)); | |||||
| } | |||||
| std::vector<std::string> output_names; | |||||
| for (uint8_t i = 0; i < outputs_list.size(); i++) { | |||||
| output_names.emplace_back("output" + std::to_string(i)); | |||||
| } | |||||
| ValuePtr input_names_v = MakeValue(input_names); | |||||
| ValuePtr output_names_v = MakeValue(output_names); | |||||
| fusion_op->set_attr("input_names", input_names_v); | |||||
| fusion_op->set_attr("output_names", output_names_v); | |||||
| std::vector<AnfNodePtr> fusion_inputs_list = inputs_list; | |||||
| auto value_node = std::make_shared<ValueNode>(fusion_op); | |||||
| (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); | |||||
| auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); | |||||
| if (buffer_fusion_kernel == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; | |||||
| } | |||||
| buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); | |||||
| return buffer_fusion_kernel; | |||||
| } | |||||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||||
| const std::vector<AnfNodePtr> &outputs_list) { | |||||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | |||||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||||
| // inputs format and data type | |||||
| std::vector<std::string> inputs_format; | |||||
| std::vector<TypeId> inputs_data_type; | |||||
| for (const auto &input : inputs_list) { | |||||
| auto real_input = AnfAlgo::VisitKernel(input, 0); | |||||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); | |||||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); | |||||
| } | |||||
| // outputs format and data type | |||||
| std::vector<std::string> outputs_format; | |||||
| std::vector<TypeId> outputs_data_type; | |||||
| for (const auto &output : outputs_list) { | |||||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| } else { | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||||
| } | |||||
| } | |||||
| builder.SetInputsFormat(inputs_format); | |||||
| builder.SetInputsDeviceType(inputs_data_type); | |||||
| builder.SetOutputsFormat(outputs_format); | |||||
| builder.SetOutputsDeviceType(outputs_data_type); | |||||
| builder.SetKernelType(KernelType::TBE_KERNEL); | |||||
| return builder.Build(); | |||||
| } | |||||
| AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, | |||||
| size_t output_index) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| std::vector<AnfNodePtr> tuple_getitem_inputs_list; | |||||
| auto value = std::make_shared<ValueNode>(prim::kPrimTupleGetItem); | |||||
| MS_EXCEPTION_IF_NULL(value); | |||||
| auto idx = NewValueNode(SizeToInt(output_index)); | |||||
| MS_EXCEPTION_IF_NULL(idx); | |||||
| int temp = SizeToInt(output_index); | |||||
| auto imm = std::make_shared<Int32Imm>(temp); | |||||
| auto abstract_scalar = std::make_shared<abstract::AbstractScalar>(imm); | |||||
| idx->set_abstract(abstract_scalar); | |||||
| tuple_getitem_inputs_list.push_back(value); | |||||
| tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); | |||||
| tuple_getitem_inputs_list.push_back(idx); | |||||
| auto tuple_item = kernel_graph->NewCNode(tuple_getitem_inputs_list); | |||||
| MS_EXCEPTION_IF_NULL(tuple_item); | |||||
| AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, | |||||
| {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, | |||||
| tuple_item.get()); | |||||
| return tuple_item; | |||||
| } | |||||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||||
| const AnfNodePtr &replace_item) { | |||||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||||
| output_item); | |||||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||||
| *itr = replace_item; | |||||
| } | |||||
| } | |||||
| } | |||||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||||
| buffer_fusion_kernel); | |||||
| } else { // multiple output | |||||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||||
| tuple_item); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||||
| for (auto &node : nodes) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| if (!node->isa<CNode>()) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||||
| MS_EXCEPTION_IF_NULL(node1); | |||||
| MS_EXCEPTION_IF_NULL(node2); | |||||
| auto getitem1 = node1->cast<CNodePtr>(); | |||||
| auto getitem2 = node2->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(getitem1); | |||||
| MS_EXCEPTION_IF_NULL(getitem2); | |||||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||||
| return output_idx1 < output_idx2; | |||||
| } | |||||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||||
| for (auto use_node : manager->node_users()[node]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||||
| break; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| int prev_idx = 0; | |||||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||||
| std::back_inserter(tuple_getitem_nodes), | |||||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||||
| for (auto getitem : tuple_getitem_nodes) { | |||||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||||
| auto input2 = getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||||
| } | |||||
| prev_idx = output_idx + 1; | |||||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||||
| const AnfNodePtr &fusion_kernel) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||||
| auto output = outputs_list[idx]; | |||||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||||
| auto output_cnode = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||||
| auto input2 = output_cnode->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } else { | |||||
| session::AnfWithOutIndex out_pair(output, 0); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace | |||||
| void TbeBufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| buffer_fusion_info.second.kernel_build_info = | |||||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||||
| } | |||||
| } | |||||
| bool TbeBufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| bool change = false; | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | |||||
| buffer_fusion_infos.clear(); | |||||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | |||||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||||
| mindspore::kernel::FusionScopeInfo fusion_scope_info; | |||||
| fusion_scope_info.scope_id = buffer_fusion_info.first; | |||||
| fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; | |||||
| fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; | |||||
| fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; | |||||
| fusion_scope_infos.push_back(fusion_scope_info); | |||||
| #ifdef DEBUG | |||||
| DumpFusionScopeInfo(fusion_scope_info); | |||||
| #endif | |||||
| } | |||||
| auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | |||||
| std::vector<int32_t> fusion_ids; | |||||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||||
| MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() | |||||
| << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() | |||||
| << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); | |||||
| fusion_ids.push_back(buffer_fusion_info.first); | |||||
| } | |||||
| // Replace fusion op from return to head | |||||
| std::sort(fusion_ids.begin(), fusion_ids.end()); | |||||
| for (auto &fusion_id : fusion_ids) { | |||||
| // Get kernel mod when supporting tbe | |||||
| if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { | |||||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | |||||
| continue; | |||||
| } | |||||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||||
| } | |||||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | |||||
| return change; | |||||
| } | |||||
| bool TbeBufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||||
| session::KernelGraph *kernel_graph) const { | |||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | |||||
| buffer_fusion_info.anf_nodes, kernel_graph); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | |||||
| // Set abstract of fusion_op node | |||||
| std::vector<TypeId> types; | |||||
| std::vector<std::vector<size_t>> shapes; | |||||
| for (const auto &out_node : buffer_fusion_info.outputs_list) { | |||||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { | |||||
| types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); | |||||
| shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); | |||||
| } | |||||
| } | |||||
| if (types.empty() || shapes.empty()) { | |||||
| MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; | |||||
| return false; | |||||
| } | |||||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | |||||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | |||||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||||
| return true; | |||||
| } | |||||
| bool TbeBufferFusion::Run(const FuncGraphPtr &graph) { | |||||
| bool changed = false; | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| changed = FuseBufferFusionPattern(kernel_graph.get()); | |||||
| // clear fusion_id attr | |||||
| for (auto &node : graph->nodes()) { | |||||
| if (node != nullptr && node->isa<CNode>()) { | |||||
| AnfAlgo::EraseNodeAttr(kAttrFusionId, node); | |||||
| } | |||||
| } | |||||
| return changed; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" | |||||
| #include <vector> | |||||
| #include <tuple> | |||||
| #include <utility> | |||||
| #include <unordered_set> | |||||
| #include <unordered_map> | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include "kernel/kernel_fusion.h" | |||||
| #include "debug/anf_ir_dump.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "operator/ops.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| namespace { | |||||
// Size limits for fusion patterns.
// NOTE(review): none of these limits appears to be referenced by the functions
// in this translation unit - confirm before removing them.
constexpr int8_t MAX_PATTERN_SIZE = 7;
constexpr int8_t MIN_PATTERN_SIZE = 2;
constexpr int8_t ELTWISE_INPUT_SIZE = 2;
constexpr int8_t ELTWISE_USE = 1;
constexpr int8_t MULTI_ELTWISE_USE = 2;
constexpr int8_t MAX_MULTI_ELTWISE_SIZE = 4;
constexpr int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3;
// Name of the node attribute that carries the fusion scope id.
constexpr auto kOpAttrFusionId = "fusion_id";
| #ifdef DEBUG | |||||
| std::string GetFusionTypeName(const kernel::FusionType &type) { | |||||
| switch (type) { | |||||
| case kernel::FusionType::COMMREDUCE: | |||||
| return "COMMREDUCE"; | |||||
| case kernel::FusionType::SEGMENT: | |||||
| return "SEGMENT"; | |||||
| case kernel::FusionType::ELEMWISE: | |||||
| return "ELEMWISE"; | |||||
| case kernel::FusionType::CONVLUTION: | |||||
| return "CONVLUTION"; | |||||
| case kernel::FusionType::OPAQUE: | |||||
| return "OPAQUE"; | |||||
| default: | |||||
| return "OPAQUE"; | |||||
| } | |||||
| } | |||||
| void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { | |||||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; | |||||
| for (auto &node : info.input_nodes) { | |||||
| MS_LOG(INFO) << "=== Input: " << node->DebugString(); | |||||
| } | |||||
| for (auto &node : info.output_nodes) { | |||||
| MS_LOG(INFO) << "=== Output: " << node->DebugString(); | |||||
| } | |||||
| for (auto &node : info.compute_nodes) { | |||||
| MS_LOG(INFO) << "=== Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) | |||||
| << ")"; | |||||
| } | |||||
| MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; | |||||
| } | |||||
| #endif | |||||
| CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::vector<AnfNodePtr> &outputs_list, | |||||
| const std::vector<AnfNodePtr> &anf_nodes, session::KernelGraph *kernel_graph) { | |||||
| MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| std::string fusion_op_name = "FusionOp"; | |||||
| for (auto node : anf_nodes) { | |||||
| fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); | |||||
| } | |||||
| auto fusion_op = std::make_shared<Primitive>(fusion_op_name); | |||||
| MS_EXCEPTION_IF_NULL(fusion_op); | |||||
| std::vector<std::string> input_names; | |||||
| for (uint8_t i = 0; i < inputs_list.size(); i++) { | |||||
| input_names.emplace_back("input" + std::to_string(i)); | |||||
| } | |||||
| std::vector<std::string> output_names; | |||||
| for (uint8_t i = 0; i < outputs_list.size(); i++) { | |||||
| output_names.emplace_back("output" + std::to_string(i)); | |||||
| } | |||||
| ValuePtr input_names_v = MakeValue(input_names); | |||||
| ValuePtr output_names_v = MakeValue(output_names); | |||||
| fusion_op->set_attr("input_names", input_names_v); | |||||
| fusion_op->set_attr("output_names", output_names_v); | |||||
| std::vector<AnfNodePtr> fusion_inputs_list = inputs_list; | |||||
| auto value_node = std::make_shared<ValueNode>(fusion_op); | |||||
| (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); | |||||
| auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); | |||||
| if (buffer_fusion_kernel == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; | |||||
| } | |||||
| buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); | |||||
| return buffer_fusion_kernel; | |||||
| } | |||||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||||
| const std::vector<AnfNodePtr> &outputs_list) { | |||||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | |||||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||||
| // inputs format and data type | |||||
| std::vector<std::string> inputs_format; | |||||
| std::vector<TypeId> inputs_data_type; | |||||
| for (const auto &input : inputs_list) { | |||||
| auto real_input = AnfAlgo::VisitKernel(input, 0); | |||||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); | |||||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); | |||||
| } | |||||
| // outputs format and data type | |||||
| std::vector<std::string> outputs_format; | |||||
| std::vector<TypeId> outputs_data_type; | |||||
| for (const auto &output : outputs_list) { | |||||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| } else { | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||||
| } | |||||
| } | |||||
| builder.SetInputsFormat(inputs_format); | |||||
| builder.SetInputsDeviceType(inputs_data_type); | |||||
| builder.SetOutputsFormat(outputs_format); | |||||
| builder.SetOutputsDeviceType(outputs_data_type); | |||||
| builder.SetKernelType(KernelType::TBE_KERNEL); | |||||
| return builder.Build(); | |||||
| } | |||||
| AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, | |||||
| size_t output_index) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| std::vector<AnfNodePtr> tuple_getitem_inputs_list; | |||||
| auto value = std::make_shared<ValueNode>(prim::kPrimTupleGetItem); | |||||
| MS_EXCEPTION_IF_NULL(value); | |||||
| auto idx = NewValueNode(SizeToInt(output_index)); | |||||
| MS_EXCEPTION_IF_NULL(idx); | |||||
| int temp = SizeToInt(output_index); | |||||
| auto imm = std::make_shared<Int32Imm>(temp); | |||||
| auto abstract_scalar = std::make_shared<abstract::AbstractScalar>(imm); | |||||
| idx->set_abstract(abstract_scalar); | |||||
| tuple_getitem_inputs_list.push_back(value); | |||||
| tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); | |||||
| tuple_getitem_inputs_list.push_back(idx); | |||||
| auto tuple_item = kernel_graph->NewCNode(tuple_getitem_inputs_list); | |||||
| MS_EXCEPTION_IF_NULL(tuple_item); | |||||
| AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, | |||||
| {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, | |||||
| tuple_item.get()); | |||||
| return tuple_item; | |||||
| } | |||||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||||
| const AnfNodePtr &replace_item) { | |||||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||||
| output_item); | |||||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||||
| *itr = replace_item; | |||||
| } | |||||
| } | |||||
| } | |||||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||||
| buffer_fusion_kernel); | |||||
| } else { // multiple output | |||||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||||
| tuple_item); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||||
| for (auto &node : nodes) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| if (!node->isa<CNode>()) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||||
| MS_EXCEPTION_IF_NULL(node1); | |||||
| MS_EXCEPTION_IF_NULL(node2); | |||||
| auto getitem1 = node1->cast<CNodePtr>(); | |||||
| auto getitem2 = node2->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(getitem1); | |||||
| MS_EXCEPTION_IF_NULL(getitem2); | |||||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||||
| return output_idx1 < output_idx2; | |||||
| } | |||||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||||
| for (auto use_node : manager->node_users()[node]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||||
| break; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| int prev_idx = 0; | |||||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||||
| std::back_inserter(tuple_getitem_nodes), | |||||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||||
| for (auto getitem : tuple_getitem_nodes) { | |||||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||||
| auto input2 = getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||||
| } | |||||
| prev_idx = output_idx + 1; | |||||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||||
| const AnfNodePtr &fusion_kernel) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||||
| auto output = outputs_list[idx]; | |||||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||||
| auto output_cnode = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||||
| auto input2 = output_cnode->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } else { | |||||
| session::AnfWithOutIndex out_pair(output, 0); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace | |||||
| void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| buffer_fusion_info.second.kernel_build_info = | |||||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||||
| } | |||||
| } | |||||
| bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| bool change = false; | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | |||||
| buffer_fusion_infos.clear(); | |||||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | |||||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||||
| mindspore::kernel::FusionScopeInfo fusion_scope_info; | |||||
| fusion_scope_info.scope_id = buffer_fusion_info.first; | |||||
| fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; | |||||
| fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; | |||||
| fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; | |||||
| fusion_scope_infos.push_back(fusion_scope_info); | |||||
| #ifdef DEBUG | |||||
| DumpFusionScopeInfo(fusion_scope_info); | |||||
| #endif | |||||
| } | |||||
| auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); | |||||
| std::vector<int32_t> fusion_ids; | |||||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||||
| MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() | |||||
| << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() | |||||
| << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); | |||||
| fusion_ids.push_back(buffer_fusion_info.first); | |||||
| } | |||||
| // Replace fusion op from return to head | |||||
| std::sort(fusion_ids.begin(), fusion_ids.end()); | |||||
| for (auto &fusion_id : fusion_ids) { | |||||
| // Get kernel mod when supporting tbe | |||||
| if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { | |||||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | |||||
| continue; | |||||
| } | |||||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||||
| } | |||||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | |||||
| return change; | |||||
| } | |||||
| bool UbPatternFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||||
| session::KernelGraph *kernel_graph) const { | |||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | |||||
| buffer_fusion_info.anf_nodes, kernel_graph); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | |||||
| // Set abstract of fusion_op node | |||||
| std::vector<TypeId> types; | |||||
| std::vector<std::vector<size_t>> shapes; | |||||
| for (const auto &out_node : buffer_fusion_info.outputs_list) { | |||||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { | |||||
| types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); | |||||
| shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); | |||||
| } | |||||
| } | |||||
| if (types.empty() || shapes.empty()) { | |||||
| MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; | |||||
| return false; | |||||
| } | |||||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | |||||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | |||||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||||
| return true; | |||||
| } | |||||
| bool UbPatternFusion::Run(const FuncGraphPtr &graph) { | |||||
| bool changed = false; | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| auto kernel_graph = graph->cast<std::shared_ptr<session::KernelGraph>>(); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| changed = FuseBufferFusionPattern(kernel_graph.get()); | |||||
| // clear fusion_id attr | |||||
| for (auto &node : graph->nodes()) { | |||||
| if (node != nullptr && node->isa<CNode>()) { | |||||
| AnfAlgo::EraseNodeAttr(kAttrFusionId, node); | |||||
| } | |||||
| } | |||||
| return changed; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -1,50 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class TbeBufferFusion : public Pass { | |||||
| public: | |||||
| TbeBufferFusion() : Pass("TbeBufferFusion") {} | |||||
| ~TbeBufferFusion() override = default; | |||||
| bool Run(const FuncGraphPtr &graph) override; | |||||
| private: | |||||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | |||||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| #include "pre_activate/common/fusion_id_allocator.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "kernel/kernel.h" | |||||
| #include "session/kernel_graph.h" | |||||
| #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; | |||||
| class UbPatternFusion : public Pass { | |||||
| public: | |||||
| UbPatternFusion() : Pass("TbeBufferFusion") {} | |||||
| ~UbPatternFusion() override = default; | |||||
| bool Run(const FuncGraphPtr &graph) override; | |||||
| private: | |||||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | |||||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ | |||||