Merge pull request !7892 from DeshiChen/1028_nonscalar_tensor_to_inputtags/v1.1.0
| @@ -1 +1 @@ | |||||
| Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011 | |||||
| Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc | |||||
| @@ -205,6 +205,8 @@ class CompositeGraph: | |||||
| if output.shape[i] == 1 and inputs[0].shape[i] > 1: | if output.shape[i] == 1 and inputs[0].shape[i] > 1: | ||||
| red_axis.append(i) | red_axis.append(i) | ||||
| else: | else: | ||||
| if isinstance(a['value'], int): | |||||
| a['value'] = [a['value']] | |||||
| for i in a['value']: | for i in a['value']: | ||||
| red_axis.append(i if i >= 0 else dim_size + i) | red_axis.append(i if i >= 0 else dim_size + i) | ||||
| attr['reduce_axis'] = red_axis | attr['reduce_axis'] = red_axis | ||||
| @@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vector<AnfNodePtr | |||||
| AnfNodePtrList outputs; | AnfNodePtrList outputs; | ||||
| std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); | std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); | ||||
| RemoveControlDependOut(fg, &outputs, mng); | RemoveControlDependOut(fg, &outputs, mng); | ||||
| ConvertNonscalarTensorToParameter(fg, &inputs); | |||||
| auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); | auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); | ||||
| if (!is_before_kernel_select) { | if (!is_before_kernel_select) { | ||||
| SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0])); | SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0])); | ||||
| @@ -263,8 +263,9 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const | |||||
| MS_LOG(INFO) << "Collect fusion json: " << fused_name; | MS_LOG(INFO) << "Collect fusion json: " << fused_name; | ||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace | |||||
| void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) { | |||||
| bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(inputs_ptr); | MS_EXCEPTION_IF_NULL(inputs_ptr); | ||||
| auto nodes = TopoSort(fg->get_return()); | auto nodes = TopoSort(fg->get_return()); | ||||
| @@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp | |||||
| } | } | ||||
| if (vmap.empty()) { | if (vmap.empty()) { | ||||
| return; | |||||
| return false; | |||||
| } | } | ||||
| auto mng = fg->manager(); | auto mng = fg->manager(); | ||||
| @@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp | |||||
| inputs.push_back(vnode); | inputs.push_back(vnode); | ||||
| } | } | ||||
| return true; | |||||
| } | } | ||||
| // Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs. | // Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs. | ||||
| std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, | std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, | ||||
| AnfNodePtrList *src_outputs = nullptr) { | |||||
| AnfNodePtrList *src_outputs) { | |||||
| FuncGraphPtr fg; | FuncGraphPtr fg; | ||||
| AnfNodePtrList inputs; | AnfNodePtrList inputs; | ||||
| AnfNodePtrList outputs; | AnfNodePtrList outputs; | ||||
| @@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph( | |||||
| } | } | ||||
| EliminateMakeTuple(fg, mng); | EliminateMakeTuple(fg, mng); | ||||
| ConvertComplexTensorToParameter(fg, &inputs); | |||||
| ConvertNonscalarTensorToParameter(fg, &inputs); | |||||
| outputs.clear(); | outputs.clear(); | ||||
| kernel::GetFuncGraphOutputNodes(fg, &outputs); | kernel::GetFuncGraphOutputNodes(fg, &outputs); | ||||
| return std::make_tuple(fg, inputs, outputs); | return std::make_tuple(fg, inputs, outputs); | ||||
| } | } | ||||
| } // namespace | |||||
| void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | ||||
| const AnfNodePtrList &outputs, kernel::Processor processor) { | const AnfNodePtrList &outputs, kernel::Processor processor) { | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | #include <memory> | ||||
| #include <map> | #include <map> | ||||
| #include <tuple> | |||||
| #include <unordered_set> | #include <unordered_set> | ||||
| #include <nlohmann/json.hpp> | #include <nlohmann/json.hpp> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| @@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph"; | |||||
| constexpr auto kJsonKeyGraphDesc = "graph_desc"; | constexpr auto kJsonKeyGraphDesc = "graph_desc"; | ||||
| constexpr auto kJsonKeyGraphMode = "graph_mode"; | constexpr auto kJsonKeyGraphMode = "graph_mode"; | ||||
| bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr); | |||||
| std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, | |||||
| AnfNodePtrList *src_outputs = nullptr); | |||||
| void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | ||||
| const AnfNodePtrList &outputs, kernel::Processor processor); | const AnfNodePtrList &outputs, kernel::Processor processor); | ||||
| AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs); | AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs); | ||||
| @@ -0,0 +1,57 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/optimizer/graph_kernel/tensor_promotion.h" | |||||
| #include <vector> | |||||
| #include "backend/kernel_compiler/common_utils.h" | |||||
| #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "ir/func_graph.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| bool TensorPromotion::Run(const FuncGraphPtr &func_graph) { | |||||
| MS_EXCEPTION_IF_NULL(func_graph); | |||||
| auto mng = func_graph->manager(); | |||||
| if (mng == nullptr) { | |||||
| mng = Manage(func_graph, true); | |||||
| func_graph->set_manager(mng); | |||||
| } | |||||
| auto todos = TopoSort(func_graph->get_return()); | |||||
| bool changed = false; | |||||
| for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) { | |||||
| auto node = *iter; | |||||
| if (!AnfAlgo::IsGraphKernel(node)) { | |||||
| continue; | |||||
| } | |||||
| auto args = node->cast<CNodePtr>()->inputs(); | |||||
| auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]); | |||||
| if (!ConvertNonscalarTensorToParameter(fg, &args)) { | |||||
| continue; | |||||
| } | |||||
| AnfNodePtrList inputs, outputs; | |||||
| inputs.insert(inputs.end(), args.begin() + 1, args.end()); | |||||
| kernel::GetFuncGraphOutputNodes(fg, &outputs); | |||||
| auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false); | |||||
| SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node)); | |||||
| mng->Replace(node, new_cnode); | |||||
| changed = true; | |||||
| } | |||||
| return changed; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,33 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||||
| #include <memory> | |||||
| #include "ir/func_graph.h" | |||||
| #include "backend/optimizer/common/pass.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| class TensorPromotion : public Pass { | |||||
| public: | |||||
| TensorPromotion() : Pass("graph_kernel_tensor_promotion") {} | |||||
| ~TensorPromotion() override = default; | |||||
| bool Run(const FuncGraphPtr &func_graph); | |||||
| }; | |||||
| using TensorPromotionPtr = std::shared_ptr<TensorPromotion>; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||||
| @@ -38,6 +38,7 @@ | |||||
| #include "backend/optimizer/graph_kernel/arithmetic_simplify.h" | #include "backend/optimizer/graph_kernel/arithmetic_simplify.h" | ||||
| #include "backend/optimizer/graph_kernel/basic_ops_fusion.h" | #include "backend/optimizer/graph_kernel/basic_ops_fusion.h" | ||||
| #include "backend/optimizer/graph_kernel/composite_ops_fusion.h" | #include "backend/optimizer/graph_kernel/composite_ops_fusion.h" | ||||
| #include "backend/optimizer/graph_kernel/tensor_promotion.h" | |||||
| #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h" | #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h" | ||||
| #include "backend/optimizer/graph_kernel/graph_kernel_expander.h" | #include "backend/optimizer/graph_kernel/graph_kernel_expander.h" | ||||
| #include "backend/optimizer/graph_kernel/graph_kernel_cse.h" | #include "backend/optimizer/graph_kernel/graph_kernel_cse.h" | ||||
| @@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_ | |||||
| pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | ||||
| pm->AddPass(std::make_shared<opt::ArithmeticSimplify>()); | pm->AddPass(std::make_shared<opt::ArithmeticSimplify>()); | ||||
| pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | ||||
| pm->AddPass(std::make_shared<opt::TensorPromotion>()); | |||||
| pm->AddPass(std::make_shared<opt::GraphKernelSplitter>()); | pm->AddPass(std::make_shared<opt::GraphKernelSplitter>()); | ||||
| // After Simplify and Splitter, a lot of redundant getitem/maketuple | // After Simplify and Splitter, a lot of redundant getitem/maketuple | ||||
| // will be exposed, use GetitemTuple Pass to delete them. | // will be exposed, use GetitemTuple Pass to delete them. | ||||
| @@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { | |||||
| kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); | kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); | ||||
| for (auto &anf_node : node_list) { | for (auto &anf_node : node_list) { | ||||
| MS_EXCEPTION_IF_NULL(anf_node); | MS_EXCEPTION_IF_NULL(anf_node); | ||||
| auto kernel_info = std::make_shared<device::KernelInfo>(); | |||||
| anf_node->set_kernel_info(kernel_info); | |||||
| if (anf_node->kernel_info() == nullptr) { | |||||
| anf_node->set_kernel_info(std::make_shared<device::KernelInfo>()); | |||||
| } | |||||
| auto anf_cnode = anf_node->cast<CNodePtr>(); | auto anf_cnode = anf_node->cast<CNodePtr>(); | ||||
| MS_EXCEPTION_IF_NULL(anf_cnode); | MS_EXCEPTION_IF_NULL(anf_cnode); | ||||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) { | for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) { | ||||
| @@ -412,8 +413,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { | |||||
| } | } | ||||
| for (auto &anf_node : input_list) { | for (auto &anf_node : input_list) { | ||||
| MS_EXCEPTION_IF_NULL(anf_node); | MS_EXCEPTION_IF_NULL(anf_node); | ||||
| auto kernel_info = std::make_shared<device::KernelInfo>(); | |||||
| anf_node->set_kernel_info(kernel_info); | |||||
| if (anf_node->kernel_info() == nullptr) { | |||||
| anf_node->set_kernel_info(std::make_shared<device::KernelInfo>()); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||