Merge pull request !7892 from DeshiChen/1028_nonscalar_tensor_to_inputtags/v1.1.0
| @@ -1 +1 @@ | |||
| Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011 | |||
| Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc | |||
| @@ -205,6 +205,8 @@ class CompositeGraph: | |||
| if output.shape[i] == 1 and inputs[0].shape[i] > 1: | |||
| red_axis.append(i) | |||
| else: | |||
| if isinstance(a['value'], int): | |||
| a['value'] = [a['value']] | |||
| for i in a['value']: | |||
| red_axis.append(i if i >= 0 else dim_size + i) | |||
| attr['reduce_axis'] = red_axis | |||
| @@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vector<AnfNodePtr | |||
| AnfNodePtrList outputs; | |||
| std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); | |||
| RemoveControlDependOut(fg, &outputs, mng); | |||
| ConvertNonscalarTensorToParameter(fg, &inputs); | |||
| auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); | |||
| if (!is_before_kernel_select) { | |||
| SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0])); | |||
| @@ -263,8 +263,9 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const | |||
| MS_LOG(INFO) << "Collect fusion json: " << fused_name; | |||
| return true; | |||
| } | |||
| } // namespace | |||
| void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) { | |||
| bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) { | |||
| MS_EXCEPTION_IF_NULL(inputs_ptr); | |||
| auto nodes = TopoSort(fg->get_return()); | |||
| @@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp | |||
| } | |||
| if (vmap.empty()) { | |||
| return; | |||
| return false; | |||
| } | |||
| auto mng = fg->manager(); | |||
| @@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp | |||
| inputs.push_back(vnode); | |||
| } | |||
| return true; | |||
| } | |||
| // Transform nodes (including basic and composite nodes) into a new graph, and collect their inputs and outputs. | |||
| std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, | |||
| AnfNodePtrList *src_outputs = nullptr) { | |||
| AnfNodePtrList *src_outputs) { | |||
| FuncGraphPtr fg; | |||
| AnfNodePtrList inputs; | |||
| AnfNodePtrList outputs; | |||
| @@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph( | |||
| } | |||
| EliminateMakeTuple(fg, mng); | |||
| ConvertComplexTensorToParameter(fg, &inputs); | |||
| ConvertNonscalarTensorToParameter(fg, &inputs); | |||
| outputs.clear(); | |||
| kernel::GetFuncGraphOutputNodes(fg, &outputs); | |||
| return std::make_tuple(fg, inputs, outputs); | |||
| } | |||
| } // namespace | |||
| void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | |||
| const AnfNodePtrList &outputs, kernel::Processor processor) { | |||
| @@ -19,6 +19,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <map> | |||
| #include <tuple> | |||
| #include <unordered_set> | |||
| #include <nlohmann/json.hpp> | |||
| #include "ir/anf.h" | |||
| @@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph"; | |||
| constexpr auto kJsonKeyGraphDesc = "graph_desc"; | |||
| constexpr auto kJsonKeyGraphMode = "graph_mode"; | |||
| bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr); | |||
| std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, | |||
| AnfNodePtrList *src_outputs = nullptr); | |||
| void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, | |||
| const AnfNodePtrList &outputs, kernel::Processor processor); | |||
| AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs); | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/graph_kernel/tensor_promotion.h" | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/func_graph.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| bool TensorPromotion::Run(const FuncGraphPtr &func_graph) { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| auto mng = func_graph->manager(); | |||
| if (mng == nullptr) { | |||
| mng = Manage(func_graph, true); | |||
| func_graph->set_manager(mng); | |||
| } | |||
| auto todos = TopoSort(func_graph->get_return()); | |||
| bool changed = false; | |||
| for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) { | |||
| auto node = *iter; | |||
| if (!AnfAlgo::IsGraphKernel(node)) { | |||
| continue; | |||
| } | |||
| auto args = node->cast<CNodePtr>()->inputs(); | |||
| auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]); | |||
| if (!ConvertNonscalarTensorToParameter(fg, &args)) { | |||
| continue; | |||
| } | |||
| AnfNodePtrList inputs, outputs; | |||
| inputs.insert(inputs.end(), args.begin() + 1, args.end()); | |||
| kernel::GetFuncGraphOutputNodes(fg, &outputs); | |||
| auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false); | |||
| SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node)); | |||
| mng->Replace(node, new_cnode); | |||
| changed = true; | |||
| } | |||
| return changed; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,33 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||
| #include <memory> | |||
| #include "ir/func_graph.h" | |||
| #include "backend/optimizer/common/pass.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class TensorPromotion : public Pass { | |||
| public: | |||
| TensorPromotion() : Pass("graph_kernel_tensor_promotion") {} | |||
| ~TensorPromotion() override = default; | |||
| bool Run(const FuncGraphPtr &func_graph); | |||
| }; | |||
| using TensorPromotionPtr = std::shared_ptr<TensorPromotion>; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_ | |||
| @@ -38,6 +38,7 @@ | |||
| #include "backend/optimizer/graph_kernel/arithmetic_simplify.h" | |||
| #include "backend/optimizer/graph_kernel/basic_ops_fusion.h" | |||
| #include "backend/optimizer/graph_kernel/composite_ops_fusion.h" | |||
| #include "backend/optimizer/graph_kernel/tensor_promotion.h" | |||
| #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h" | |||
| #include "backend/optimizer/graph_kernel/graph_kernel_expander.h" | |||
| #include "backend/optimizer/graph_kernel/graph_kernel_cse.h" | |||
| @@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_ | |||
| pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | |||
| pm->AddPass(std::make_shared<opt::ArithmeticSimplify>()); | |||
| pm->AddPass(std::make_shared<opt::GraphKernelCSE>()); | |||
| pm->AddPass(std::make_shared<opt::TensorPromotion>()); | |||
| pm->AddPass(std::make_shared<opt::GraphKernelSplitter>()); | |||
| // After Simplify and Splitter, a lot of redundant getitem/maketuple | |||
| // will be exposed, use GetitemTuple Pass to delete them. | |||
| @@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { | |||
| kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); | |||
| for (auto &anf_node : node_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto kernel_info = std::make_shared<device::KernelInfo>(); | |||
| anf_node->set_kernel_info(kernel_info); | |||
| if (anf_node->kernel_info() == nullptr) { | |||
| anf_node->set_kernel_info(std::make_shared<device::KernelInfo>()); | |||
| } | |||
| auto anf_cnode = anf_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(anf_cnode); | |||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) { | |||
| @@ -412,8 +413,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { | |||
| } | |||
| for (auto &anf_node : input_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto kernel_info = std::make_shared<device::KernelInfo>(); | |||
| anf_node->set_kernel_info(kernel_info); | |||
| if (anf_node->kernel_info() == nullptr) { | |||
| anf_node->set_kernel_info(std::make_shared<device::KernelInfo>()); | |||
| } | |||
| } | |||
| } | |||