From c32bf5ac28575d63c2d9fed1390d413e9c355747 Mon Sep 17 00:00:00 2001
From: tronzhang <6517937+tronzhang@user.noreply.gitee.com>
Date: Wed, 30 Sep 2020 11:50:24 +0800
Subject: [PATCH] Promote complex tensors as graph inputs and correct the
 getitem index for graph kernel fusion.

---
 .../akg/akg_kernel_json_generator.cc          |   5 +-
 .../backend/kernel_compiler/common_utils.cc   |   5 +-
 .../graph_kernel/graph_kernel_helper.cc       | 169 +++++++++++++-----
 .../graph_kernel/graph_kernel_helper.h        |   3 +-
 .../graph_kernel/value_graph_binder.cc        |  45 +++++
 .../graph_kernel/value_graph_binder.h         |  33 ++++
 .../ccsrc/backend/session/gpu_session.cc      |   2 +
 7 files changed, 216 insertions(+), 46 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc
 create mode 100644 mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.h

diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
index e390702163..ebba02e9a6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
@@ -112,8 +112,9 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con
     input_desc_json[kJsonKeyName] = input_ptr->name();
     input_desc_json[kJsonKeyTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index));
     auto input_shape = this->GetInputShape(anf_node, real_input_index);
-    if (anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) &&
-        GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
+    bool fold_const =
+      anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL);
+    if (fold_const && GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
       MS_LOG(DEBUG) << "Take input[" << real_input_index << "] of [" << anf_node->DebugString(2)
                     << "] as const tensor, shape: [" << Vector2Str(input_shape)
                     << "], value: " << input_desc_json[kJsonKeyValue];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
index 25ce15fee8..2c1cf5dcc9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
@@ -701,9 +701,10 @@ bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann:
   auto type_id = tensor->data_type();
   auto *data = tensor->data_c();
   MS_EXCEPTION_IF_NULL(data);
-  if (tensor->DataDim() > 1 || tensor->DataSize() != 1) {
+  if (tensor->DataSize() > 1) {  // not a const tensor.
- MS_LOG(WARNING) << "We take first value of tensor whose datasize != 1, [" << input_node->DebugString(2) << "]"; + MS_LOG(WARNING) << "Not take value of tensor whose datasize greater than 1, [" << input_node->DebugString(2) << "]"; + return false; } if (type_id == kFloat32->type_id()) { diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc index 97ec115cdd..c9861547b6 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc @@ -15,6 +15,7 @@ */ #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" #include +#include #include #include "pipeline/jit/parse/python_adapter.h" #include "pipeline/jit/action.h" @@ -244,7 +245,7 @@ AnfNodePtrList EliminateMakeTuple(const FuncGraphPtr &fg, const FuncGraphManager bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const AnfNodePtrList &outputs, const DumpOption &dump_option, nlohmann::json *op_desc, - std::map *address_node_map) { + std::map *address_node_map = nullptr) { kernel::AkgKernelJsonGenerator akg_kernel_json_generator(dump_option); if (!akg_kernel_json_generator.CollectFusedJson(op_nodes, inputs, outputs)) { MS_LOG(ERROR) << "Collect json desc failed."; @@ -262,6 +263,90 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const MS_LOG(INFO) << "Collect fusion json: " << fused_name; return true; } + +void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) { + MS_EXCEPTION_IF_NULL(inputs_ptr); + auto nodes = TopoSort(fg->get_return()); + + std::map vmap; + for (const auto &node : nodes) { + if (!node->isa()) { + continue; + } + auto &inputs = node->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + auto tnode = inputs[i]; + auto tensor = GetValueNode(tnode); + if (tensor && (tensor->DataSize() > 1)) { + vmap[GetValueNode(tnode)].push_back(tnode); + } + } + } + + if (vmap.empty()) { + return; + } + + auto mng = fg->manager(); + if (mng == nullptr) { + mng = Manage(fg, false); + fg->set_manager(mng); + } + + auto &inputs = *inputs_ptr; + for (auto iter : vmap) { + auto value_nodes = iter.second; + if (value_nodes.empty()) { + MS_LOG(EXCEPTION) << "Invalid value in map!"; + } + + auto vnode = value_nodes[0]; + auto parameter = fg->add_parameter(); + parameter->set_abstract(vnode->abstract()); + parameter->set_kernel_info(vnode->kernel_info_ptr()); + for (const auto &value_node : value_nodes) { + mng->Replace(value_node, parameter); + } + + inputs.push_back(vnode); + } +} + +// Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs. +std::tuple MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes, + AnfNodePtrList *src_outputs = nullptr) { + FuncGraphPtr fg; + AnfNodePtrList inputs; + AnfNodePtrList outputs; + AnfNodePtrList *soutputs = (src_outputs != nullptr) ? 
src_outputs : &outputs; + std::tie(fg, inputs, *soutputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); + + FuncGraphManagerPtr mng = fg->manager(); + if (mng == nullptr) { + mng = Manage(fg, false); + fg->set_manager(mng); + } + + // Inline origin graphkernel + auto cnodes = fg->GetOrderedCnodes(); + for (const auto &n : cnodes) { + if (!AnfAlgo::IsGraphKernel(n)) { + continue; + } + auto graph_kernel_g = GetValueNode(n->input(0)); + AnfNodePtrList ins; + ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end()); + auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope()); + mng->Replace(n, out); + } + + EliminateMakeTuple(fg, mng); + ConvertComplexTensorToParameter(fg, &inputs); + + outputs.clear(); + kernel::GetFuncGraphOutputNodes(fg, &outputs); + return std::make_tuple(fg, inputs, outputs); +} } // namespace void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs, @@ -400,6 +485,7 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f } std::vector fn_inputs; + size_t offset = 0; for (size_t out_idx = 0; out_idx < outputs.size(); out_idx++) { AnfNodePtrList real_outs; // not make tuple out, replace @@ -427,7 +513,7 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f auto value_node = value_input->cast(); MS_EXCEPTION_IF_NULL(value_node); int item_idx = GetValue(value_node->value()); - int new_item_idx = SizeToInt(out_idx) + item_idx; + int new_item_idx = SizeToInt(out_idx) + offset + item_idx; fn_inputs.clear(); fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem)); fn_inputs.push_back(new_fuse_cnode); @@ -436,6 +522,8 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f new_out->set_abstract(get_item_cnode->abstract()); mng->Replace(get_item_cnode, new_out); } + + offset += real_outs.size() - 1; } } @@ -454,31 +542,17 @@ void FuseNodesToSubGraph(const std::vector &fuse_nodes, FuncGraphPtr fg; AnfNodePtrList inputs; + AnfNodePtrList src_outputs; AnfNodePtrList outputs; - std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); - // Remove nest make tuple in outs - auto expand_out = GetExpandOuts(outputs); - auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, expand_out, is_before_kernel_select); + std::tie(fg, inputs, outputs) = MixedNodesTransToGraph(fuse_nodes, &src_outputs); + auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); if (!is_before_kernel_select) { - SetNewKernelInfo(fuse_new_node, fg, inputs, expand_out, AnfAlgo::GetProcessor(fuse_nodes[0])); + SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0])); } - ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs); + // Handle get-item probleam. 
+  ReplaceNewFuseCNode(kernel_graph, fuse_new_node, src_outputs);
 
-  // Inline origin graphkernel
-  auto cnodes = fg->GetOrderedCnodes();
-  for (const auto &n : cnodes) {
-    if (!AnfAlgo::IsGraphKernel(n)) {
-      continue;
-    }
-    auto graph_kernel_g = GetValueNode<FuncGraphPtr>(n->input(0));
-    AnfNodePtrList ins;
-    ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end());
-    auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope());
-    mng->Replace(n, out);
-  }
-
-  EliminateMakeTuple(fg, mng);
   // set graphKernel attr
   std::string fuse_op_name = "";
   for (auto &fuse_node : fuse_nodes) {
@@ -512,32 +586,45 @@ bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, n
   if (is_single_graph_kernel) {
     fg = AnfAlgo::GetCNodeFuncGraphPtr(nodes[0]);
     kernel::GetValidKernelNodes(fg, &op_nodes, &inputs, &outputs);
-    return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
   } else if (!has_graph_kernel) {
     std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(nodes);
     op_nodes = nodes;
-    return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
+  } else {
+    // When there are both basic and composite ops, the composite ops should be inlined into the basic ops' graph,
+    // so a new graph has to be generated (because they may be in the main graph!).
+    // If address_node_map is wanted, we should map the new nodes in the new graph to the old ones; not supported yet.
+    MS_LOG(EXCEPTION) << "Mixing basic and composite ops is not supported yet!";
   }
 
-  std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(nodes);
-  auto mng = Manage(fg, false);
-  fg->set_manager(mng);
-  // Inline origin graph kernel
-  auto fg_nodes = fg->GetOrderedCnodes();
-  for (auto const &n : fg_nodes) {
-    if (!AnfAlgo::IsGraphKernel(n)) {
-      continue;
-    }
-    auto graph_kernel_g = GetValueNode<FuncGraphPtr>(n->input(0));
-    AnfNodePtrList ins;
-    ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end());
-    auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope());
-    mng->Replace(n, out);
+  return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
+}
+
+bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc) {
+  MS_EXCEPTION_IF_NULL(op_desc);
+  if (nodes.empty()) {
+    MS_LOG(ERROR) << "Input node list is empty.";
+    return false;
   }
-  inputs.clear();
-  outputs.clear();
+
+  FuncGraphPtr fg;
+  AnfNodePtrList op_nodes, inputs, outputs;
+  if (nodes.size() == 1 && AnfAlgo::IsGraphKernel(nodes[0])) {
+    fg = AnfAlgo::GetCNodeFuncGraphPtr(nodes[0]);
+  } else {
+    std::tie(fg, inputs, outputs) = MixedNodesTransToGraph(nodes);
+    inputs.clear();
+    outputs.clear();
+  }
+
   kernel::GetValidKernelNodes(fg, &op_nodes, &inputs, &outputs);
-  return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
+
+  auto mng = fg->manager();
+  if (mng == nullptr) {
+    mng = Manage(fg, false);
+    fg->set_manager(mng);
+  }
+
+  return GenJson(op_nodes, inputs, outputs, dump_option, op_desc);
 }
 
 bool AnfToJsonDesc(const std::vector<FuncGraphPtr> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc) {
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
index ebcddbdff3..64a66d9e7b 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
@@ -47,8 +47,9 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &kernel_graph, const AnfNodePtr &new
 void FuseNodesToSubGraph(const std::vector<AnfNodePtr> &fuse_nodes,
                          const std::shared_ptr<session::KernelGraph> &kernel_graph, const std::string &postfix,
                          bool is_before_kernel_select);
+bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc);
 bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc,
-                   std::map<std::string, AnfNodePtr> *address_node_map = nullptr);
+                   std::map<std::string, AnfNodePtr> *address_node_map);
 bool AnfToJsonDesc(const std::vector<FuncGraphPtr> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc);
 FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> &inputs);
 std::unordered_set<PrimitivePtr> GetExpandOps();
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc
new file mode 100644
index 0000000000..db0078833d
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/graph_kernel/value_graph_binder.h"
+#include
+#include "frontend/optimizer/irpass.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "backend/kernel_compiler/common_utils.h"
+#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
+
+namespace mindspore {
+namespace opt {
+bool BindValueToGraph::Run(const FuncGraphPtr &func_graph) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  auto todos = TopoSort(func_graph->get_return());
+  auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(func_graph);
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  auto &value_nodes = kernel_graph->graph_value_nodes();
+  bool changed = false;
+  for (auto node : todos) {
+    if (!GetValueNode(node)) {
+      continue;
+    }
+    if (auto vptr = node->cast<ValueNodePtr>(); value_nodes.count(vptr) == 0) {
+      kernel_graph->AddValueNodeToGraph(vptr);
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.h
new file mode 100644
index 0000000000..b9fd608e69
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_ +#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_ +#include +#include "ir/func_graph.h" +#include "backend/optimizer/common/pass.h" + +namespace mindspore { +namespace opt { +class BindValueToGraph : public Pass { + public: + BindValueToGraph() : Pass("bind_value_to_graph") {} + ~BindValueToGraph() override = default; + bool Run(const FuncGraphPtr &func_graph); +}; +using BindValueToGraphPtr = std::shared_ptr; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_ diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 7da013a849..149a3070cf 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -38,6 +38,7 @@ #include "backend/optimizer/gpu/remove_format_transform_pair.h" #include "backend/optimizer/gpu/remove_redundant_format_transform.h" #include "backend/optimizer/gpu/cudnn_inplace_fusion.h" +#include "backend/optimizer/graph_kernel/value_graph_binder.h" #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h" #include "backend/optimizer/graph_kernel/graph_kernel_expander.h" #include "backend/optimizer/graph_kernel/basic_ops_fusion.h" @@ -116,6 +117,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr &kernel_ pm->AddPass(std::make_shared()); pm->AddPass(std::make_shared()); pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); optimizer->AddPassManager(pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault();