!7892 Convert non-scalar tensor to parameter

Merge pull request !7892 from DeshiChen/1028_nonscalar_tensor_to_input
5 years ago · 5c4940cdcc
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011
 Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc
--- a/mindspore/_extends/graph_kernel/model/model_builder.py
+++ b/mindspore/_extends/graph_kernel/model/model_builder.py
@@ -205,6 +205,8 @@ class CompositeGraph:
                            if output.shape[i] == 1 and inputs[0].shape[i] > 1:
                                red_axis.append(i)
                    else:
                        if isinstance(a['value'], int):
                            a['value'] = [a['value']]
                        for i in a['value']:
                            red_axis.append(i if i >= 0 else dim_size + i)
                    attr['reduce_axis'] = red_axis
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc
@@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vector<AnfNodePtr
    AnfNodePtrList outputs;
    std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes);
    RemoveControlDependOut(fg, &outputs, mng);
    ConvertNonscalarTensorToParameter(fg, &inputs);
    auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select);
    if (!is_before_kernel_select) {
      SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0]));
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
@@ -263,8 +263,9 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const
  MS_LOG(INFO) << "Collect fusion json: " << fused_name;
  return true;
 }
 }  // namespace

 void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
 bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
  MS_EXCEPTION_IF_NULL(inputs_ptr);
  auto nodes = TopoSort(fg->get_return());

@@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
  }

  if (vmap.empty()) {
    return;
    return false;
  }

  auto mng = fg->manager();
@@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp

    inputs.push_back(vnode);
  }
  return true;
 }

 // Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs.
 std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
                                                                                AnfNodePtrList *src_outputs = nullptr) {
                                                                                AnfNodePtrList *src_outputs) {
  FuncGraphPtr fg;
  AnfNodePtrList inputs;
  AnfNodePtrList outputs;
@@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
  }

  EliminateMakeTuple(fg, mng);
  ConvertComplexTensorToParameter(fg, &inputs);
  ConvertNonscalarTensorToParameter(fg, &inputs);

  outputs.clear();
  kernel::GetFuncGraphOutputNodes(fg, &outputs);
  return std::make_tuple(fg, inputs, outputs);
 }
 }  // namespace

 void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                      const AnfNodePtrList &outputs, kernel::Processor processor) {
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <memory>
 #include <map>
 #include <tuple>
 #include <unordered_set>
 #include <nlohmann/json.hpp>
 #include "ir/anf.h"
@@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph";
 constexpr auto kJsonKeyGraphDesc = "graph_desc";
 constexpr auto kJsonKeyGraphMode = "graph_mode";

 bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr);
 std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
                                                                                AnfNodePtrList *src_outputs = nullptr);
 void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                      const AnfNodePtrList &outputs, kernel::Processor processor);
 AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc
@@ -0,0 +1,57 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "backend/optimizer/graph_kernel/tensor_promotion.h"
 #include <vector>
 #include "backend/kernel_compiler/common_utils.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "ir/func_graph.h"

 namespace mindspore {
 namespace opt {
 bool TensorPromotion::Run(const FuncGraphPtr &func_graph) {
  MS_EXCEPTION_IF_NULL(func_graph);
  auto mng = func_graph->manager();
  if (mng == nullptr) {
    mng = Manage(func_graph, true);
    func_graph->set_manager(mng);
  }
  auto todos = TopoSort(func_graph->get_return());

  bool changed = false;
  for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) {
    auto node = *iter;
    if (!AnfAlgo::IsGraphKernel(node)) {
      continue;
    }
    auto args = node->cast<CNodePtr>()->inputs();
    auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]);
    if (!ConvertNonscalarTensorToParameter(fg, &args)) {
      continue;
    }
    AnfNodePtrList inputs, outputs;
    inputs.insert(inputs.end(), args.begin() + 1, args.end());
    kernel::GetFuncGraphOutputNodes(fg, &outputs);
    auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false);
    SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node));
    mng->Replace(node, new_cnode);
    changed = true;
  }

  return changed;
 }
 }  // namespace opt
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h
@@ -0,0 +1,33 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
 #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
 #include <memory>
 #include "ir/func_graph.h"
 #include "backend/optimizer/common/pass.h"

 namespace mindspore {
 namespace opt {
 class TensorPromotion : public Pass {
 public:
  TensorPromotion() : Pass("graph_kernel_tensor_promotion") {}
  ~TensorPromotion() override = default;
  bool Run(const FuncGraphPtr &func_graph);
 };
 using TensorPromotionPtr = std::shared_ptr<TensorPromotion>;
 }  // namespace opt
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -38,6 +38,7 @@
 #include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
 #include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
 #include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
 #include "backend/optimizer/graph_kernel/tensor_promotion.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_expander.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_cse.h"
@@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
  pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
  pm->AddPass(std::make_shared<opt::ArithmeticSimplify>());
  pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
  pm->AddPass(std::make_shared<opt::TensorPromotion>());
  pm->AddPass(std::make_shared<opt::GraphKernelSplitter>());
  // After Simplify and Splitter, a lot of redundant getitem/maketuple
  // will be exposed, use GetitemTuple Pass to delete them.
--- a/mindspore/ccsrc/backend/session/kernel_graph.cc
+++ b/mindspore/ccsrc/backend/session/kernel_graph.cc
@@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
  kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
  for (auto &anf_node : node_list) {
    MS_EXCEPTION_IF_NULL(anf_node);
    auto kernel_info = std::make_shared<device::KernelInfo>();
    anf_node->set_kernel_info(kernel_info);
    if (anf_node->kernel_info() == nullptr) {
      anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
    }
    auto anf_cnode = anf_node->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(anf_cnode);
    for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) {
@@ -412,8 +413,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
  }
  for (auto &anf_node : input_list) {
    MS_EXCEPTION_IF_NULL(anf_node);
    auto kernel_info = std::make_shared<device::KernelInfo>();
    anf_node->set_kernel_info(kernel_info);
    if (anf_node->kernel_info() == nullptr) {
      anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
    }
  }
 }