From 0f8f1cdda73134abba712b55cd421961fb7e9dbe Mon Sep 17 00:00:00 2001 From: dayschan Date: Wed, 4 Nov 2020 16:54:48 +0800 Subject: [PATCH] Eliminate redundant parameters while expanding basic ops. add testcase for Gelu/GeluGrad --- .../graph_kernel/graph_kernel_expander.cc | 26 +++++- .../graph_kernel/graph_kernel_expander.h | 1 + .../graph_kernel/graph_kernel_helper.cc | 5 +- tests/st/ops/graph_kernel/test_gelu.py | 83 +++++++++++++++++++ 4 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 tests/st/ops/graph_kernel/test_gelu.py diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc index 4cb8951af0..fe1dc92576 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc @@ -18,6 +18,7 @@ #include #include +#include <unordered_set> #include #include "backend/kernel_compiler/akg/akg_kernel_json_generator.h" @@ -144,11 +145,35 @@ FuncGraphPtr GraphKernelExpander::CreateExpandFuncGraph(const CNodePtr &node) { return JsonDescToAnf(kernel_desc_str, ori_inputs); } +void GraphKernelExpander::EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList *inputs) { + const auto &ori_parameter = func_graph->parameters(); + auto todos = TopoSort(func_graph->get_return()); + std::unordered_set<AnfNodePtr> used_param; + for (auto node : todos) { + if (node->isa<Parameter>()) { + used_param.insert(node); + } + } + if (used_param.size() == ori_parameter.size()) { + return; + } + AnfNodePtrList new_parameter, new_inputs; + for (size_t i = 0; i < ori_parameter.size(); ++i) { + if (used_param.count(ori_parameter[i])) { + new_parameter.push_back(ori_parameter[i]); + new_inputs.push_back((*inputs)[i]); + } + } + func_graph->set_parameters(new_parameter); + *inputs = std::move(new_inputs); +} + AnfNodePtr GraphKernelExpander::CreateExpandGraphKernel(const FuncGraphPtr &func_graph, const
FuncGraphPtr &new_func_graph, const CNodePtr &node) { std::vector<AnfNodePtr> inputs(node->inputs().begin() + 1, node->inputs().end()); AnfNodePtrList kernel_nodes; AnfNodePtrList outputs; + EliminateRedundantParameters(new_func_graph, &inputs); kernel::GetValidKernelNodes(new_func_graph, &kernel_nodes); kernel::GetFuncGraphOutputNodes(new_func_graph, &outputs); auto graph_kernel_node = CreateNewFuseCNode(func_graph, new_func_graph, inputs, outputs, false); @@ -184,7 +209,6 @@ bool GraphKernelExpander::DoExpand(const FuncGraphPtr &func_graph) { auto graph_kernel_node = CreateExpandGraphKernel(func_graph, new_func_graph, node); new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(AnfAlgo::GetCNodeName(node))); - MS_LOG(INFO) << "create new cnode success."; // replace origin node. (void)mng->Replace(node, graph_kernel_node); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h index 925b1f519f..eda740f056 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h @@ -32,6 +32,7 @@ class GraphKernelExpander : public Pass { FuncGraphPtr CreateExpandFuncGraph(const CNodePtr &node); bool DoExpand(const FuncGraphPtr &func_graph); void ToPrimitive(const FuncGraphPtr &func_graph) const; + void EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList *inputs); AnfNodePtr CreateExpandGraphKernel(const FuncGraphPtr &func_graph, const FuncGraphPtr &new_func_graph, const CNodePtr &node); bool CanExpand(const CNodePtr &node) { diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc index dae293f7b7..6526bc998d 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc +++ 
b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc @@ -702,10 +702,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> std::unordered_set<PrimitivePtr> GetExpandOps() { std::unordered_set<PrimitivePtr> expand_ops = { - prim::kPrimSquare, - prim::kPrimBiasAdd, - prim::kPrimBiasAddGrad, - prim::kPrimGelu, + prim::kPrimSquare, prim::kPrimBiasAdd, prim::kPrimBiasAddGrad, prim::kPrimGelu, prim::kPrimGeluGrad, }; return expand_ops; } diff --git a/tests/st/ops/graph_kernel/test_gelu.py b/tests/st/ops/graph_kernel/test_gelu.py new file mode 100644 index 0000000000..52e55b0bd5 --- /dev/null +++ b/tests/st/ops/graph_kernel/test_gelu.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest +import mindspore.context as context +from mindspore import Tensor +from mindspore.nn import Cell +import mindspore.ops.operations as P +import mindspore.ops.operations._grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, enable_graph_kernel=True, device_target="GPU") + + +class GeluNet(Cell): + def __init__(self): + super(GeluNet, self).__init__() + self.gelu = P.Gelu() + + def construct(self, x): + return self.gelu(x) + + +class GeluGradNet(Cell): + def __init__(self): + super(GeluGradNet, self).__init__() + self.gelu_grad = G.GeluGrad() + + def construct(self, dy, x, y): + return self.gelu_grad(dy, x, y) + + +def CalGelu(x): + tmp = np.sqrt(2.0 / np.pi) * (x + 0.044715 * x * x * x) + expect = 0.5 * x * (1.0 + np.tanh(tmp)) + return expect + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_gelu(): + input_x = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32) + + net = GeluNet() + result = net(Tensor(input_x)) + + expect = CalGelu(input_x) + + res = np.allclose(expect, result.asnumpy(), rtol=1.e-4, atol=1.e-7, equal_nan=True) + assert res + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_gelu_grad(): + input_dy = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32) + input_x = np.random.normal(0, 1, [2, 3, 4, 3]).astype(np.float32) + input_y = CalGelu(input_x) + + net = GeluGradNet() + result = net(Tensor(input_dy), Tensor(input_x), Tensor(input_y)) + + tanh_res = np.tanh(0.7978845608 * (input_x + 0.044715 * input_x * input_x * input_x)) + mul_right = 0.7978845608 + 0.1070322244 * input_x * input_x + dx = 0.5 * (1.0 + tanh_res) + 0.5 * input_x * (1.0 - tanh_res * tanh_res) * mul_right + expect = input_dy * dx + + res = np.allclose(expect, result.asnumpy(), rtol=1.e-4, atol=1.e-7, equal_nan=True) + assert res