From: @zengzitao Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -88,11 +88,14 @@ std::map<size_t, AnfNodePtr> FindAssignAndOutputVal(const CNodePtr &fg_cnode) { | |||
| return output_replace_map; | |||
| } | |||
| bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr ¶m_user) { | |||
| bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr ¶m_user, const AnfNodePtr &getitem) { | |||
| auto mng = AnfAlgo::GetCNodeFuncGraphPtr(gk_node)->manager(); | |||
| MS_EXCEPTION_IF_NULL(mng); | |||
| bool result = false; | |||
| auto IncludeUser = [&result, &gk_node](const AnfNodePtr &node) { | |||
| auto IncludeUser = [&result, &gk_node, &getitem](const AnfNodePtr &node) { | |||
| if (node == getitem) { | |||
| return EXCLUDE; | |||
| } | |||
| if (node == gk_node) { | |||
| result = true; | |||
| return EXCLUDE; | |||
| @@ -103,23 +106,23 @@ bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr ¶m_user) | |||
| return result; | |||
| } | |||
| void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &gk_node, const AnfNodePtr &par_user_node, | |||
| void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &getitem, const AnfNodePtr &assign_to_node, | |||
| const FuncGraphManagerPtr &mng) { | |||
| // Insert update_state_node, need mount a monad node. | |||
| auto u = NewValueNode(kUMonad); | |||
| u->set_abstract(kUMonad->ToAbstract()); | |||
| AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, gk_node}; | |||
| AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, getitem}; | |||
| auto update_state_node = func_graph->NewCNode(update_state_inputs); | |||
| update_state_node->set_abstract(gk_node->abstract()); | |||
| update_state_node->set_abstract(getitem->abstract()); | |||
| func_graph->AddNode(update_state_node); | |||
| // Insert load_node | |||
| AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), par_user_node, update_state_node}; | |||
| AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), assign_to_node, update_state_node}; | |||
| auto load_node = func_graph->NewCNode(load_inputs); | |||
| load_node->set_abstract(par_user_node->abstract()); | |||
| load_node->set_abstract(assign_to_node->abstract()); | |||
| func_graph->AddNode(load_node); | |||
| mng->Replace(gk_node, par_user_node); | |||
| mng->Replace(getitem, load_node); | |||
| } | |||
| int64_t GetitemIndex(const AnfNodePtr &getitem) { | |||
| @@ -136,17 +139,18 @@ void UpdateUsersOfGraphKernel(const FuncGraphPtr &func_graph, const AnfNodePtr & | |||
| auto getitem = getitem_iter.first; | |||
| if (GetitemIndex(getitem) != removed_index) continue; | |||
| auto getitem_users = mng->node_users()[getitem]; // get a copy of getitem's users before replacing | |||
| mng->Replace(getitem, assign_to); | |||
| for (const auto &getitem_user_iter : getitem_users) { | |||
| auto getitem_user = getitem_user_iter.first; | |||
| // 1. A previous pass `DependFormater` has ensured that all data users are directly link to its | |||
| // input, without Depend node. | |||
| // 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add a ControlDepend. | |||
| if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user)) { | |||
| // 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add update_state and load node to | |||
| // keep exec_order. | |||
| if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user, getitem)) { | |||
| mng->Replace(getitem, assign_to); | |||
| continue; | |||
| } | |||
| KeepExecOrder(func_graph, cnode, getitem_user, mng); | |||
| KeepExecOrder(func_graph, getitem, assign_to, mng); | |||
| } | |||
| break; | |||
| } | |||
| @@ -0,0 +1,102 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.context as context | |||
| from mindspore import Tensor | |||
| from mindspore.nn import Cell | |||
| import mindspore.ops.operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore.common.parameter import Parameter | |||
class TestOptAssignNet_1(Cell):
    """Net that assigns an add result into a parameter, then consumes it.

    The Assign output feeds a downstream real kernel (ReduceMax), which is
    the case the graph-kernel assign-output optimization must keep ordered.
    """

    def __init__(self):
        super(TestOptAssignNet_1, self).__init__()
        self.add = P.Add()
        self.reduce_max = P.ReduceMax()
        # Parameter written inside construct(); initialized to zeros.
        self.param = Parameter(
            Tensor(np.zeros([2, 2, 2]).astype(np.float32)), name='param')

    def construct(self, x, y):
        # Sum the inputs, store the sum into self.param, and keep the
        # assign attached to the value via depend.
        total = self.add(x, y)
        F.depend(total, F.assign(self.param, total))
        return self.reduce_max(total)
class TestOptAssignNet_2(Cell):
    """Net that assigns an add result into a parameter and returns it directly.

    Unlike TestOptAssignNet_1, the Assign output has no further real-kernel
    consumer — the network output IS the assigned value.
    """

    def __init__(self):
        super(TestOptAssignNet_2, self).__init__()
        self.add = P.Add()
        # Parameter written inside construct(); initialized to zeros.
        self.param = Parameter(
            Tensor(np.zeros([2, 2, 2]).astype(np.float32)), name='param')

    def construct(self, x, y):
        # Sum the inputs, store the sum into self.param, and keep the
        # assign attached to the value via depend.
        total = self.add(x, y)
        F.depend(total, F.assign(self.param, total))
        return total
def test_opt_assign_output_1():
    """Check TestOptAssignNet_1 gives the same GPU result with the
    graph-kernel optimization enabled and disabled."""
    np.random.seed(0)
    lhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
    rhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)

    # Run once with graph kernel fusion turned on.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=True, device_target="GPU")
    out_enabled = TestOptAssignNet_1()(Tensor(lhs), Tensor(rhs))

    # Run a fresh instance with the optimization turned off as the baseline.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=False, device_target="GPU")
    out_baseline = TestOptAssignNet_1()(Tensor(lhs), Tensor(rhs))

    assert np.allclose(out_enabled.asnumpy(), out_baseline.asnumpy(),
                       rtol=1.e-4, atol=1.e-7, equal_nan=True)
def test_opt_assign_output_2():
    """Check TestOptAssignNet_2 gives the same GPU result with the
    graph-kernel optimization enabled and disabled."""
    np.random.seed(0)
    lhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
    rhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)

    # Run once with graph kernel fusion turned on.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=True, device_target="GPU")
    out_enabled = TestOptAssignNet_2()(Tensor(lhs), Tensor(rhs))

    # Run a fresh instance with the optimization turned off as the baseline.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=False, device_target="GPU")
    out_baseline = TestOptAssignNet_2()(Tensor(lhs), Tensor(rhs))

    assert np.allclose(out_enabled.asnumpy(), out_baseline.asnumpy(),
                       rtol=1.e-4, atol=1.e-7, equal_nan=True)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_opt_assign_gpu_1():
    """GPU entry point for the assign-with-consumer case (net 1)."""
    test_opt_assign_output_1()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_opt_assign_gpu_2():
    """GPU entry point for the assign-as-output case (net 2)."""
    test_opt_assign_output_2()