From f0f55ad7e826199d96498b3efbae7bd305751870 Mon Sep 17 00:00:00 2001 From: lichenever Date: Mon, 30 Mar 2020 11:31:45 +0800 Subject: [PATCH 01/58] fix_cast_bug --- mindspore/ccsrc/parallel/step_parallel.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index 78bec00bcf..31dc77b595 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -653,6 +653,13 @@ LossNodeInfo GetLossNodeInfo(const AnfNodePtr& loss_node) { MS_EXCEPTION_IF_NULL(pre_node); LossNodeInfo node_info; + // return -> cast + auto pre_cnode = pre_node->cast(); + MS_EXCEPTION_IF_NULL(pre_cnode); + auto pre_prim = GetValueNode(pre_cnode->input(0)); + if (pre_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { + pre_node = pre_cnode->input(1); + } // return -> cast auto pre_cnode = pre_node->cast(); @@ -1970,7 +1977,10 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { MS_EXCEPTION_IF_NULL(current_value); PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); +<<<<<<< HEAD +======= +>>>>>>> fix_cast_bug // return -> cast if (current_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { pre_cnode = pre_cnode->input(1)->cast(); From 4c2aa41f1d859ba3a57d12593736bb7cc6b52ed6 Mon Sep 17 00:00:00 2001 From: leonwanghui Date: Tue, 31 Mar 2020 10:27:40 +0800 Subject: [PATCH 02/58] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?7=20:=20[AutoParallel]Fix=20bug=20in=20the=20case=20of=20two=20?= =?UTF-8?q?cast'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ccsrc/parallel/step_auto_parallel.cc | 2 -- mindspore/ccsrc/parallel/step_parallel.cc | 11 +++---- .../parallel/test_element_wise_function.py | 29 ------------------- 3 files changed, 4 insertions(+), 38 deletions(-) diff --git 
a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 50e6a1e84e..7a895a9458 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -350,8 +350,6 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { } OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) { - MS_EXCEPTION_IF_NULL(prim); - MS_EXCEPTION_IF_NULL(cnode); auto attrs = prim->attrs(); std::vector shape_list = ExtractShape(cnode); if (shape_list.empty()) { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index 31dc77b595..af5eb0159f 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -374,6 +374,7 @@ bool IsParallelCareNode(const CNodePtr& cnode) { if (prim == nullptr) { return false; } + auto attrs = prim->attrs(); if (IsInBlackList(prim)) { MS_LOG(INFO) << "Parallel don't care node: " << prim->name(); return false; @@ -653,13 +654,6 @@ LossNodeInfo GetLossNodeInfo(const AnfNodePtr& loss_node) { MS_EXCEPTION_IF_NULL(pre_node); LossNodeInfo node_info; - // return -> cast - auto pre_cnode = pre_node->cast(); - MS_EXCEPTION_IF_NULL(pre_cnode); - auto pre_prim = GetValueNode(pre_cnode->input(0)); - if (pre_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { - pre_node = pre_cnode->input(1); - } // return -> cast auto pre_cnode = pre_node->cast(); @@ -1978,6 +1972,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> fix_cast_bug @@ -1988,6 +1983,8 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { current_prim = GetValueNode(pre_cnode->input(0)); } +======= +>>>>>>> 回退 'Pull Request !17 : [AutoParallel]Fix bug in the case of two cast' // notice: the GetNext op has not input if 
(INVALID_LOSS_OPS.find(current_prim->name()) != INVALID_LOSS_OPS.end()) { MS_LOG(INFO) << "The loss is: " << current_prim->name(); diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 2eb3a22ed2..0c65593d6a 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -268,32 +268,3 @@ def test_cast_before_mirror3(): y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) _executor.compile(net, x, y, b) - - -def test_mul_two_cast(): - class Net(nn.Cell): - def __init__(self, strategy1, strategy2, strategy3): - super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.mul2 = P.Mul().set_strategy(strategy2) - self.cast = P.Cast().set_strategy(strategy3) - self.cast2 = P.Cast().set_strategy(strategy3) - - def construct(self, x, y, b): - out = self.mul(x, y) - out = self.mul2(out, b) - out = self.cast(out, ms.int32) - out = self.cast2(out, ms.bool_) - return out - - context.set_auto_parallel_context(device_num=8, global_rank=0) - strategy1 = ((2, 2), (2, 2)) - strategy2 = ((8, 1), (8, 1)) - strategy3 = ((8, 1), ) - net = GradWrap(Net(strategy1, strategy2, strategy3)) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - - x = Tensor(np.ones([128, 32]), dtype=ms.float32) - y = Tensor(np.ones([128, 32]), dtype=ms.float32) - b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) From 1984e4a1ffd938a9d24e6f3db215c7a70cafb114 Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Tue, 31 Mar 2020 09:34:09 +0800 Subject: [PATCH 03/58] add operator diag and diag_part --- mindspore/ccsrc/transform/convert.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index d2a11948ef..8eed207f59 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ 
b/mindspore/ccsrc/transform/convert.cc @@ -190,6 +190,7 @@ const char kNameAtan2[] = "Atan2"; const char kNameApplyRMSProp[] = "ApplyRMSProp"; const char kNameApplyCenteredRMSProp[] = "ApplyCenteredRMSProp"; + // -----------------OpAdapter initialization-------------- std::unordered_map &DfGraphConvertor::get_adpt_map() { static std::unordered_map adpt_map = { From b27129c9da5c4f54f2ec8d5170fd0628cbbe87bb Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Tue, 31 Mar 2020 16:18:04 +0800 Subject: [PATCH 04/58] modify longtime python ut --- .../train/summary/test_summary_performance.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/ut/python/train/summary/test_summary_performance.py diff --git a/tests/ut/python/train/summary/test_summary_performance.py b/tests/ut/python/train/summary/test_summary_performance.py new file mode 100644 index 0000000000..9ee9725d13 --- /dev/null +++ b/tests/ut/python/train/summary/test_summary_performance.py @@ -0,0 +1,97 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +@File : test_summary.py +@Author: +@Date : 2019-07-4 +@Desc : test summary function +""" +import os +import logging +import time +import numpy as np +from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data +from mindspore.common.tensor import Tensor + +CUR_DIR = os.getcwd() +SUMMARY_DIR = CUR_DIR + "/test_temp_summary_event_file/" + +log = logging.getLogger("test") +log.setLevel(level=logging.ERROR) + +def get_now_time_ns(): + """get the time of second""" + time_second = int(time.time_ns()) + return time_second + +def get_test_data(step): + """ get_test_data """ + # pylint: disable=unused-argument + test_data_list = [] + tag1 = "xt1[:Tensor]" + tag2 = "xt2[:Tensor]" + tag3 = "xt3[:Tensor]" + np1 = np.random.random((5, 4, 3, 5)) + np2 = np.random.random((5, 5, 3, 5)) + np3 = np.random.random((4, 5, 3, 5)) + + dict1 = {} + dict1["name"] = tag1 + dict1["data"] = Tensor(np1) + + dict2 = {} + dict2["name"] = tag2 + dict2["data"] = Tensor(np2) + + dict3 = {} + dict3["name"] = tag3 + dict3["data"] = Tensor(np3) + + test_data_list.append(dict1) + test_data_list.append(dict2) + + return test_data_list + + +# Test 1: summary sample of scalar +def test_summary_performance(): + """ test_summary_performance """ + log.debug("begin test_scalar_summary_sample") + current_time = time.time() + print("time = ", current_time) + # step 0: create the thread + test_writer = SummaryRecord(SUMMARY_DIR, flush_time=120) + + # step 1: create the test data for summary + old_time = get_now_time_ns() + # step 2: create the Event + for i in range(1, 10): + test_data = get_test_data(i) + _cache_summary_tensor_data(test_data) + test_writer.record(i) + now_time = get_now_time_ns() + consume_time = (now_time - old_time)/1000/1000 + old_time = now_time + print("step test_summary_performance conusmer time is:", consume_time) + + + # step 3: send the event to mq + + # step 4: accept 
the event and write the file + test_writer.flush() + test_writer.close() + current_time = time.time() - current_time + print("consume time = ", current_time) + log.debug("finished test_scalar_summary_sample") From 07449cd1cc6e44ba4d51f2891ada4410df02a725 Mon Sep 17 00:00:00 2001 From: lichenever Date: Tue, 31 Mar 2020 18:43:42 +0800 Subject: [PATCH 05/58] fix two cast bug in auto parallel --- .../ccsrc/parallel/step_auto_parallel.cc | 2 ++ mindspore/ccsrc/parallel/step_parallel.cc | 8 +---- .../parallel/test_element_wise_function.py | 29 +++++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 7a895a9458..50e6a1e84e 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -350,6 +350,8 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { } OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(prim); + MS_EXCEPTION_IF_NULL(cnode); auto attrs = prim->attrs(); std::vector shape_list = ExtractShape(cnode); if (shape_list.empty()) { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index af5eb0159f..9a08ead584 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -374,7 +374,6 @@ bool IsParallelCareNode(const CNodePtr& cnode) { if (prim == nullptr) { return false; } - auto attrs = prim->attrs(); if (IsInBlackList(prim)) { MS_LOG(INFO) << "Parallel don't care node: " << prim->name(); return false; @@ -1971,11 +1970,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { MS_EXCEPTION_IF_NULL(current_value); PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> fix_cast_bug // return -> cast if (current_prim->name() == CAST && 
pre_cnode->operator_info() == nullptr) { pre_cnode = pre_cnode->input(1)->cast(); @@ -1983,8 +1978,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { current_prim = GetValueNode(pre_cnode->input(0)); } -======= ->>>>>>> 回退 'Pull Request !17 : [AutoParallel]Fix bug in the case of two cast' + // notice: the GetNext op has not input if (INVALID_LOSS_OPS.find(current_prim->name()) != INVALID_LOSS_OPS.end()) { MS_LOG(INFO) << "The loss is: " << current_prim->name(); diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 0c65593d6a..2eb3a22ed2 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -268,3 +268,32 @@ def test_cast_before_mirror3(): y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) _executor.compile(net, x, y, b) + + +def test_mul_two_cast(): + class Net(nn.Cell): + def __init__(self, strategy1, strategy2, strategy3): + super().__init__() + self.mul = P.Mul().set_strategy(strategy1) + self.mul2 = P.Mul().set_strategy(strategy2) + self.cast = P.Cast().set_strategy(strategy3) + self.cast2 = P.Cast().set_strategy(strategy3) + + def construct(self, x, y, b): + out = self.mul(x, y) + out = self.mul2(out, b) + out = self.cast(out, ms.int32) + out = self.cast2(out, ms.bool_) + return out + + context.set_auto_parallel_context(device_num=8, global_rank=0) + strategy1 = ((2, 2), (2, 2)) + strategy2 = ((8, 1), (8, 1)) + strategy3 = ((8, 1), ) + net = GradWrap(Net(strategy1, strategy2, strategy3)) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + + x = Tensor(np.ones([128, 32]), dtype=ms.float32) + y = Tensor(np.ones([128, 32]), dtype=ms.float32) + b = Tensor(np.ones([128, 32]), dtype=ms.float32) + _executor.compile(net, x, y, b) From 079df4c909dcfb3a5c665e3cceca64fe3038e097 Mon Sep 17 00:00:00 2001 From: kswang Date: Tue, 31 Mar 2020 
21:25:48 +0800 Subject: [PATCH 06/58] add cpu st lenet --- tests/st/networks/test_cpu_lenet.py | 2 +- tests/st/networks/test_network_main.py | 51 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py index 9fd50f5d9b..bdcbc32382 100644 --- a/tests/st/networks/test_cpu_lenet.py +++ b/tests/st/networks/test_cpu_lenet.py @@ -78,4 +78,4 @@ def test_lenet(): data = Tensor(np.ones([32, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() - train(net, data, label) + train(net, data, label) \ No newline at end of file diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py index 7601739f8c..730602c0ae 100644 --- a/tests/st/networks/test_network_main.py +++ b/tests/st/networks/test_network_main.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ +<<<<<<< HEAD:tests/st/networks/test_network_main.py """ Function: test network @@ -31,6 +32,47 @@ from models.lenet import LeNet from models.resnetv1_5 import resnet50 from models.alexnet import AlexNet context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") +======= +import pytest +from mindspore.nn import TrainOneStepCell, WithLossCell +import mindspore.context as context +from mindspore.nn.optim import Momentum +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore import Tensor + +class LeNet(nn.Cell): + def __init__(self): + super(LeNet, self).__init__() + self.relu = P.ReLU() + self.batch_size = 32 + + self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') + self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + self.reshape = P.Reshape() + self.fc1 = nn.Dense(400, 120) + self.fc2 = nn.Dense(120, 84) + self.fc3 = nn.Dense(84, 10) + + def construct(self, input_x): + output = self.conv1(input_x) + output = self.relu(output) + output = self.pool(output) + output = self.conv2(output) + output = self.relu(output) + output = self.pool(output) + output = self.reshape(output, (self.batch_size, -1)) + output = self.fc1(output) + output = self.relu(output) + output = self.fc2(output) + output = self.relu(output) + output = self.fc3(output) + return output + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def train(net, data, label): learning_rate = 0.01 @@ -47,17 +89,24 @@ def train(net, data, label): print("+++++++++++++++++++++++++++") assert res +<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_resnet50(): data = Tensor(np.ones([32, 3 ,224, 224]).astype(np.float32) * 0.01) label = 
Tensor(np.ones([32]).astype(np.int32)) net = resnet50(32, 10) train(net, data, label) +======= +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def test_lenet(): data = Tensor(np.ones([32, 1 ,32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() train(net, data, label) +<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_alexnet(): data = Tensor(np.ones([32, 3 ,227, 227]).astype(np.float32) * 0.01) @@ -79,3 +128,5 @@ if __name__ == "__main__": test_alexnet() else: print("Please add net name like --net lenet") +======= +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py From 8adbcdbc4cd572d0bc57d9167841baee87b01501 Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Wed, 1 Apr 2020 11:53:35 +0800 Subject: [PATCH 07/58] delete longtime python ut --- .../train/summary/test_summary_performance.py | 97 ------------------- 1 file changed, 97 deletions(-) delete mode 100644 tests/ut/python/train/summary/test_summary_performance.py diff --git a/tests/ut/python/train/summary/test_summary_performance.py b/tests/ut/python/train/summary/test_summary_performance.py deleted file mode 100644 index 9ee9725d13..0000000000 --- a/tests/ut/python/train/summary/test_summary_performance.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -@File : test_summary.py -@Author: -@Date : 2019-07-4 -@Desc : test summary function -""" -import os -import logging -import time -import numpy as np -from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data -from mindspore.common.tensor import Tensor - -CUR_DIR = os.getcwd() -SUMMARY_DIR = CUR_DIR + "/test_temp_summary_event_file/" - -log = logging.getLogger("test") -log.setLevel(level=logging.ERROR) - -def get_now_time_ns(): - """get the time of second""" - time_second = int(time.time_ns()) - return time_second - -def get_test_data(step): - """ get_test_data """ - # pylint: disable=unused-argument - test_data_list = [] - tag1 = "xt1[:Tensor]" - tag2 = "xt2[:Tensor]" - tag3 = "xt3[:Tensor]" - np1 = np.random.random((5, 4, 3, 5)) - np2 = np.random.random((5, 5, 3, 5)) - np3 = np.random.random((4, 5, 3, 5)) - - dict1 = {} - dict1["name"] = tag1 - dict1["data"] = Tensor(np1) - - dict2 = {} - dict2["name"] = tag2 - dict2["data"] = Tensor(np2) - - dict3 = {} - dict3["name"] = tag3 - dict3["data"] = Tensor(np3) - - test_data_list.append(dict1) - test_data_list.append(dict2) - - return test_data_list - - -# Test 1: summary sample of scalar -def test_summary_performance(): - """ test_summary_performance """ - log.debug("begin test_scalar_summary_sample") - current_time = time.time() - print("time = ", current_time) - # step 0: create the thread - test_writer = SummaryRecord(SUMMARY_DIR, flush_time=120) - - # step 1: create the test data for summary - old_time = get_now_time_ns() - # step 2: create the Event - for i in range(1, 10): - test_data = get_test_data(i) - _cache_summary_tensor_data(test_data) - test_writer.record(i) - now_time = get_now_time_ns() - consume_time = (now_time - old_time)/1000/1000 - old_time = now_time - print("step test_summary_performance conusmer time is:", consume_time) - - - # step 3: send the event to mq - - # step 4: accept 
the event and write the file - test_writer.flush() - test_writer.close() - current_time = time.time() - current_time - print("consume time = ", current_time) - log.debug("finished test_scalar_summary_sample") From 9b5d4eff89cd47856e1ee181207d4b62d05d542b Mon Sep 17 00:00:00 2001 From: zhaoting Date: Tue, 31 Mar 2020 09:14:08 +0800 Subject: [PATCH 08/58] add RMSProp optimizer --- mindspore/ccsrc/transform/op_declare.h | 1 + mindspore/nn/optim/rmsprop.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index 9fbc97f3c9..339a5027c5 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -458,6 +458,7 @@ DECLARE_OP_USE_INPUT_ATTR(ApplyRMSPropD) DECLARE_OP_USE_OUTPUT(ApplyRMSPropD) DECLARE_OP_ADAPTER(ApplyCenteredRMSProp) DECLARE_OP_USE_OUTPUT(ApplyCenteredRMSProp) + #ifdef ENABLE_GE DECLARE_OP_ADAPTER(Print) DECLARE_OP_USE_DYN_INPUT(Print) diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index b17a101708..a34de449d1 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -18,8 +18,12 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore._checkparam import ParamValidator as validator import mindspore.common.dtype as mstype +<<<<<<< HEAD from mindspore.common import Tensor from .optimizer import Optimizer, grad_scale, apply_decay +======= +from .optimizer import Optimizer, grad_scale +>>>>>>> add RMSProp optimizer rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") @@ -119,9 +123,12 @@ class RMSProp(Optimizer): use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. centered (bool): If True, gradients are normalized by the estimated variance of the gradient. 
Default: False loss_scale (float): A floating point value for the loss scale. Default: 1.0. +<<<<<<< HEAD weight_decay (float): Weight decay (L2 penalty). Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'beta' not in x.name and 'gamma' not in x.name. +======= +>>>>>>> add RMSProp optimizer Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -132,12 +139,20 @@ class RMSProp(Optimizer): Examples: >>> net = Net() >>> loss = nn.SoftmaxCrossEntropyWithLogits() +<<<<<<< HEAD >>> opt = nn.RMSProp(params=net.trainable_params(), learning_rate=lr) >>> model = Model(net, loss, opt) """ def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): +======= + >>> opt = RMSProp(params=net.trainable_params(), learning_rate=lr) + >>> model = Model(net, loss, opt) + """ + def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, + use_locking=False, centered=False, loss_scale=1.0): +>>>>>>> add RMSProp optimizer super(RMSProp, self).__init__(learning_rate, params) if isinstance(momentum, float) and momentum < 0.0: From 2ba026dbf2e8791281cbd4e98cf74a6464b6a991 Mon Sep 17 00:00:00 2001 From: Wei Luning Date: Mon, 23 Mar 2020 17:33:56 +0800 Subject: [PATCH 09/58] remove ge depend in cpu --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 55 +++++++++++++++++++++++++ mindspore/ccsrc/utils/callbacks.h | 2 +- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 2f68935591..0b37e8f930 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -391,7 +391,12 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve const 
std::string& phase) { std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); if (ge_tensors.size() != inputs.size()) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Convert me args to ge tensor error."; +======= + MS_LOG(ERROR) << "args convert to ge tensor error"; + return nullptr; +>>>>>>> remove ge depend in cpu } std::vector ge_outputs; @@ -402,7 +407,12 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); if (graph_runner == nullptr) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Can not found GraphRunner."; +======= + MS_LOG(ERROR) << "Can not found GraphRunner"; + return nullptr; +>>>>>>> remove ge depend in cpu } { @@ -419,7 +429,11 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); if (me_outputs.size() != ge_outputs.size()) { +<<<<<<< HEAD MS_LOG(WARNING) << "Convert output Ge tensor to Me tensor failed"; +======= + MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; +>>>>>>> remove ge depend in cpu } py::tuple outputs(me_outputs.size()); @@ -429,11 +443,28 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::shared_ptr ret = nullptr; +<<<<<<< HEAD AnfNodePtr output_node = graph->get_return()->input(1); MS_EXCEPTION_IF_NULL(output_node); size_t count = 0; py::object oj = StructureOutput(output_node, outputs, &count); ret = std::make_shared(oj); +======= +#ifdef ENABLE_GE + AnfNodePtr root = graph->get_return(); + MS_EXCEPTION_IF_NULL(root); + AbstractBasePtr output = root->abstract(); + size_t count = 0; + py::object oj = StructureOutput(output, outputs, &count); + ret = std::make_shared(oj); +#else + if (outputs.size() == 1) { + ret = std::make_shared(outputs[0]); + } else { + ret = std::make_shared(outputs); + } +#endif +>>>>>>> remove ge depend in cpu return ret; } @@ -444,7 +475,11 @@ void ProcessGeArg(const std::map& info, const 
py:: std::size_t size = args.size(); if (info.count(phase) == 0) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "No phase in executor:" << GetPhasePrefix(phase); +======= + MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); +>>>>>>> remove ge depend in cpu } auto arg_size = info.at(phase)->arg_list_size; @@ -459,12 +494,20 @@ void ProcessGeArg(const std::map& info, const py:: ValuePtr converted = nullptr; bool succ = parse::ConvertData(args[i], &converted); if (!succ) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args convert error"; +======= + MS_LOG(EXCEPTION) << "args convert error"; +>>>>>>> remove ge depend in cpu } if (converted->isa()) { (*inputs).push_back(converted->cast()); } else { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor"; +======= + MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; +>>>>>>> remove ge depend in cpu } } } @@ -481,12 +524,20 @@ py::object ExecDFGraph(const std::map& info, const } if (info.count(phase) == 0) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "There is no phase:" << phase; +======= + MS_LOG(EXCEPTION) << "has no phase:" << phase; +>>>>>>> remove ge depend in cpu } FuncGraphPtr anf_graph = info.at(phase)->func_graph; +<<<<<<< HEAD #ifdef ENABLE_INFER +======= +#if (!defined ENABLE_GE) || (defined ENABLE_INFER) +>>>>>>> remove ge depend in cpu // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { @@ -511,7 +562,11 @@ py::object ExecDFGraph(const std::map& info, const if (ret != nullptr) { return *ret; } else { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Exec graph failed"; +======= + MS_LOG(EXCEPTION) << "exec graph failed"; +>>>>>>> remove ge depend in cpu } } void ExportDFGraph(const std::string& file_name, const std::string& phase) { diff --git a/mindspore/ccsrc/utils/callbacks.h b/mindspore/ccsrc/utils/callbacks.h index 
a1e4e75d5b..6f099ef4ca 100644 --- a/mindspore/ccsrc/utils/callbacks.h +++ b/mindspore/ccsrc/utils/callbacks.h @@ -40,7 +40,7 @@ const int kCallbackOk = 0; const int kCallbackFalied = 1; bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name, - const std::shared_ptr>& shape); + const std::shared_ptr>& shape) uint32_t SummarySaveCallback(uint32_t, const std::map&); } // namespace callbacks From 23c21e191ff2ebdcdbc7340e3748ce2d4c9c25f0 Mon Sep 17 00:00:00 2001 From: zhangz0911gm Date: Tue, 31 Mar 2020 23:14:21 -0400 Subject: [PATCH 10/58] Add FloorMod, Acosh in ME --- mindspore/ccsrc/transform/op_declare.h | 1 - 1 file changed, 1 deletion(-) diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index 339a5027c5..9fbc97f3c9 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -458,7 +458,6 @@ DECLARE_OP_USE_INPUT_ATTR(ApplyRMSPropD) DECLARE_OP_USE_OUTPUT(ApplyRMSPropD) DECLARE_OP_ADAPTER(ApplyCenteredRMSProp) DECLARE_OP_USE_OUTPUT(ApplyCenteredRMSProp) - #ifdef ENABLE_GE DECLARE_OP_ADAPTER(Print) DECLARE_OP_USE_DYN_INPUT(Print) From 2a82eb450efa5b26b8722807c1ff33db192594a9 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Fri, 3 Apr 2020 11:45:49 +0800 Subject: [PATCH 11/58] add weight decay in RMSProp optimizer --- mindspore/nn/optim/rmsprop.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index a34de449d1..e252f89f2f 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -18,12 +18,8 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore._checkparam import ParamValidator as validator import mindspore.common.dtype as mstype -<<<<<<< HEAD from mindspore.common import Tensor from .optimizer import Optimizer, grad_scale, apply_decay -======= -from .optimizer import 
Optimizer, grad_scale ->>>>>>> add RMSProp optimizer rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") @@ -123,12 +119,9 @@ class RMSProp(Optimizer): use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False loss_scale (float): A floating point value for the loss scale. Default: 1.0. -<<<<<<< HEAD weight_decay (float): Weight decay (L2 penalty). Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'beta' not in x.name and 'gamma' not in x.name. -======= ->>>>>>> add RMSProp optimizer Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -139,20 +132,12 @@ class RMSProp(Optimizer): Examples: >>> net = Net() >>> loss = nn.SoftmaxCrossEntropyWithLogits() -<<<<<<< HEAD >>> opt = nn.RMSProp(params=net.trainable_params(), learning_rate=lr) >>> model = Model(net, loss, opt) """ def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): -======= - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=lr) - >>> model = Model(net, loss, opt) - """ - def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, - use_locking=False, centered=False, loss_scale=1.0): ->>>>>>> add RMSProp optimizer super(RMSProp, self).__init__(learning_rate, params) if isinstance(momentum, float) and momentum < 0.0: @@ -209,4 +194,4 @@ class RMSProp(Optimizer): else: success = self.hyper_map(F.partial(rmsprop_opt, self.opt, lr, self.decay, self.epsilon, self.momentum), params, self.ms, self.moment, gradients) - return success + return success \ No newline at 
end of file From 93fe493cf46fdca43f2b0ecc486b210864629cc5 Mon Sep 17 00:00:00 2001 From: kingfo Date: Fri, 3 Apr 2020 10:53:46 +0800 Subject: [PATCH 12/58] fix ME+GE compile error --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 63 ++----------------------- 1 file changed, 4 insertions(+), 59 deletions(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 0b37e8f930..ee67d46cf7 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -116,7 +116,7 @@ bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batc return transform::TransformUtil::ConvertDataType(i->type_id()); }); - ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE); + ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_SINK_MODE); ConfigManager::GetInstance().set_iter_num(size); ConfigManager::GetInstance().set_dataset_phase(phase); @@ -391,12 +391,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve const std::string& phase) { std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); if (ge_tensors.size() != inputs.size()) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Convert me args to ge tensor error."; -======= - MS_LOG(ERROR) << "args convert to ge tensor error"; - return nullptr; ->>>>>>> remove ge depend in cpu } std::vector ge_outputs; @@ -407,12 +402,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); if (graph_runner == nullptr) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Can not found GraphRunner."; -======= - MS_LOG(ERROR) << "Can not found GraphRunner"; - return nullptr; ->>>>>>> remove ge depend in cpu } { @@ -429,11 +419,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); if (me_outputs.size() != ge_outputs.size()) { -<<<<<<< HEAD 
MS_LOG(WARNING) << "Convert output Ge tensor to Me tensor failed"; -======= - MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; ->>>>>>> remove ge depend in cpu } py::tuple outputs(me_outputs.size()); @@ -443,28 +429,11 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::shared_ptr ret = nullptr; -<<<<<<< HEAD AnfNodePtr output_node = graph->get_return()->input(1); MS_EXCEPTION_IF_NULL(output_node); size_t count = 0; py::object oj = StructureOutput(output_node, outputs, &count); ret = std::make_shared(oj); -======= -#ifdef ENABLE_GE - AnfNodePtr root = graph->get_return(); - MS_EXCEPTION_IF_NULL(root); - AbstractBasePtr output = root->abstract(); - size_t count = 0; - py::object oj = StructureOutput(output, outputs, &count); - ret = std::make_shared(oj); -#else - if (outputs.size() == 1) { - ret = std::make_shared(outputs[0]); - } else { - ret = std::make_shared(outputs); - } -#endif ->>>>>>> remove ge depend in cpu return ret; } @@ -475,11 +444,7 @@ void ProcessGeArg(const std::map& info, const py:: std::size_t size = args.size(); if (info.count(phase) == 0) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "No phase in executor:" << GetPhasePrefix(phase); -======= - MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); ->>>>>>> remove ge depend in cpu } auto arg_size = info.at(phase)->arg_list_size; @@ -488,26 +453,18 @@ void ProcessGeArg(const std::map& info, const py:: } // process the first args of tensor - // only in Dataset Feed Mode, fp_bp graph need input tensors - if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) { + // only in dataset normal(non-sink) mode, fp_bp graph need input tensors + if (ConfigManager::GetInstance().dataset_mode() == DS_NORMAL_MODE) { for (std::size_t i = 0; i < size; i++) { ValuePtr converted = nullptr; bool succ = parse::ConvertData(args[i], &converted); if (!succ) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args convert error"; -======= - MS_LOG(EXCEPTION) << "args convert 
error"; ->>>>>>> remove ge depend in cpu } if (converted->isa()) { (*inputs).push_back(converted->cast()); } else { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor"; -======= - MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; ->>>>>>> remove ge depend in cpu } } } @@ -524,20 +481,12 @@ py::object ExecDFGraph(const std::map& info, const } if (info.count(phase) == 0) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "There is no phase:" << phase; -======= - MS_LOG(EXCEPTION) << "has no phase:" << phase; ->>>>>>> remove ge depend in cpu } FuncGraphPtr anf_graph = info.at(phase)->func_graph; -<<<<<<< HEAD #ifdef ENABLE_INFER -======= -#if (!defined ENABLE_GE) || (defined ENABLE_INFER) ->>>>>>> remove ge depend in cpu // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { @@ -562,11 +511,7 @@ py::object ExecDFGraph(const std::map& info, const if (ret != nullptr) { return *ret; } else { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Exec graph failed"; -======= - MS_LOG(EXCEPTION) << "exec graph failed"; ->>>>>>> remove ge depend in cpu } } void ExportDFGraph(const std::string& file_name, const std::string& phase) { @@ -588,4 +533,4 @@ void ExportDFGraph(const std::string& file_name, const std::string& phase) { MS_LOG(DEBUG) << "ExportGraph End"; } } // namespace pipeline -} // namespace mindspore +} // namespace mindspore \ No newline at end of file From 3aa51f35c1dd33344f40c8d475d1bd8b60bd3028 Mon Sep 17 00:00:00 2001 From: wanghua Date: Fri, 3 Apr 2020 14:55:25 +0800 Subject: [PATCH 13/58] fix bert precison bug --- mindspore/ccsrc/device/ascend/kernel_select_ascend.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index d05b9fafa1..a7c8d131fb 100644 --- 
a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -82,6 +82,12 @@ bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel:: } return true; }; + if (AnfAlgo::GetCNodeName(kernel_node) == "LayerNormBetaGammaBackprop" || + AnfAlgo::GetCNodeName(kernel_node) == "LayerNormXBackprop") { + if (AnfAlgo::GetPrevNodeOutputFormat(kernel_node, 0) != kernel_build_info.GetInputFormat(0)) { + return true; + } + } if (AnfAlgo::GetCNodeName(kernel_node) == prim::kPrimCast->name()) { return AnfAlgo::GetOutputInferDataType(kernel_node, 0) == kernel_build_info.GetOutputDeviceType(0) && AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0) == kernel_build_info.GetInputDeviceType(0); @@ -155,7 +161,7 @@ bool PriorityChooseItem(const std::vector &cur_item, std::vector *best return false; } } - return false; + return true; } void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, const std::shared_ptr &kernel_node, From a8f95e6d2c94565dadff47f39c30dcb676773f5d Mon Sep 17 00:00:00 2001 From: wanghua Date: Fri, 3 Apr 2020 17:51:56 +0800 Subject: [PATCH 14/58] modify bert test file --- mindspore/ccsrc/device/ascend/kernel_select_ascend.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index a7c8d131fb..d05b9fafa1 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -82,12 +82,6 @@ bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel:: } return true; }; - if (AnfAlgo::GetCNodeName(kernel_node) == "LayerNormBetaGammaBackprop" || - AnfAlgo::GetCNodeName(kernel_node) == "LayerNormXBackprop") { - if (AnfAlgo::GetPrevNodeOutputFormat(kernel_node, 0) != kernel_build_info.GetInputFormat(0)) { - return true; - } - } if (AnfAlgo::GetCNodeName(kernel_node) == 
prim::kPrimCast->name()) { return AnfAlgo::GetOutputInferDataType(kernel_node, 0) == kernel_build_info.GetOutputDeviceType(0) && AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0) == kernel_build_info.GetInputDeviceType(0); @@ -161,7 +155,7 @@ bool PriorityChooseItem(const std::vector &cur_item, std::vector *best return false; } } - return true; + return false; } void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, const std::shared_ptr &kernel_node, From 1efa4ffc39b9599769ea1ef8cb1deb6b9d4bd280 Mon Sep 17 00:00:00 2001 From: ms_yan <6576637+ms_yan@user.noreply.gitee.com> Date: Thu, 2 Apr 2020 21:56:48 +0800 Subject: [PATCH 15/58] add parameter check for Class Schema --- mindspore/dataset/engine/datasets.py | 20 +++++++---- mindspore/dataset/engine/validators.py | 49 ++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index db2b5169d2..2d5c219b71 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset + check_zip_dataset, check_add_column, check_columns from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,13 +2334,20 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: + if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): + raise ValueError("The file %s does not exist or permission denied!" 
% schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) - self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load") + raise RuntimeError("Schema file failed to load.") + except UnicodeDecodeError: + raise RuntimeError("Schema file failed to decode.") + except Exception: + raise RuntimeError("Schema file failed to open.") + self.from_json(json_obj) + @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. @@ -2359,10 +2366,8 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - elif isinstance(de_type, str): - new_column["type"] = str(DataType(de_type)) else: - raise ValueError("Unknown column type") + new_column["type"] = str(DataType(de_type)) if shape is not None: new_column["shape"] = shape @@ -2399,6 +2404,7 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. """ + check_columns(columns, columns) self.columns = [] for col in columns: name = None @@ -2443,6 +2449,8 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ + if not isinstance(json_obj, dict) or json_obj is None: + raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index b4d22a4a01..1c374ae879 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,10 +19,15 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count +from mindspore._c_expression import typing from . import samplers from . 
import datasets INT32_MAX = 2147483647 +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64" +] def check(method): @@ -188,6 +193,12 @@ def check(method): return wrapper +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + def check_filename(path): """ check the filename in the path @@ -743,3 +754,41 @@ def check_project(method): return method(*args, **kwargs) return new_method + + +def check_shape(shape, name): + if isinstance(shape, list): + for element in shape: + if not isinstance(element, int): + raise TypeError( + "Each element in {0} should be of type int. Got {1}.".format(name, type(element))) + else: + raise TypeError("Expected int list.") + + +def check_add_column(method): + """check the input arguments of add_column.""" + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check name; required argument + name = param_dict.get("name") + if not isinstance(name, str) or not name: + raise TypeError("Expected non-empty string.") + + # check type; required argument + de_type = param_dict.get("de_type") + if not isinstance(de_type, str) or not de_type: + raise TypeError("Expected non-empty string.") + if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): + raise ValueError("Unknown column type.") + + # check shape + shape = param_dict.get("shape") + if shape is not None: + check_shape(shape, "shape") + + return method(*args, **kwargs) + + return new_method From 0a595b4749525b94b10ddf33770255ad5effe674 Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Tue, 7 Apr 2020 11:39:10 +0800 Subject: [PATCH 16/58] fix runtest.sh for python ut --- mindspore/dataset/engine/datasets.py | 20 ++++------- mindspore/dataset/engine/validators.py | 49 -------------------------- 2 files changed, 6 insertions(+), 63 deletions(-) diff 
--git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 2d5c219b71..db2b5169d2 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset, check_add_column, check_columns + check_zip_dataset from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,20 +2334,13 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: - if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): - raise ValueError("The file %s does not exist or permission denied!" % schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) + self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load.") - except UnicodeDecodeError: - raise RuntimeError("Schema file failed to decode.") - except Exception: - raise RuntimeError("Schema file failed to open.") - self.from_json(json_obj) + raise RuntimeError("Schema file failed to load") - @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. @@ -2366,8 +2359,10 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - else: + elif isinstance(de_type, str): new_column["type"] = str(DataType(de_type)) + else: + raise ValueError("Unknown column type") if shape is not None: new_column["shape"] = shape @@ -2404,7 +2399,6 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. 
""" - check_columns(columns, columns) self.columns = [] for col in columns: name = None @@ -2449,8 +2443,6 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ - if not isinstance(json_obj, dict) or json_obj is None: - raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 1c374ae879..b4d22a4a01 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,15 +19,10 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count -from mindspore._c_expression import typing from . import samplers from . import datasets INT32_MAX = 2147483647 -valid_detype = [ - "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", - "uint32", "uint64", "float16", "float32", "float64" -] def check(method): @@ -193,12 +188,6 @@ def check(method): return wrapper -def check_valid_detype(type_): - if type_ not in valid_detype: - raise ValueError("Unknown column type") - return True - - def check_filename(path): """ check the filename in the path @@ -754,41 +743,3 @@ def check_project(method): return method(*args, **kwargs) return new_method - - -def check_shape(shape, name): - if isinstance(shape, list): - for element in shape: - if not isinstance(element, int): - raise TypeError( - "Each element in {0} should be of type int. 
Got {1}.".format(name, type(element))) - else: - raise TypeError("Expected int list.") - - -def check_add_column(method): - """check the input arguments of add_column.""" - @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check name; required argument - name = param_dict.get("name") - if not isinstance(name, str) or not name: - raise TypeError("Expected non-empty string.") - - # check type; required argument - de_type = param_dict.get("de_type") - if not isinstance(de_type, str) or not de_type: - raise TypeError("Expected non-empty string.") - if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): - raise ValueError("Unknown column type.") - - # check shape - shape = param_dict.get("shape") - if shape is not None: - check_shape(shape, "shape") - - return method(*args, **kwargs) - - return new_method From bf4c09931eec2c2516cfe0a2cc790ea9f92d8eb1 Mon Sep 17 00:00:00 2001 From: VectorSL Date: Fri, 3 Apr 2020 16:55:37 +0800 Subject: [PATCH 17/58] edit loss_scale for gpu --- mindspore/nn/wrap/loss_scale.py | 40 ++++++++++---- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 83 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 12 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 1ce3179273..4d929352b3 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -25,6 +25,7 @@ from ...ops import operations as P from ...ops.operations import NPUGetFloatStatus, NPUAllocFloatStatus, NPUClearFloatStatus, ReduceSum, LessEqual, \ ControlDepend from ...common import dtype as mstype +import mindspore.context as context _grad_scale = C.MultitypeFuncGraph("grad_scale") reciprocal = P.Reciprocal() @@ -34,6 +35,12 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) +_grad_overflow = 
C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() + +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -197,9 +204,15 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + else: + self.gpu_target = False + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -224,10 +237,11 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + if not self.gpu_target: + # init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -237,10 +251,14 @@ class TrainOneStepWithLossScaleCell(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) + if not self.gpu_target: + # get the overflow buffer + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) 
if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 37a3b38bb6..48a985b33d 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, + NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -151,6 +151,10 @@ __all__ = [ 'Neg', 'Slice', 'DType', + 'IsNan', + 'IsInf', + 'IsFinite', + 'FloatStatus', 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 175b72560f..5f8c24d78b 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,6 +1541,89 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) +class IsNan(PrimitiveWithInfer): + """ + Judging which elements are nan for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. + """ + + @prim_attr_register + def __init__(self): + """init IsNan""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsInf(PrimitiveWithInfer): + """ + Judging which elements are inf or -inf for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. 
+ """ + + @prim_attr_register + def __init__(self): + """init IsInf""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsFinite(PrimitiveWithInfer): + """ + Judging which elements are finite for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. + """ + + @prim_attr_register + def __init__(self): + """init IsFinite""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class FloatStatus(PrimitiveWithInfer): + """ + Determine if the elements contains nan, inf or -inf + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the shape of `(1,)`. + """ + + @prim_attr_register + def __init__(self): + """init FloatStatus""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return [1] + + def infer_dtype(self, x_dtype): + return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From 22578e983949129e5f1c60f53cd2426072d55fd5 Mon Sep 17 00:00:00 2001 From: chengang Date: Tue, 7 Apr 2020 16:56:33 +0800 Subject: [PATCH 18/58] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?33=20:=20Edit=20loss=5Fscale=20to=20fit=20GPU'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindspore/nn/wrap/loss_scale.py | 40 ++++---------- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 83 ---------------------------- 3 files changed, 12 insertions(+), 117 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 4d929352b3..1ce3179273 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -25,7 +25,6 @@ from ...ops import operations 
as P from ...ops.operations import NPUGetFloatStatus, NPUAllocFloatStatus, NPUClearFloatStatus, ReduceSum, LessEqual, \ ControlDepend from ...common import dtype as mstype -import mindspore.context as context _grad_scale = C.MultitypeFuncGraph("grad_scale") reciprocal = P.Reciprocal() @@ -35,12 +34,6 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) -_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") -grad_overflow = P.FloatStatus() - -@_grad_overflow.register("Tensor") -def _tensor_grad_overflow(grad): - return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -204,15 +197,9 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - if context.get_context("device_target") == "GPU": - self.gpu_target = True - self.float_status = P.FloatStatus() - self.addn = P.AddN() - else: - self.gpu_target = False - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -237,11 +224,10 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - if not self.gpu_target: - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + # init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -251,14 +237,10 @@ class TrainOneStepWithLossScaleCell(Cell): if self.reducer_flag: # apply grad reducer on grads grads = 
self.grad_reducer(grads) - if not self.gpu_target: - # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) - else: - flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) - flag_sum = self.addn(flag_sum) + # get the overflow buffer + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 48a985b33d..37a3b38bb6 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, + NPUGetFloatStatus, Pow, RealDiv, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -151,10 +151,6 @@ __all__ = [ 'Neg', 'Slice', 'DType', - 'IsNan', - 'IsInf', - 'IsFinite', - 'FloatStatus', 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 5f8c24d78b..175b72560f 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,89 +1541,6 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) -class IsNan(PrimitiveWithInfer): - """ - Judging which elements are nan for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. 
- """ - - @prim_attr_register - def __init__(self): - """init IsNan""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class IsInf(PrimitiveWithInfer): - """ - Judging which elements are inf or -inf for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. - """ - - @prim_attr_register - def __init__(self): - """init IsInf""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class IsFinite(PrimitiveWithInfer): - """ - Judging which elements are finite for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. - """ - - @prim_attr_register - def __init__(self): - """init IsFinite""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class FloatStatus(PrimitiveWithInfer): - """ - Determine if the elements contains nan, inf or -inf - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the shape of `(1,)`. 
- """ - - @prim_attr_register - def __init__(self): - """init FloatStatus""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return [1] - - def infer_dtype(self, x_dtype): - return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From b9701db887348513da47b7d301f25e7f7420a8f3 Mon Sep 17 00:00:00 2001 From: Alexey Shevlyakov Date: Thu, 2 Apr 2020 14:17:46 -0400 Subject: [PATCH 19/58] fix RandomCropDecodeResize test --- tests/ut/cpp/dataset/CMakeLists.txt | 2 +- .../dataset/random_crop_and_resize_op_test.cc | 36 ++---- ...c => random_crop_decode_resize_op_test.cc} | 105 +++++++++--------- 3 files changed, 62 insertions(+), 81 deletions(-) rename tests/ut/cpp/dataset/{random_crop_decode_resizeOp_test.cc => random_crop_decode_resize_op_test.cc} (56%) diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 0da470ac89..086a67c7d7 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -32,7 +32,7 @@ SET(DE_UT_SRCS project_op_test.cc queue_test.cc random_crop_op_test.cc - random_crop_decode_resizeOp_test.cc + random_crop_decode_resize_op_test.cc random_crop_and_resize_op_test.cc random_color_adjust_op_test.cc random_horizontal_flip_op_test.cc diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 864d713ed3..7be18fb02c 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -20,35 +20,17 @@ #include "utils/log_adapter.h" using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; class MindDataTestRandomCropAndResizeOp : public UT::CVOP::CVOpCommon { public: MindDataTestRandomCropAndResizeOp() : CVOpCommon() {} }; 
-TEST_F(MindDataTestRandomCropAndResizeOp, TestOpDefault) { - MS_LOG(INFO) << "Doing testRandomCropAndResize."; - TensorShape s_in = input_tensor_->shape(); - std::shared_ptr output_tensor; - int h_out = 512; - int w_out = 512; - - TensorShape s_out({(uint32_t) h_out, (uint32_t) w_out, (uint32_t) s_in[2]}); - - std::unique_ptr op(new RandomCropAndResizeOp(h_out, w_out)); - Status s; - for (auto i = 0; i < 100; i++) { - s = op->Compute(input_tensor_, &output_tensor); - } - EXPECT_TRUE(s.IsOk()); - MS_LOG(INFO) << "testRandomCropAndResize end."; -} - -TEST_F(MindDataTestRandomCropAndResizeOp, TestOpExtended) { - MS_LOG(INFO) << "Doing testRandomCropAndResize."; +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; TensorShape s_in = input_tensor_->shape(); std::shared_ptr output_tensor; int h_out = 1024; @@ -58,14 +40,14 @@ TEST_F(MindDataTestRandomCropAndResizeOp, TestOpExtended) { float scale_lb = 0.0001; float scale_ub = 1.0; - TensorShape s_out({(uint32_t) h_out, (uint32_t) w_out, (uint32_t) s_in[2]}); + TensorShape s_out({h_out, w_out, s_in[2]}); - std::unique_ptr op( - new RandomCropAndResizeOp(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub)); + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); Status s; for (auto i = 0; i < 100; i++) { s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); } - EXPECT_TRUE(s.IsOk()); - MS_LOG(INFO) << "testRandomCropAndResize end."; + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; } diff --git a/tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc similarity index 56% rename from tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc rename to tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc index facd35c4f7..d7e0b16aff 100644 --- a/tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc +++ 
b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc @@ -23,9 +23,10 @@ #include "utils/log_adapter.h" using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; +constexpr double kMseThreshold = 2.0; class MindDataTestRandomCropDecodeResizeOp : public UT::CVOP::CVOpCommon { public: @@ -33,39 +34,38 @@ class MindDataTestRandomCropDecodeResizeOp : public UT::CVOP::CVOpCommon { }; TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { - MS_LOG(INFO) << "Doing testRandomCropDecodeResizeOp Test"; + MS_LOG(INFO) << "starting RandomCropDecodeResizeOp test 1"; - std::shared_ptr output_tensor1; - std::shared_ptr output_tensor2; + std::shared_ptr decode_and_crop_output; + std::shared_ptr crop_and_decode_output; - int target_height = 884; - int target_width = 718; - float scale_lb = 0.08; - float scale_ub = 1.0; - float aspect_lb = 0.75; - float aspect_ub = 1.333333; - InterpolationMode interpolation = InterpolationMode::kLinear; - uint32_t max_iter = 10; - std::unique_ptr op1(new RandomCropAndResizeOp( - target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, max_iter)); - EXPECT_TRUE(op1->OneToOne()); - std::unique_ptr op2(new RandomCropDecodeResizeOp( - target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, max_iter)); - EXPECT_TRUE(op2->OneToOne()); - Status s1, s2; + constexpr int target_height = 884; + constexpr int target_width = 718; + constexpr float scale_lb = 0.08; + constexpr float scale_ub = 1.0; + constexpr float aspect_lb = 0.75; + constexpr float aspect_ub = 1.333333; + const InterpolationMode interpolation = InterpolationMode::kLinear; + constexpr uint32_t max_iter = 10; + auto crop_and_decode = RandomCropDecodeResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, + interpolation, max_iter); 
+ auto crop_and_decode_copy = crop_and_decode; + auto decode_and_crop = static_cast(crop_and_decode_copy); + EXPECT_TRUE(crop_and_decode.OneToOne()); + GlobalContext::config_manager()->set_seed(42); for (int i = 0; i < 100; i++) { - s1 = op1->Compute(input_tensor_, &output_tensor1); - s2 = op2->Compute(raw_input_tensor_, &output_tensor2); - cv::Mat output1(target_height, target_width, CV_8UC3, output_tensor1->StartAddr()); - cv::Mat output2(target_height, target_width, CV_8UC3, output_tensor2->StartAddr()); + (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); + (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); + cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->StartAddr()); + cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->StartAddr()); long int mse_sum = 0; long int count = 0; int a, b; - for (int i = 0; i < target_height; i++) { - for (int j = 0; j < target_width; j++) { - a = (int)output1.at(i, j)[1]; - b = (int)output2.at(i, j)[1]; + for (int j = 0; j < target_height; j++) { + for (int k = 0; k < target_width; k++) { + a = static_cast(output1.at(i, j)[1]); + b = static_cast(output2.at(i, j)[1]); mse_sum += sqrt((a - b) * (a - b)); if (a != b) { count++; @@ -73,24 +73,22 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { } } double mse; - if (count > 0) { - mse = (double) mse_sum / count; - } else { - mse = mse_sum; - } - MS_LOG(DEBUG) << "mse: " << mse << std::endl; + mse = count > 0 ? 
static_cast(mse_sum) / count : mse_sum; + MS_LOG(INFO) << "mse: " << mse << std::endl; + EXPECT_LT(mse, kMseThreshold); } - MS_LOG(INFO) << "MindDataTestRandomCropDecodeResizeOp end!"; + + MS_LOG(INFO) << "RandomCropDecodeResizeOp test 1 finished"; } TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { - MS_LOG(INFO) << "Doing MindDataTestRandomCropDecodeResizeOp"; - const unsigned int h = 884; - const unsigned int w = 718; - const float scale_lb = 0.1; - const float scale_ub = 1; - const float aspect_lb = 0.1; - const float aspect_ub = 10; + MS_LOG(INFO) << "starting RandomCropDecodeResizeOp test 2"; + constexpr int h = 884; + constexpr int w = 718; + constexpr float scale_lb = 0.1; + constexpr float scale_ub = 1; + constexpr float aspect_lb = 0.1; + constexpr float aspect_ub = 10; std::shared_ptr decoded, decoded_and_cropped, cropped_and_decoded; std::mt19937 rd; @@ -98,14 +96,14 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { std::uniform_real_distribution rd_aspect(aspect_lb, aspect_ub); DecodeOp op(true); op.Compute(raw_input_tensor_, &decoded); - Status s1, s2; + Status crop_and_decode_status, decode_and_crop_status; float scale, aspect; int crop_width, crop_height; bool crop_success = false; - unsigned int mse_sum, m1, m2, count; - float mse; + int mse_sum, m1, m2, count; + double mse; - for (unsigned int k = 0; k < 100; ++k) { + for (int k = 0; k < 100; ++k) { mse_sum = 0; count = 0; for (auto i = 0; i < 100; i++) { @@ -132,13 +130,13 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { int y = rd_y(rd); op.Compute(raw_input_tensor_, &decoded); - s1 = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); - s2 = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); + crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); + decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); { cv::Mat 
M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->StartAddr()); cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->StartAddr()); - for (unsigned int i = 0; i < crop_height; ++i) { - for (unsigned int j = 0; j < crop_width; ++j) { + for (int i = 0; i < crop_height; ++i) { + for (int j = 0; j < crop_width; ++j) { m1 = M1.at(i, j)[1]; m2 = M2.at(i, j)[1]; mse_sum += sqrt((m1 - m2) * (m1 - m2)); @@ -149,8 +147,9 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { } } - mse = (count == 0) ? mse_sum : static_cast(mse_sum) / count; - MS_LOG(DEBUG) << "mse: " << mse << std::endl; + mse = count > 0 ? static_cast(mse_sum) / count : mse_sum; + MS_LOG(INFO) << "mse: " << mse << std::endl; + EXPECT_LT(mse, kMseThreshold); } - MS_LOG(INFO) << "MindDataTestRandomCropDecodeResizeOp end!"; + MS_LOG(INFO) << "RandomCropDecodeResizeOp test 2 finished"; } From c6c8c94d3f40b04b736da3d5d411c1a41629f6e6 Mon Sep 17 00:00:00 2001 From: dengwentao Date: Tue, 7 Apr 2020 11:22:53 +0800 Subject: [PATCH 20/58] updata mkl-dnn link and md5 --- Third_Party_Open_Source_Software_Notice | 2 +- cmake/external_libs/mkl_dnn.cmake | 18 +++++++++--------- cmake/utils.cmake | 2 ++ mindspore/ccsrc/CMakeLists.txt | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index 498b5b8d1b..60ad7cf47c 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -368,7 +368,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-Software: MKL-DNN 1.1.2 +Software: oneDNN 1.1.2 Copyright (c) 2009-2018 The MathJax Consortium Copyright 2018 Intel Corporation Copyright 2019 Intel Corporation diff --git a/cmake/external_libs/mkl_dnn.cmake b/cmake/external_libs/mkl_dnn.cmake index 17d8020d3a..6f033fa565 100644 --- a/cmake/external_libs/mkl_dnn.cmake +++ b/cmake/external_libs/mkl_dnn.cmake @@ -1,11 +1,11 @@ -set(mkl_dnn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") -set(mkl_dnn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") -mindspore_add_pkg(mkl_dnn - VER 1.1.1 +set(onednn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") +set(onednn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") +mindspore_add_pkg(onednn + VER 1.1.2 LIBS dnnl mkldnn - URL https://github.com/intel/mkl-dnn/archive/v1.1.1.tar.gz - MD5 d6a422b00459600bdc22242590953f38 + URL https://github.com/oneapi-src/oneDNN/archive/v1.1.2.tar.gz + MD5 ab40d52230f3ad1d7a6f06ce0f6bc17a CMAKE_OPTION -DDNNL_ARCH_OPT_FLAGS='' -DDNNL_CPU_RUNTIME='SEQ' -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF) -include_directories(${mkl_dnn_INC}) -add_library(mindspore::dnnl ALIAS mkl_dnn::dnnl) -add_library(mindspore::mkldnn ALIAS mkl_dnn::mkldnn) +include_directories(${onednn_INC}) +add_library(mindspore::dnnl ALIAS onednn::dnnl) +add_library(mindspore::mkldnn ALIAS onednn::mkldnn) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 060e400820..99c064fdd4 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -40,6 +40,8 @@ else() set(JOBS 8) if (${JOBS} GREATER ${N}) set(THNUM ${N}) + else() + set(THNUM ${JOBS}) endif() endif () message("set make thread num: ${THNUM}") diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index befe86f3c0..9f559a51eb 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -542,7 +542,7 @@ endif() if (ENABLE_CPU) add_custom_target(add_cpu_lib ALL - COMMAND cp ${mkl_dnn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 + COMMAND cp ${onednn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 ) 
add_dependencies(add_cpu_lib add_ms_lib) endif() From 6dc6d6bc83113431542b6d21e4005331f1d20a4d Mon Sep 17 00:00:00 2001 From: jonyguo Date: Fri, 3 Apr 2020 16:53:45 +0800 Subject: [PATCH 21/58] fix: when use MindDataset block_reade=True hung --- mindspore/ccsrc/mindrecord/io/shard_reader.cc | 2 ++ mindspore/mindrecord/filewriter.py | 1 + mindspore/mindrecord/tools/cifar100_to_mr.py | 9 ++++--- tests/ut/python/dataset/test_minddataset.py | 27 ++++++++++++++++--- .../mindrecord/test_cifar100_to_mindrecord.py | 4 ++- .../mindrecord/test_mindrecord_exception.py | 8 +++++- 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index 791de6c60b..32825fd9df 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ -785,6 +785,8 @@ vector ShardReader::GetAllColumns() { MSRStatus ShardReader::CreateTasksByBlock(const std::vector> &row_group_summary, const std::vector> &operators) { + vector columns = GetAllColumns(); + CheckIfColumnInIndex(columns); for (const auto &rg : row_group_summary) { auto shard_id = std::get<0>(rg); auto group_id = std::get<1>(rg); diff --git a/mindspore/mindrecord/filewriter.py b/mindspore/mindrecord/filewriter.py index d1471f47cb..4056825ff3 100644 --- a/mindspore/mindrecord/filewriter.py +++ b/mindspore/mindrecord/filewriter.py @@ -143,6 +143,7 @@ class FileWriter: ParamTypeError: If index field is invalid. MRMDefineIndexError: If index field is not primitive type. MRMAddIndexError: If failed to add index field. + MRMGetMetaError: If the schema is not set or get meta failed. 
""" if not index_fields or not isinstance(index_fields, list): raise ParamTypeError('index_fields', 'list') diff --git a/mindspore/mindrecord/tools/cifar100_to_mr.py b/mindspore/mindrecord/tools/cifar100_to_mr.py index a359de853d..c011c8f4b0 100644 --- a/mindspore/mindrecord/tools/cifar100_to_mr.py +++ b/mindspore/mindrecord/tools/cifar100_to_mr.py @@ -24,7 +24,7 @@ from mindspore import log as logger from .cifar100 import Cifar100 from ..common.exceptions import PathNotExistsError from ..filewriter import FileWriter -from ..shardutils import check_filename +from ..shardutils import check_filename, SUCCESS try: cv2 = import_module("cv2") except ModuleNotFoundError: @@ -98,8 +98,11 @@ class Cifar100ToMR: data_list = _construct_raw_data(images, fine_labels, coarse_labels) test_data_list = _construct_raw_data(test_images, test_fine_labels, test_coarse_labels) - _generate_mindrecord(self.destination, data_list, fields, "img_train") - _generate_mindrecord(self.destination + "_test", test_data_list, fields, "img_test") + if _generate_mindrecord(self.destination, data_list, fields, "img_train") != SUCCESS: + return FAILED + if _generate_mindrecord(self.destination + "_test", test_data_list, fields, "img_test") != SUCCESS: + return FAILED + return SUCCESS def _construct_raw_data(images, fine_labels, coarse_labels): """ diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index da22f5c3b7..460a728b5c 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -47,7 +47,9 @@ def add_and_remove_cv_file(): os.remove("{}.db".format(x)) if os.path.exists("{}.db".format(x)) else None writer = FileWriter(CV_FILE_NAME, FILES_NUM) data = get_data(CV_DIR_NAME) - cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, + cv_schema_json = {"id": {"type": "int32"}, + "file_name": {"type": "string"}, + "label": {"type": "int32"}, "data": {"type": "bytes"}} 
writer.add_schema(cv_schema_json, "img_schema") writer.add_index(["file_name", "label"]) @@ -226,6 +228,24 @@ def test_cv_minddataset_blockreader_tutorial(add_and_remove_cv_file): num_iter += 1 assert num_iter == 20 +def test_cv_minddataset_blockreader_some_field_not_in_index_tutorial(add_and_remove_cv_file): + """tutorial for cv minddataset.""" + columns_list = ["id", "data", "label"] + num_readers = 4 + data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, + block_reader=True) + assert data_set.get_dataset_size() == 10 + repeat_num = 2 + data_set = data_set.repeat(repeat_num) + num_iter = 0 + for item in data_set.create_dict_iterator(): + logger.info("-------------- block reader repeat tow {} -----------------".format(num_iter)) + logger.info("-------------- item[id]: {} ----------------------------".format(item["id"])) + logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) + logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) + num_iter += 1 + assert num_iter == 20 + def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): """tutorial for cv minderdataset.""" @@ -359,13 +379,14 @@ def get_data(dir_name): lines = file_reader.readlines() data_list = [] - for line in lines: + for i, line in enumerate(lines): try: filename, label = line.split(",") label = label.strip("\n") with open(os.path.join(img_dir, filename), "rb") as file_reader: img = file_reader.read() - data_json = {"file_name": filename, + data_json = {"id": i, + "file_name": filename, "data": img, "label": int(label)} data_list.append(data_json) diff --git a/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py b/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py index b3a8d94589..e95f25aae4 100644 --- a/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py +++ b/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py @@ -18,6 +18,7 @@ import pytest from 
mindspore.mindrecord import Cifar100ToMR from mindspore.mindrecord import FileReader from mindspore.mindrecord import MRMOpenError +from mindspore.mindrecord import SUCCESS from mindspore import log as logger CIFAR100_DIR = "../data/mindrecord/testCifar100Data" @@ -26,7 +27,8 @@ MINDRECORD_FILE = "./cifar100.mindrecord" def test_cifar100_to_mindrecord_without_index_fields(): """test transform cifar100 dataset to mindrecord without index fields.""" cifar100_transformer = Cifar100ToMR(CIFAR100_DIR, MINDRECORD_FILE) - cifar100_transformer.transform() + ret = cifar100_transformer.transform() + assert ret == SUCCESS, "Failed to tranform from cifar100 to mindrecord" assert os.path.exists(MINDRECORD_FILE) assert os.path.exists(MINDRECORD_FILE + "_test") read() diff --git a/tests/ut/python/mindrecord/test_mindrecord_exception.py b/tests/ut/python/mindrecord/test_mindrecord_exception.py index 0a51fbf4e7..1f7a3f859d 100644 --- a/tests/ut/python/mindrecord/test_mindrecord_exception.py +++ b/tests/ut/python/mindrecord/test_mindrecord_exception.py @@ -16,7 +16,7 @@ import os import pytest from mindspore.mindrecord import FileWriter, FileReader, MindPage -from mindspore.mindrecord import MRMOpenError, MRMGenerateIndexError, ParamValueError +from mindspore.mindrecord import MRMOpenError, MRMGenerateIndexError, ParamValueError, MRMGetMetaError from mindspore import log as logger from utils import get_data @@ -280,3 +280,9 @@ def test_cv_file_writer_shard_num_greater_than_1000(): with pytest.raises(ParamValueError) as err: FileWriter(CV_FILE_NAME, 1001) assert 'Shard number should between' in str(err.value) + +def test_add_index_without_add_schema(): + with pytest.raises(MRMGetMetaError) as err: + fw = FileWriter(CV_FILE_NAME) + fw.add_index(["label"]) + assert 'Failed to get meta info' in str(err.value) From 180c1750de1a0b4a3b62fe383cb90f63e5df75cf Mon Sep 17 00:00:00 2001 From: ms_yan <6576637+ms_yan@user.noreply.gitee.com> Date: Thu, 2 Apr 2020 21:56:48 +0800 Subject: [PATCH 
22/58] add parameter check for Class Schema --- mindspore/dataset/engine/datasets.py | 23 ++++++++---- mindspore/dataset/engine/validators.py | 50 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index db2b5169d2..de604a67e9 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset + check_zip_dataset, check_add_column from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,13 +2334,20 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: + if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): + raise ValueError("The file %s does not exist or permission denied!" % schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) - self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load") + raise RuntimeError("Schema file failed to load.") + except UnicodeDecodeError: + raise RuntimeError("Schema file failed to decode.") + except Exception: + raise RuntimeError("Schema file failed to open.") + self.from_json(json_obj) + @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. 
@@ -2359,10 +2366,8 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - elif isinstance(de_type, str): - new_column["type"] = str(DataType(de_type)) else: - raise ValueError("Unknown column type") + new_column["type"] = str(DataType(de_type)) if shape is not None: new_column["shape"] = shape @@ -2391,7 +2396,7 @@ class Schema: Parse the columns and add it to self. Args: - columns (list[str]): names of columns. + columns (dict or list[str]): names of columns. Raises: RuntimeError: If failed to parse schema file. @@ -2399,6 +2404,8 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. """ + if columns is None: + raise TypeError("Expected non-empty dict or string list.") self.columns = [] for col in columns: name = None @@ -2443,6 +2450,8 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ + if not isinstance(json_obj, dict) or json_obj is None: + raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index b4d22a4a01..26d6241945 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,10 +19,15 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count +from mindspore._c_expression import typing from . import samplers from . 
import datasets INT32_MAX = 2147483647 +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64" +] def check(method): @@ -188,6 +193,12 @@ def check(method): return wrapper +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + def check_filename(path): """ check the filename in the path @@ -743,3 +754,42 @@ def check_project(method): return method(*args, **kwargs) return new_method + + +def check_shape(shape, name): + if isinstance(shape, list): + for element in shape: + if not isinstance(element, int): + raise TypeError( + "Each element in {0} should be of type int. Got {1}.".format(name, type(element))) + else: + raise TypeError("Expected int list.") + + +def check_add_column(method): + """check the input arguments of add_column.""" + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check name; required argument + name = param_dict.get("name") + if not isinstance(name, str) or not name: + raise TypeError("Expected non-empty string.") + + # check type; required argument + de_type = param_dict.get("de_type") + if de_type is not None: + if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): + raise ValueError("Unknown column type.") + else: + raise TypeError("Expected non-empty string.") + + # check shape + shape = param_dict.get("shape") + if shape is not None: + check_shape(shape, "shape") + + return method(*args, **kwargs) + + return new_method From f01098bc12be06f440ae166bf28a17587d9e50cb Mon Sep 17 00:00:00 2001 From: Jonathan Yan Date: Sat, 4 Apr 2020 06:48:58 -0400 Subject: [PATCH 23/58] remove ENABLE_MINDRECORD flag --- mindspore/ccsrc/dataset/CMakeLists.txt | 2 -- mindspore/ccsrc/dataset/api/de_pipeline.cc | 12 +++--------- mindspore/ccsrc/dataset/api/de_pipeline.h | 4 ---- mindspore/ccsrc/dataset/api/python_bindings.cc | 6 ------ 
.../engine/datasetops/source/mindrecord_op.cc | 3 --- .../dataset/engine/datasetops/source/mindrecord_op.h | 2 -- tests/ut/cpp/CMakeLists.txt | 1 - tests/ut/cpp/dataset/mind_record_op_test.cc | 2 -- 8 files changed, 3 insertions(+), 29 deletions(-) diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt index d6791f2b9b..477d37051e 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/CMakeLists.txt @@ -17,8 +17,6 @@ if (ENABLE_TDTQUE) message(STATUS "TDT queue is enabled") endif () -add_definitions(-D ENABLE_MINDRECORD) - # conde coverage # option(ENABLE_COVERAGE "Enable code coverage report" OFF) # if (ENABLE_COVERAGE) diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index d51204f659..65ec8d30f2 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -29,11 +29,9 @@ #include "dataset/engine/datasetops/source/manifest_op.h" #include "dataset/engine/datasetops/source/cifar_op.h" #include "dataset/engine/datasetops/source/celeba_op.h" -#ifdef ENABLE_MINDRECORD -#include "./shard_category.h" -#include "./shard_sample.h" -#include "./shard_shuffle.h" -#endif +#include "mindrecord/include/shard_category.h" +#include "mindrecord/include/shard_sample.h" +#include "mindrecord/include/shard_shuffle.h" #include "dataset/util/random.h" #include "dataset/util/status.h" @@ -46,9 +44,7 @@ using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr g_parse_op_func_ = {{kStorage, &DEPipeline::ParseStorageOp}, {kShuffle, &DEPipeline::ParseShuffleOp}, -#ifdef ENABLE_MINDRECORD {kMindrecord, &DEPipeline::ParseMindRecordOp}, -#endif {kMap, &DEPipeline::ParseMapOp}, {kBatch, &DEPipeline::ParseBatchOp}, {kRepeat, &DEPipeline::ParseRepeatOp}, @@ -364,7 +360,6 @@ Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr *in_partitions) { if (args["partitions"].is_none()) { std::string err_msg 
= "Error: partitions is not set (None)"; @@ -450,7 +445,6 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr *ptr) { std::shared_ptr builder = std::make_shared(); diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/dataset/api/de_pipeline.h index e8dde85a77..acffc390cc 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/dataset/api/de_pipeline.h @@ -38,9 +38,7 @@ using DsOpPtr = std::shared_ptr; enum OpName { kStorage = 0, kShuffle, -#ifdef ENABLE_MINDRECORD kMindrecord, -#endif kBatch, kCache, kRepeat, @@ -101,11 +99,9 @@ class DEPipeline { Status ParseShuffleOp(const py::dict &args, std::shared_ptr *ptr); -#ifdef ENABLE_MINDRECORD Status CheckMindRecordPartitionInfo(const py::dict &args, std::vector *ptr); Status ParseMindRecordOp(const py::dict &args, std::shared_ptr *ptr); -#endif Status ParseMapOp(const py::dict &args, std::shared_ptr *ptr); diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index 86b0a5d66a..e6c2691281 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -44,9 +44,7 @@ #include "dataset/engine/datasetops/source/io_block.h" #include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/manifest_op.h" -#ifdef ENABLE_MINDRECORD #include "dataset/engine/datasetops/source/mindrecord_op.h" -#endif #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" #include "dataset/engine/datasetops/source/sampler/random_sampler.h" @@ -146,14 +144,12 @@ void bindDatasetOps(py::module *m) { return py::make_tuple(count, num_classes); }); -#ifdef ENABLE_MINDRECORD (void)py::class_>(*m, "MindRecordOp") .def_static("get_num_rows", [](const std::string &path) { int64_t count = 0; THROW_IF_ERROR(MindRecordOp::CountTotalRows(path, &count)); return count; }); 
-#endif (void)py::class_>(*m, "ManifestOp") .def_static("get_num_rows_and_classes", @@ -424,9 +420,7 @@ PYBIND11_MODULE(_c_dataengine, m) { .value("STORAGE", OpName::kStorage) .value("SHUFFLE", OpName::kShuffle) .value("BATCH", OpName::kBatch) -#ifdef ENABLE_MINDRECORD .value("MINDRECORD", OpName::kMindrecord) -#endif .value("CACHE", OpName::kCache) .value("REPEAT", OpName::kRepeat) .value("TAKE", OpName::kTake) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index b062371d7f..b5bea5416c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -13,8 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef ENABLE_MINDRECORD - #include "dataset/engine/datasetops/source/mindrecord_op.h" #include @@ -665,4 +663,3 @@ Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *cou } } // namespace dataset } // namespace mindspore -#endif diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h index 2535acbc50..0b16391b20 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h @@ -15,7 +15,6 @@ */ #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ #define DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ -#ifdef ENABLE_MINDRECORD #pragma once #include @@ -276,5 +275,4 @@ class MindRecordOp : public ParallelOp { }; } // namespace dataset } // namespace mindspore -#endif #endif // DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 5f4bd41b3b..8d3f8a8138 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -26,7 
+26,6 @@ MESSAGE("check ut_test ${CMAKE_BINARY_DIR}") link_directories(${MS_CCSRC_BUILD_PATH}) if(ENABLE_MINDDATA) - add_definitions(-D ENABLE_MINDRECORD) add_definitions(-D ENABLE_MINDDATA) link_directories(${MS_CCSRC_BUILD_PATH}/dataset) link_directories(${MS_CCSRC_BUILD_PATH}/mindrecord) diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index abe7faef14..3d5c80b3f4 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef ENABLE_MINDRECORD #include #include #include @@ -480,4 +479,3 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { row_count++; } } -#endif From 5f1fedaae7bed936b22e1f7e4a0adde4f1fb11e8 Mon Sep 17 00:00:00 2001 From: WeibiaoYu Date: Thu, 2 Apr 2020 08:43:13 -0400 Subject: [PATCH 24/58] Support to config whether to save integeated checkpoint, in auto model parallel scene --- mindspore/common/api.py | 35 -------------------------- mindspore/train/callback.py | 16 +++++++++--- mindspore/train/serialization.py | 7 +++--- tests/ut/python/utils/test_callback.py | 4 +-- 4 files changed, 19 insertions(+), 43 deletions(-) diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 9ee95ef772..5a74febe53 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -374,9 +374,6 @@ class _Executor: obj.parameter_layout_dict = self._executor.get_parameter_layout(phase) obj.load_parameter_slice(params) - if _get_parallel_mode() in ["hybrid_parallel"]: - obj.parameter_layout_dict = self._build_parameter_layout(obj) - # the following GE init process is not needed when use vm or ms backend if enable_ge: # decide whether to sink based on whether the inputs is virtual or not @@ -449,38 +446,6 @@ class _Executor: return self._exec_pip(obj, *args, phase=phase_real) raise KeyError('{} graph is not 
exist.'.format(phase_real)) - def _build_parameter_layout(self, obj): - """ - Build parameter layout, for layerwise_parallel parameter. - - Args: - obj (Function or Cell): The function or cell instance need to be compiled. - - Returns: - Dictionary, parameter layout info. - """ - parameter_layout_dict = {} - layerwise_parallel_parameters = [] - for key in obj.parameters_dict(): - if obj.parameters_dict()[key].layerwise_parallel is True: - layerwise_parallel_parameters.append(key) - - if not layerwise_parallel_parameters: - return parameter_layout_dict - - from ..communication.management import get_group_size - group_size = [get_group_size()] - for key in layerwise_parallel_parameters: - tensor_map = [0] - shape = obj.parameters_dict()[key].data.shape() - for x in range(len(shape)): # dim 0 set 0, others set -1 - if x: - tensor_map.append(-1) - layout = [group_size, tensor_map] - parameter_layout_dict[key] = layout - - return parameter_layout_dict - def del_net_res(self, net_id): self._executor.del_net_res(net_id) diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py index 62f847089d..dcf630342c 100644 --- a/mindspore/train/callback.py +++ b/mindspore/train/callback.py @@ -24,7 +24,7 @@ import mindspore.context as context from mindspore.train.serialization import _exec_save_checkpoint, _fill_param_into_net, _save_graph from mindspore.train._utils import _make_directory from mindspore import log as logger -from mindspore._checkparam import check_int_non_negative +from mindspore._checkparam import check_int_non_negative, check_bool from mindspore.common.tensor import Tensor from .summary.summary_record import _cache_summary_tensor_data @@ -150,6 +150,8 @@ class CheckpointConfig: keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5. keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0. Can't be used with keep_checkpoint_max at the same time. 
+ integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. Default: True. + Integrated save function is only supported in automatic parallel scene, not supported in manual parallel. Raises: ValueError: If the input_param is None or 0. @@ -163,7 +165,8 @@ class CheckpointConfig: save_checkpoint_steps=1, save_checkpoint_seconds=0, keep_checkpoint_max=5, - keep_checkpoint_per_n_minutes=0): + keep_checkpoint_per_n_minutes=0, + integrated_save=True): if not save_checkpoint_steps and not save_checkpoint_seconds and \ not keep_checkpoint_max and not keep_checkpoint_per_n_minutes: @@ -191,6 +194,8 @@ class CheckpointConfig: if not self._keep_checkpoint_per_n_minutes or self._keep_checkpoint_per_n_minutes == 0: self._keep_checkpoint_max = 1 + self._integrated_save = check_bool(integrated_save) + @property def save_checkpoint_steps(self): """Get the value of _save_checkpoint_steps.""" return self._save_checkpoint_steps @@ -211,6 +216,11 @@ class CheckpointConfig: """Get the value of _keep_checkpoint_per_n_minutes.""" return self._keep_checkpoint_per_n_minutes + @property + def integrated_save(self): + """Get the value of _integrated_save.""" + return self._integrated_save + def get_checkpoint_policy(self): """Get the policy of checkpoint.""" checkpoint_policy = {'save_checkpoint_steps': self._save_checkpoint_steps, @@ -619,7 +629,7 @@ class ModelCheckpoint(Callback): _set_cur_net(cb_params.train_network) cb_params.train_network.exec_checkpoint_graph() - _exec_save_checkpoint(cb_params.train_network, gen_file) + _exec_save_checkpoint(cb_params.train_network, gen_file, self._config.integrated_save) if os.path.exists(gen_file): shutil.move(gen_file, cur_file) diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 0478bbc071..b334c3e9d8 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -279,13 +279,14 @@ def _save_graph(network, file_name): os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) -def 
_exec_save_checkpoint(train_network, ckpoint_file_name): +def _exec_save_checkpoint(train_network, ckpoint_file_name, integrated_save=True): """ Saves checkpoint for 'ms' backend. Args: train_network (Network): The train network for training. ckpoint_file_name (str): The name of checkpoint file. + integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. """ param_dict = {} @@ -300,9 +301,9 @@ def _exec_save_checkpoint(train_network, ckpoint_file_name): else: param_data = Tensor(value.data) - # in model parallel scenario, some parameters were spliteds to all the devices, + # in automatic model parallel scenario, some parameters were split to all the devices, # which should be combined before saving - if key in train_network.parameter_layout_dict: + if integrated_save and key in train_network.parameter_layout_dict: param_data = _get_merged_param_data(train_network, key, param_data) each_param["data"] = param_data diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 60e4c6527a..43cf827330 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -308,10 +308,10 @@ def test_RunContext(): def test_Checkpoint_Config(): """Test CheckpointConfig all None or 0.""" with pytest.raises(ValueError): - CheckpointConfig(0, 0, 0, 0) + CheckpointConfig(0, 0, 0, 0, True) with pytest.raises(ValueError): - CheckpointConfig(0, None, 0, 0) + CheckpointConfig(0, None, 0, 0, True) def test_step_end_save_graph(): From 5b915155427293e60ecb229810b7e8d0f67912fd Mon Sep 17 00:00:00 2001 From: VectorSL Date: Tue, 7 Apr 2020 19:48:34 +0800 Subject: [PATCH 25/58] update lossscale for gpu --- mindspore/nn/wrap/loss_scale.py | 40 +++++++++---- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 88 ++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 11 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py 
b/mindspore/nn/wrap/loss_scale.py index 1ce3179273..6a1f15a402 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ """Loss scale cell for loss scale training.""" +import mindspore.context as context from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.train.parallel_utils import ParallelMode from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean @@ -34,6 +35,13 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) +_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() + + +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -197,9 +205,15 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + else: + self.gpu_target = False + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -224,10 +238,12 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + init = False + if not self.gpu_target: + # 
init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -238,9 +254,13 @@ class TrainOneStepWithLossScaleCell(Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) + if not self.gpu_target: + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 37a3b38bb6..d255796bae 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, + NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -154,6 +154,10 @@ __all__ = [ 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', + 'IsNan', + 'IsFinite', + 'IsInf', + 'FloatStatus', 'Reciprocal', 'SmoothL1Loss', 'ReduceAll', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 175b72560f..127d3c513c 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,6 +1541,94 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) 
+class IsNan(PrimitiveWithInfer): + """ + Judging which elements are nan for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsNan""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsInf(PrimitiveWithInfer): + """ + Judging which elements are inf or -inf for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsInf""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsFinite(PrimitiveWithInfer): + """ + Judging which elements are finite for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsFinite""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class FloatStatus(PrimitiveWithInfer): + """ + Determine if the elements contains nan, inf or -inf. `0` for normal, `1` for overflow. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the shape of `(1,)`, and has the same dtype of input `mindspore.dtype.float32` or + `mindspore.dtype.float16`. 
+ """ + + @prim_attr_register + def __init__(self): + """init FloatStatus""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return [1] + + def infer_dtype(self, x_dtype): + return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From 14c5c1b57d89e6a80f9d5272c566d199515b8b59 Mon Sep 17 00:00:00 2001 From: guohongzilong <2713219276@qq.com> Date: Tue, 7 Apr 2020 20:39:51 +0800 Subject: [PATCH 26/58] usr mindspore. instead of mstype. --- mindspore/common/api.py | 4 ++-- mindspore/common/initializer.py | 2 +- mindspore/dataset/engine/datasets.py | 4 ++-- mindspore/ops/operations/array_ops.py | 7 ++++--- mindspore/ops/operations/math_ops.py | 4 ++-- mindspore/train/model.py | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 5a74febe53..7f0b2bfeaa 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -230,8 +230,8 @@ def ms_function(fn=None, obj=None, input_signature=None): >>> z = F.tensor_add(x, y) >>> return z >>> - >>> @ms_function(input_signature=(MetaTensor(mstype.float32, (1, 1, 3, 3)), - >>> MetaTensor(mstype.float32, (1, 1, 3, 3)))) + >>> @ms_function(input_signature=(MetaTensor(mindspore.float32, (1, 1, 3, 3)), + >>> MetaTensor(mindspore.float32, (1, 1, 3, 3)))) >>> def tensor_add_with_sig(x, y): >>> z = F.tensor_add(x, y) >>> return z diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index bdc3418129..4261621272 100644 --- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -282,7 +282,7 @@ def initializer(init, shape=None, dtype=mstype.float32): Tensor, initialized tensor. 
Examples: - >>> tensor = initializer('ones', [1, 2, 3], mstype.float32) + >>> tensor = initializer('ones', [1, 2, 3], mindspore.float32) """ if not isinstance(init, (Tensor, numbers.Number, str, Initializer)): raise TypeError('Unsupported init type.') diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index de604a67e9..ab2290c13c 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -1814,7 +1814,7 @@ class TFRecordDataset(SourceDataset): >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files) >>> # 2) get all rows from dataset_files with user-defined schema: >>> schema = ds.Schema() - >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) + >>> schema.add_column('col_1d', de_type=mindspore.int64, shape=[2]) >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema=schema) >>> # 3) get all rows from dataset_files with schema file "./schema.json": >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema="./schema.json") @@ -2325,7 +2325,7 @@ class Schema: >>> import mindspore.common.dtype as mstype >>> # create schema, specify column name, mindspore.dtype and shape of the column >>> schema = ds.Schema() - >>> schema.add_column('col1', de_type=mstype.int64, shape=[2]) + >>> schema.add_column('col1', de_type=mindspore.int64, shape=[2]) """ def __init__(self, schema_file=None): diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index fdad46a31f..b91c2cbc7d 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1535,7 +1535,8 @@ class StridedSlice(PrimitiveWithInfer): - Finally, the output is [3, 3, 3]. 
Examples - >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]) + >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], + >>> [[5, 5, 5], [6, 6, 6]]], mindspore.float32) >>> slice = StridedSlice() >>> output = slice(input_x, (1, 0, 0), (2, 1, 3), (1, 1, 1)) >>> output.shape() @@ -2067,7 +2068,7 @@ class SpaceToBatch(PrimitiveWithInfer): >>> block_size = 2 >>> paddings = [[0, 0], [0, 0]] >>> space_to_batch = P.SpaceToBatch(block_size, paddings) - >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mstype.float32) + >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) >>> space_to_batch(x) [[[[1.]]], [[[2.]]], [[[3.]]], [[[4.]]]] @@ -2135,7 +2136,7 @@ class BatchToSpace(PrimitiveWithInfer): >>> block_size = 2 >>> crops = [[0, 0], [0, 0]] >>> op = P.BatchToSpace(block_size, crops) - >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mstype.float32) + >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) >>> output = op(x) [[[[1., 2.], [3., 4.]]]] diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 127d3c513c..47b9e490f1 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1996,8 +1996,8 @@ class Atan2(_MathBinaryOp): Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. Examples: - >>> input_x = Tensor(np.array([[0, 1]]), mstype.float32) - >>> input_y = Tensor(np.array([[1, 1]]), mstype.float32) + >>> input_x = Tensor(np.array([[0, 1]]), mindspore.float32) + >>> input_y = Tensor(np.array([[1, 1]]), mindspore.float32) >>> atan2 = P.Atan2() >>> atan2(input_x, input_y) [[0. 0.7853982]] diff --git a/mindspore/train/model.py b/mindspore/train/model.py index bcfd897f58..41b372f85a 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -528,7 +528,7 @@ class Model: Tensor, array(s) of predictions. 
Examples: - >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mstype.float32) + >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> model = Model(Net()) >>> model.predict(input_data) """ From 951e094dd4868c871f61006487f1e6d8d7672fb6 Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Fri, 3 Apr 2020 15:37:42 +0800 Subject: [PATCH 27/58] add api image gradients --- mindspore/nn/layer/__init__.py | 4 +- mindspore/nn/layer/basic.py | 45 ++++++++++++++ tests/st/ops/davinci/test_image_gradients.py | 62 ++++++++++++++++++++ tests/ut/python/nn/test_image_gradients.py | 49 ++++++++++++++++ 4 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 tests/st/ops/davinci/test_image_gradients.py create mode 100644 tests/ut/python/nn/test_image_gradients.py diff --git a/mindspore/nn/layer/__init__.py b/mindspore/nn/layer/__init__.py index bb29935602..dae18fe663 100644 --- a/mindspore/nn/layer/__init__.py +++ b/mindspore/nn/layer/__init__.py @@ -22,7 +22,7 @@ from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm from .container import SequentialCell, CellList from .conv import Conv2d, Conv2dTranspose from .lstm import LSTM -from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot +from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradients from .embedding import Embedding from .pooling import AvgPool2d, MaxPool2d @@ -31,7 +31,7 @@ __all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'SequentialCell', 'CellList', 'Conv2d', 'Conv2dTranspose', 'LSTM', - 'Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', + 'Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'ImageGradients', 'Embedding', 'AvgPool2d', 'MaxPool2d', ] diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index 30b94c738d..de49685dac 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -370,3 +370,48 @@ class 
OneHot(Cell): def construct(self, indices): return self.onehot(indices, self.depth, self.on_value, self.off_value) + + +class ImageGradients(Cell): + r""" + Returns two tensors, the first is along the height dimension and the second is along the width dimension. + + Assume an image shape is :math:`h*w`. The gradients along the height and the width are :math:`dy` and :math:`dx`, + respectively. + + .. math:: + dy[i] = \begin{cases} image[i+1, :]-image[i, :], &if\ 0<=i>> net = nn.ImageGradients() + >>> image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + >>> net(image) + [[[[2,2] + [0,0]]]] + [[[[1,0] + [1,0]]]] + """ + def __init__(self): + super(ImageGradients, self).__init__() + + def construct(self, images): + batch_size, depth, height, width = P.Shape()(images) + dy = images[:, :, 1:, :] - images[:, :, :height - 1, :] + dy_last = P.Fill()(P.DType()(images), (batch_size, depth, 1, width), 0) + dy = P.Concat(2)((dy, dy_last)) + + dx = images[:, :, :, 1:] - images[:, :, :, :width - 1] + dx_last = P.Fill()(P.DType()(images), (batch_size, depth, height, 1), 0) + dx = P.Concat(3)((dx, dx_last)) + return dy, dx diff --git a/tests/st/ops/davinci/test_image_gradients.py b/tests/st/ops/davinci/test_image_gradients.py new file mode 100644 index 0000000000..ea385158c9 --- /dev/null +++ b/tests/st/ops/davinci/test_image_gradients.py @@ -0,0 +1,62 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +import mindspore.common.dtype as mstype +from mindspore import Tensor +from mindspore.common.api import ms_function + +context.set_context(device_target="Ascend") +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.image_gradients = nn.ImageGradients() + + @ms_function + def construct(self, x): + return self.image_gradients(x) + + +def test_image_gradients(): + image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + expected_dy = np.array([[[[2,2],[0,0]]]]).astype(np.int32) + expected_dx = np.array([[[[1,0],[1,0]]]]).astype(np.int32) + net = Net() + dy, dx = net(image) + assert np.any(dx.asnumpy()-expected_dx) == False + assert np.any(dy.asnumpy()-expected_dy) == False + + +def test_image_gradients_multi_channel_depth(): + # 4 x 2 x 2 x 2 + dtype = mstype.int32 + image = Tensor(np.array([[[[1,2],[3,4]], [[5,6],[7,8]]], + [[[3,5],[7,9]], [[11,13],[15,17]]], + [[[5,10],[15,20]], [[25,30],[35,40]]], + [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype) + expected_dy = Tensor(np.array([[[[2,2],[0,0]], [[2,2],[0,0]]], + [[[4,4],[0,0]], [[4,4],[0,0]]], + [[[10,10],[0,0]], [[10,10],[0,0]]], + [[[20,20],[0,0]], [[20,20],[0,0]]]]), dtype=dtype) + expected_dx = Tensor(np.array([[[[1,0],[1,0]], [[1,0],[1,0]]], + [[[2,0],[2,0]], [[2,0],[2,0]]], + [[[5,0],[5,0]], [[5,0],[5,0]]], + [[[10,0],[10,0]], [[10,0],[10,0]]]]), dtype=dtype) + net = Net() + dy, dx = net(image) + + assert np.any(dx.asnumpy()-expected_dx.asnumpy()) == False + assert np.any(dy.asnumpy()-expected_dy.asnumpy()) == False diff --git a/tests/ut/python/nn/test_image_gradients.py b/tests/ut/python/nn/test_image_gradients.py new file mode 100644 index 0000000000..f65f38ec0a --- /dev/null +++ b/tests/ut/python/nn/test_image_gradients.py @@ -0,0 +1,49 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test loss """ +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +import mindspore.common.dtype as mstype +from mindspore import Tensor +from mindspore.common.api import _executor +from mindspore.common.api import ms_function + +context.set_context(device_target="Ascend") +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.image_gradients = nn.ImageGradients() + + @ms_function + def construct(self, x): + return self.image_gradients(x) + +def test_compile(): + # input shape 1 x 1 x 2 x 2 + image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + net = Net() + _executor.compile(net, image) + + +def test_compile_multi_channel(): + # input shape 4 x 2 x 2 x 2 + dtype = mstype.int32 + image = Tensor(np.array([[[[1,2],[3,4]], [[5,6],[7,8]]], + [[[3,5],[7,9]], [[11,13],[15,17]]], + [[[5,10],[15,20]], [[25,30],[35,40]]], + [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype) + net = Net() + _executor.compile(net, image) From 7798c85e70ebe474a2832102d15707b9b2b40eb4 Mon Sep 17 00:00:00 2001 From: Xiaoda Zhang Date: Fri, 3 Apr 2020 14:33:04 +0800 Subject: [PATCH 28/58] This commit is to separate the computation cost and memory cost in auto_parallel. Some related memory correction is removed. 
--- .../ccsrc/parallel/auto_parallel/costmodel.cc | 12 +- .../ccsrc/parallel/auto_parallel/costmodel.h | 10 +- .../auto_parallel/dp_algo_costmodel.h | 2 +- .../parallel/auto_parallel/edge_costmodel.cc | 51 ++-- .../parallel/auto_parallel/edge_costmodel.h | 2 +- .../parallel/auto_parallel/graph_costmodel.cc | 105 +++----- .../parallel/auto_parallel/graph_costmodel.h | 8 +- .../auto_parallel/operator_costmodel.cc | 114 ++++---- .../auto_parallel/operator_costmodel.h | 244 +++++++++--------- .../ccsrc/parallel/ops_info/matmul_info.cc | 8 +- .../ccsrc/parallel/ops_info/operator_info.cc | 25 +- .../ccsrc/parallel/ops_info/operator_info.h | 6 +- .../ccsrc/parallel/step_auto_parallel.cc | 30 +-- .../tensor_layout/tensor_redistribution.cc | 24 +- .../tensor_layout/tensor_redistribution.h | 9 +- .../auto_parallel/graph_costmodel_test.cc | 4 +- .../auto_parallel/operator_costmodel_test.cc | 12 +- .../cpp/parallel/ops_info/activation_test.cc | 8 +- .../cpp/parallel/ops_info/matmul_info_test.cc | 8 +- .../parallel/ops_info/tensor_add_info_test.cc | 8 +- .../cpp/parallel/ops_info/tmpidentity_test.cc | 4 +- 21 files changed, 322 insertions(+), 372 deletions(-) diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc index f5cf5069be..190f589bb5 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc @@ -23,8 +23,8 @@ namespace mindspore { namespace parallel { void Simplify(CostPtrList* clist_ptrs) { - // Sort the cost_list with the memory_cost increasing, and communication_cost decreasing order. This method - // excludes the cost with greater memory_cost and greater communication_cost. + // Sort the cost_list with the computation_cost_ increasing, and communication_cost decreasing order. This method + // excludes the cost with greater computation_cost_ and greater communication_cost. // E.g. clist_ptrs = {<100, 20>, <200, 10>, <300, 50>}. 
After this method, clist_ptrs = {<200, 10>, <100, 20>} if (!COST_MODEL_SIMPLIFY_CALCULATION) { return; @@ -33,7 +33,7 @@ void Simplify(CostPtrList* clist_ptrs) { std::vector id(clist_ptrs->size()); std::iota(id.begin(), id.end(), size_t(0)); std::sort(id.begin(), id.end(), [&clist_ptrs](size_t x, size_t y) { - return clist_ptrs->at(x)->memory_cost_ < clist_ptrs->at(y)->memory_cost_; + return clist_ptrs->at(x)->computation_cost_ < clist_ptrs->at(y)->computation_cost_; }); CostPtrList ret; for (size_t i = 0; i < clist_ptrs->size(); ++i) { @@ -45,8 +45,8 @@ void Simplify(CostPtrList* clist_ptrs) { } void SimplifyForDreasingCommunicationWithPartialPara(CostPtrList* clist_ptrs) { - // Sort the cost_list with the memory_cost increasing, and communication_with_partial_para_cost decreasing order. - // This method excludes the cost with greater memory_cost and greater communication_without_para_cost. + // Sort the cost_list with the computation_cost_ increasing, and communication_with_partial_para_cost decreasing + // order. This method excludes the cost with greater computation_cost_ and greater communication_without_para_cost. 
if (!COST_MODEL_SIMPLIFY_CALCULATION) { return; } @@ -54,7 +54,7 @@ void SimplifyForDreasingCommunicationWithPartialPara(CostPtrList* clist_ptrs) { std::vector id(clist_ptrs->size()); std::iota(id.begin(), id.end(), size_t(0)); std::sort(id.begin(), id.end(), [&clist_ptrs](size_t x, size_t y) { - return clist_ptrs->at(x)->memory_cost_ < clist_ptrs->at(y)->memory_cost_; + return clist_ptrs->at(x)->computation_cost_ < clist_ptrs->at(y)->computation_cost_; }); CostPtrList ret; for (size_t i = 0; i < clist_ptrs->size(); ++i) { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/costmodel.h index 361c19573f..229f0fbf5e 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/costmodel.h @@ -44,14 +44,18 @@ using RedistributionOpListPtr = std::shared_ptr& decision_ = nullptr) - : memory_cost_(memory), communication_cost_(commuication), decision_ptr_(std::move(decision_)) { + Cost(double computation, double commuication, const std::shared_ptr& decision_ = nullptr) + : computation_cost_(computation), communication_cost_(commuication), decision_ptr_(std::move(decision_)) { + memory_with_reuse_ = 0.0; communication_without_parameter_ = 0.0; communication_with_partial_para_ = 0.0; communication_redis_forward_ = 0.0; communication_redis_backward_ = 0.0; } - double memory_cost_; + // 'memory_with_reuse_' calculates the peak memory usage in a training phase + double memory_with_reuse_; + // 'computation_cost_' models the training time of an iteration in a training phase + double computation_cost_; // 'communication_cost_' includes communications from operators (forward and backward) and edges double communication_cost_; // communication_without_parameter_ = communication_cost_ - (backward communication from operators) diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h index 
c9b6a07317..0cb58c49da 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h @@ -35,7 +35,7 @@ namespace parallel { // interpretation of 6 operations in costmodel.h. // Phase 2: Search the cost_list in the final graph, and determine the optimal one // Create the cost_list for the final graph, and choose the optimal one: one the minimum quantity -// COST_MODEL_ALPHA * memory_cost + COST_MODEL_BETA * communication_cost +// COST_MODEL_ALPHA * computation_cost + COST_MODEL_BETA * communication_cost // Phase 3: Recover the original CostGraph, the determine strategy for each operator // After determining the optimal cost for the final graph, the algorithm recovers the original graph by applying // the 4 operations in the reverse order in the Phase 1. Because each operation decision contains the strategy, diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc index 6381049f17..653f6c903d 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc @@ -69,7 +69,7 @@ Status Edge::InitEdgeCost() { MS_LOG(EXCEPTION) << "Failure: redistribution cost calculation failed"; } MS_EXCEPTION_IF_NULL(cost); - MS_LOG(DEBUG) << "The redistribution cost: memory_cost: " << cost->memory_cost_ + MS_LOG(DEBUG) << "The redistribution cost: computation_cost: " << cost->computation_cost_ << ", communication_cost: " << cost->communication_cost_ << ", communication_without_parameter_: " << cost->communication_without_parameter_ << ", communication_with_partial_para_: " << cost->communication_with_partial_para_ << "."; @@ -117,9 +117,9 @@ Status Edge::GetRedistributionCost(const TensorLayout& prev_op_output_layout, co double comm_cost = tensor_redistribution.comm_cost(); double forward_comm_cost = tensor_redistribution.forward_comm_cost(); double backward_comm_cost = 
tensor_redistribution.backward_comm_cost(); - double mem_cost = tensor_redistribution.mem_cost(); + double computation_cost = tensor_redistribution.computation_cost(); - *cost = std::make_shared(type_length * mem_cost, type_length * comm_cost); + *cost = std::make_shared(type_length * computation_cost, type_length * comm_cost); (*cost)->communication_without_parameter_ = type_length * comm_cost; (*cost)->communication_with_partial_para_ = (*cost)->communication_without_parameter_ + @@ -150,26 +150,26 @@ CostPtrList Edge::CreateEdgeEliminationCostList(const StrategyPtr& output_st_ptr (void)std::transform(edges.begin(), edges.end(), all_cost_list.begin(), LocalGetCostList); CostPtrList selected_cost_list(all_cost_list.size(), nullptr); - std::function recursive = [&](size_t k, double memory, double communication, - double communication_without_para) { - if (k == edges.size()) { - auto decision = std::make_shared(selected_cost_list); - CostPtr new_cost = std::make_shared(memory, communication); - MS_EXCEPTION_IF_NULL(new_cost); - new_cost->communication_without_parameter_ = communication_without_para; - new_cost->communication_with_partial_para_ = - communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); - new_cost->decision_ptr_ = decision; - result.push_back(new_cost); - return; - } - for (auto& c : all_cost_list[k]) { - MS_EXCEPTION_IF_NULL(c); - selected_cost_list[k] = c; - recursive(k + 1, memory + c->memory_cost_, communication + c->communication_cost_, - communication_without_para + c->communication_without_parameter_); - } - }; + std::function recursive = + [&](size_t k, double computation, double communication, double communication_without_para) { + if (k == edges.size()) { + auto decision = std::make_shared(selected_cost_list); + CostPtr new_cost = std::make_shared(computation, communication); + MS_EXCEPTION_IF_NULL(new_cost); + new_cost->communication_without_parameter_ = communication_without_para; + 
new_cost->communication_with_partial_para_ = + communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); + new_cost->decision_ptr_ = decision; + result.push_back(new_cost); + return; + } + for (auto& c : all_cost_list[k]) { + MS_EXCEPTION_IF_NULL(c); + selected_cost_list[k] = c; + recursive(k + 1, computation + c->computation_cost_, communication + c->communication_cost_, + communication_without_para + c->communication_without_parameter_); + } + }; recursive(0, 0, 0, 0); SimplifyForDreasingCommunicationWithPartialPara(&result); return result; @@ -203,7 +203,8 @@ void Edge::CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtr MS_EXCEPTION_IF_NULL(middle_cost); for (auto& right_cost : right_cost_list) { MS_EXCEPTION_IF_NULL(right_cost); - double memory = left_cost->memory_cost_ + middle_cost->memory_cost_ + right_cost->memory_cost_; + double computation = + left_cost->computation_cost_ + middle_cost->computation_cost_ + right_cost->computation_cost_; double communication = left_cost->communication_cost_ + middle_cost->communication_cost_ + right_cost->communication_cost_; double communication_without_para = left_cost->communication_without_parameter_ + @@ -211,7 +212,7 @@ void Edge::CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtr right_cost->communication_without_parameter_; auto decision = std::make_shared(op_strategy, left_cost, middle_cost, right_cost); - auto cost = std::make_shared(memory, communication, decision); + auto cost = std::make_shared(computation, communication, decision); MS_EXCEPTION_IF_NULL(cost); cost->communication_without_parameter_ = communication_without_para; cost->communication_with_partial_para_ = diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h index 1fa49029fa..eb89466d7c 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h +++ 
b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h @@ -133,7 +133,7 @@ class Edge { void set_parameter_involve(int para_invol) { is_output_parameter_involve_ = para_invol; } // When the input of a operator contains WEIGHT or a output from other operators involving WEIGHT, then these input // should stay in memory until it is used in the backward phase, which is kept in memory at the end of forward phase. - Status CorrectStrategyCostForMemoryReuse() const { return SUCCESS; } + Status CalculateMemoryCost() const { return SUCCESS; } private: std::string edge_name_; diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc index 59b9d9e992..88a54662d3 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc @@ -247,7 +247,7 @@ CostPtrList CostGraph::CreateFinalCostList(const OperatorInfoPtr& u, const std:: MS_EXCEPTION_IF_NULL(cost1); MS_EXCEPTION_IF_NULL(cost2); MS_EXCEPTION_IF_NULL(cost3); - double memory = cost1->memory_cost_ + cost2->memory_cost_ + cost3->memory_cost_; + double computation = cost1->computation_cost_ + cost2->computation_cost_ + cost3->computation_cost_; double commmunication = cost1->communication_cost_ + cost2->communication_cost_ + cost3->communication_cost_; double communication_without_para = cost1->communication_without_parameter_ + @@ -255,7 +255,7 @@ CostPtrList CostGraph::CreateFinalCostList(const OperatorInfoPtr& u, const std:: cost3->communication_without_parameter_; auto decision = std::make_shared(u_strategy->strategy_ptr, v_strategy->strategy_ptr, cost1, cost2, cost3); - auto cost = std::make_shared(memory, commmunication, decision); + auto cost = std::make_shared(computation, commmunication, decision); MS_EXCEPTION_IF_NULL(cost); cost->communication_without_parameter_ = communication_without_para; cost->communication_with_partial_para_ = @@ -282,7 +282,7 @@ CostPtrList 
CostGraph::CreateFinalSingleCostList(const OperatorInfoPtr& u) { for (const auto& cost1 : clist1) { MS_EXCEPTION_IF_NULL(cost1); auto decision = std::make_shared(u_strategy_ptr, cost1); - auto new_cost = std::make_shared(cost1->memory_cost_, cost1->communication_cost_, decision); + auto new_cost = std::make_shared(cost1->computation_cost_, cost1->communication_cost_, decision); MS_EXCEPTION_IF_NULL(new_cost); new_cost->communication_without_parameter_ = cost1->communication_without_parameter_; new_cost->communication_with_partial_para_ = @@ -297,12 +297,12 @@ CostPtrList CostGraph::CreateFinalSingleCostList(const OperatorInfoPtr& u) { } CostPtr CostGraph::SelectCostWithMemoryConstraint(const CostPtrList& cost_list, double memory) { - if (cost_list.empty() || cost_list[0]->memory_cost_ >= memory) { + if (cost_list.empty() || cost_list[0]->computation_cost_ >= memory) { return nullptr; } std::function LocalCompare = [&](CostPtr init, const CostPtr& cost_x) { MS_EXCEPTION_IF_NULL(cost_x); - if (init == nullptr || cost_x->memory_cost_ < memory) { + if (init == nullptr || cost_x->computation_cost_ < memory) { init = cost_x; } return init; @@ -313,36 +313,36 @@ CostPtr CostGraph::SelectCostWithMemoryConstraint(const CostPtrList& cost_list, CostPtr CostGraph::SelectCostWithMinTrainingTime(const CostPtrList& cost_list, double memory) { // Select the cost with minimum training time. 
Currently, the training time is modeled as = - // costmodel_alpha_ * memory_cost + costmodel_beta_ * communication_with_partial_para_ + // costmodel_alpha_ * computation_cost + costmodel_beta_ * communication_with_partial_para_ if (cost_list.empty()) { MS_LOG(ERROR) << "Final cost list is null."; return nullptr; } CostPtr ret = cost_list[0]; MS_EXCEPTION_IF_NULL(ret); - if (ret->memory_cost_ >= memory) { - MS_LOG(ERROR) << "No available cost; the minimum cost is " << ret->memory_cost_ + if (ret->computation_cost_ >= memory) { + MS_LOG(ERROR) << "No available cost; the minimum cost is " << ret->computation_cost_ << ", the memory capacity is: " << memory << "."; return nullptr; } - double minimum = costmodel_alpha_ * ret->memory_cost_ + costmodel_beta_ * ret->communication_with_partial_para_; - MS_LOG(INFO) << "minimum: " << minimum << ", memory_cost_: " << ret->memory_cost_ + double minimum = costmodel_alpha_ * ret->computation_cost_ + costmodel_beta_ * ret->communication_with_partial_para_; + MS_LOG(INFO) << "minimum: " << minimum << ", computation_cost_: " << ret->computation_cost_ << ", communication_with_partial_para_: " << ret->communication_with_partial_para_ << ", communication_cost_: " << ret->communication_cost_ << ", communication_without_parameter_: " << ret->communication_without_parameter_ << "."; for (size_t i = 1; i < cost_list.size(); ++i) { MS_EXCEPTION_IF_NULL(cost_list[i]); - if (cost_list[i]->memory_cost_ >= memory) { - MS_LOG(INFO) << "cost_list " << i << " memory_cost_: " << cost_list[i]->memory_cost_ + if (cost_list[i]->computation_cost_ >= memory) { + MS_LOG(INFO) << "cost_list " << i << " computation_cost_: " << cost_list[i]->computation_cost_ << ", is larger than the memory capacity: " << memory << "."; break; } - MS_LOG(INFO) << "cost_list " << i << " memory_cost_: " << cost_list[i]->memory_cost_ + MS_LOG(INFO) << "cost_list " << i << " computation_cost_: " << cost_list[i]->computation_cost_ << ", communication_with_partial_para_: " << 
cost_list[i]->communication_with_partial_para_ << ", communication_cost_: " << cost_list[i]->communication_cost_ << ", communication_without_parameter_: " << cost_list[i]->communication_without_parameter_ << "."; - auto tmp = - costmodel_alpha_ * cost_list[i]->memory_cost_ + costmodel_beta_ * cost_list[i]->communication_with_partial_para_; + auto tmp = costmodel_alpha_ * cost_list[i]->computation_cost_ + + costmodel_beta_ * cost_list[i]->communication_with_partial_para_; MS_LOG(INFO) << "tmp: " << tmp; if (minimum > tmp) { minimum = tmp; @@ -363,8 +363,8 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect MS_LOG(ERROR) << "The cost list " << i << " is empty."; return ret; } else { - total_memory += all_cost_list[i][0]->memory_cost_; - minimum += costmodel_alpha_ * all_cost_list[i][0]->memory_cost_ + + total_memory += all_cost_list[i][0]->computation_cost_; + minimum += costmodel_alpha_ * all_cost_list[i][0]->computation_cost_ + costmodel_beta_ * all_cost_list[i][0]->communication_with_partial_para_; ret[i] = all_cost_list[i][0]; } @@ -381,8 +381,8 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect double tmp_memory = 0.0, tmp_minimum = 0.0; for (size_t i = 0; i < selected_cost_list.size(); ++i) { MS_EXCEPTION_IF_NULL(selected_cost_list[i]); - tmp_memory += selected_cost_list[i]->memory_cost_; - tmp_minimum += costmodel_alpha_ * selected_cost_list[i]->memory_cost_ + + tmp_memory += selected_cost_list[i]->computation_cost_; + tmp_minimum += costmodel_alpha_ * selected_cost_list[i]->computation_cost_ + costmodel_beta_ * selected_cost_list[i]->communication_with_partial_para_; } MS_LOG(INFO) << "tmp_memory: " << tmp_memory << ", tmp_minimum: " << tmp_minimum << ", minimum: " << minimum @@ -394,6 +394,7 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect } return; } + MS_LOG(DEBUG) << "The value minimum: " << minimum << ", available_memory: " << available_memory << "."; for 
(auto& c : all_cost_list[k]) { selected_cost_list[k] = c; @@ -814,7 +815,7 @@ void CostGraph::CreateMergeEliminationSubCostList(StrategyPtr op_strategy, const for (size_t k = 0; k < tar_cost_list.size(); ++k) { auto& tar_cost = tar_cost_list[k]; MS_EXCEPTION_IF_NULL(tar_cost); - double memory = op_cost->memory_cost_ + edge_cost->memory_cost_ + tar_cost->memory_cost_; + double computation = op_cost->computation_cost_ + edge_cost->computation_cost_ + tar_cost->computation_cost_; double communication = op_cost->communication_cost_ + edge_cost->communication_cost_ + tar_cost->communication_cost_; double communication_without_para = op_cost->communication_without_parameter_ + @@ -823,7 +824,7 @@ void CostGraph::CreateMergeEliminationSubCostList(StrategyPtr op_strategy, const auto decision = std::make_shared(op_strategy, op_cost, edge_cost, tar_op_strategy, tar_cost); - auto new_cost = std::make_shared(memory, communication, decision); + auto new_cost = std::make_shared(computation, communication, decision); MS_EXCEPTION_IF_NULL(new_cost); new_cost->communication_without_parameter_ = communication_without_para; new_cost->communication_with_partial_para_ = @@ -891,7 +892,8 @@ void CostGraph::CreateContractEliminationSubCostList(StrategyPtr contract_op_str for (size_t k = 0; k < tar_cost_list.size(); ++k) { auto& tar_cost = tar_cost_list[k]; MS_EXCEPTION_IF_NULL(tar_cost); - double memory = contract_op_cost->memory_cost_ + edge_cost->memory_cost_ + tar_cost->memory_cost_; + double computation = + contract_op_cost->computation_cost_ + edge_cost->computation_cost_ + tar_cost->computation_cost_; double communication = contract_op_cost->communication_cost_ + edge_cost->communication_cost_ + tar_cost->communication_cost_; double communication_without_para = contract_op_cost->communication_without_parameter_ + @@ -900,7 +902,7 @@ void CostGraph::CreateContractEliminationSubCostList(StrategyPtr contract_op_str auto decision = std::make_shared(contract_op_stra, contract_op_cost, 
edge_cost, target_op_stra, tar_cost); - auto new_cost = std::make_shared(memory, communication, decision); + auto new_cost = std::make_shared(computation, communication, decision); new_cost->communication_without_parameter_ = communication_without_para; new_cost->communication_with_partial_para_ = communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); @@ -963,9 +965,9 @@ void CostGraph::CreateTriangleEliminationSubCostList(StrategyPtr elimi_op_stra, MS_EXCEPTION_IF_NULL(left_edge_cost); for (auto& left_node_cost : left_node_clist_origin) { MS_EXCEPTION_IF_NULL(left_node_cost); - double new_memory_cost = elimi_op_cost->memory_cost_ + left_edge_cost->memory_cost_ + - left_node_cost->memory_cost_ + right_edge_cost->memory_cost_ + - right_op_cost->memory_cost_; + double new_computation = elimi_op_cost->computation_cost_ + left_edge_cost->computation_cost_ + + left_node_cost->computation_cost_ + right_edge_cost->computation_cost_ + + right_op_cost->computation_cost_; double new_commu_cost = elimi_op_cost->communication_cost_ + left_edge_cost->communication_cost_ + left_node_cost->communication_cost_ + right_edge_cost->communication_cost_ + right_op_cost->communication_cost_; @@ -977,7 +979,7 @@ void CostGraph::CreateTriangleEliminationSubCostList(StrategyPtr elimi_op_stra, auto decision = std::make_shared(elimi_op_stra, elimi_op_cost, left_edge_cost, right_edge_cost, left_op_stra, left_node_cost, right_op_stra, right_op_cost); - auto new_cost = std::make_shared(new_memory_cost, new_commu_cost, decision); + auto new_cost = std::make_shared(new_computation, new_commu_cost, decision); new_cost->communication_without_parameter_ = new_commu_without; new_cost->communication_with_partial_para_ = new_commu_without + COST_MODEL_GAMMA * (new_commu_cost - new_commu_without); @@ -1082,11 +1084,12 @@ void CostGraph::CreateStarEliminationSubCostList(const StrategyPtr& first_succ_n succ_edges_costs[0] = first_succ_edge_cost; succ_nodes_costs[0] = 
first_succ_node_cost; - double memory_cost = merged_node_cost->memory_cost_, commu_cost = merged_node_cost->communication_cost_, + double computation_cost = merged_node_cost->computation_cost_, + commu_cost = merged_node_cost->communication_cost_, commu_without = merged_node_cost->communication_without_parameter_; for (size_t i = 0; i < succ_nodes_stras.size(); ++i) { MS_EXCEPTION_IF_NULL(succ_edges_costs[i]); - memory_cost += succ_edges_costs[i]->memory_cost_ + succ_nodes_costs[i]->memory_cost_; + computation_cost += succ_edges_costs[i]->computation_cost_ + succ_nodes_costs[i]->computation_cost_; commu_cost += succ_edges_costs[i]->communication_cost_ + succ_nodes_costs[i]->communication_cost_; commu_without += succ_edges_costs[i]->communication_without_parameter_ + succ_nodes_costs[i]->communication_without_parameter_; @@ -1094,7 +1097,7 @@ void CostGraph::CreateStarEliminationSubCostList(const StrategyPtr& first_succ_n auto decision = std::make_shared(merged_op_stra, merged_node_cost, succ_edges_costs, succ_nodes_stras, succ_nodes_costs); - auto new_cost = std::make_shared(memory_cost, commu_cost, decision); + auto new_cost = std::make_shared(computation_cost, commu_cost, decision); new_cost->communication_without_parameter_ = commu_without; new_cost->communication_with_partial_para_ = commu_without + COST_MODEL_GAMMA * (commu_cost - commu_without); first_succ_node_clist_new->emplace_back(std::move(new_cost)); @@ -1210,36 +1213,6 @@ Status CostGraph::InitSelectedStrategy() { return SUCCESS; } -Status CostGraph::CorrectOpsStrategyCostForMultiOutputUse() { - for (auto& op : ops_) { - MS_EXCEPTION_IF_NULL(op); - if (op->GetAliveSuccEdges().size() > 1) { - // Filter out the case of a output being used by multiple operators - std::map output_count; - for (size_t i = 0; i < op->GetAliveSuccEdges().size(); ++i) { - auto output_index = op->GetAliveSuccEdges()[i]->prev_op_output_index(); - output_count[output_index]++; - } - for (size_t i = 0; i < 
op->GetAliveSuccEdges().size(); ++i) { - auto output_index = op->GetAliveSuccEdges()[i]->prev_op_output_index(); - if (output_count[output_index] <= 1) { - continue; - } - auto next_op = op->GetAliveSuccEdges()[i]->next_operator(); - MS_EXCEPTION_IF_NULL(next_op); - auto input_index = op->GetAliveSuccEdges()[i]->next_op_input_index(); - if (next_op->CorrectStrategyCostForMultiOutputUse(input_index) != SUCCESS) { - MS_LOG(ERROR) << "The operator name: " << op->name() << ", the next operator name: " << next_op->name() - << ", the output_index: " << output_index << ", the input_index: " << input_index << "."; - return FAILED; - } - output_count[output_index]--; - } - } - } - return SUCCESS; -} - Status CostGraph::ComputeOpsAndEdgesParameterInvolved() { for (auto& op : ops_) { MS_EXCEPTION_IF_NULL(op); @@ -1252,23 +1225,23 @@ Status CostGraph::ComputeOpsAndEdgesParameterInvolved() { return SUCCESS; } -Status CostGraph::CorrectOpsStrategyCostForMemoryReuse() { +Status CostGraph::CalculateOpsMemoryCost() { for (auto& op : ops_) { MS_EXCEPTION_IF_NULL(op); - if (op->CorrectStrategyCostForMemoryReuse() != SUCCESS) { - MS_LOG(ERROR) << "Correcting Operator: " << op->name() << " cost for memory reuse failed."; + if (op->CalculateMemoryCost() != SUCCESS) { + MS_LOG(ERROR) << "Calculate Operator: " << op->name() << " cost for memory usage failed."; return FAILED; } } return SUCCESS; } -Status CostGraph::CorrectEdgesStrategyCostForMemoryReuse() { +Status CostGraph::CalculateEdgesMemoryCost() { for (auto& edge_pair : edges_) { const auto& edges = edge_pair.second; for (auto& one_edge : edges) { - if (one_edge->CorrectStrategyCostForMemoryReuse() != SUCCESS) { - MS_LOG(ERROR) << "Correcting Edge: " << one_edge->edge_name() << " cost for memory reuse failed."; + if (one_edge->CalculateMemoryCost() != SUCCESS) { + MS_LOG(ERROR) << "Calculate Edge: " << one_edge->edge_name() << " cost for memory usage failed."; return FAILED; } } diff --git 
a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h index e4cbdffb61..c149534826 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h @@ -175,16 +175,12 @@ class CostGraph { void CreateStarEliminationSubCostList(const StrategyPtr&, const CostPtrList&, const CostPtrList&, const StrategyPtr&, const CostPtrList&, std::vector, CostPtrList&, CostPtrList&, CostPtrList*); - - // When a output of a operator is being used by multiple operators, the memory cost of this part should be calculated - // only once. This method is for correcting the 'strategy_cost_' for operators - Status CorrectOpsStrategyCostForMultiOutputUse(); // When the input of a operator is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then // the memory cost can be resused. - Status CorrectOpsStrategyCostForMemoryReuse(); + Status CalculateOpsMemoryCost(); // When the input of the edge is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then // the memory cost can be resused. - Status CorrectEdgesStrategyCostForMemoryReuse(); + Status CalculateEdgesMemoryCost(); Status ComputeOpsAndEdgesParameterInvolved(); std::vector GetOperators() const { return ops_; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc index 6958932fd6..7c17b499b1 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc @@ -74,8 +74,8 @@ double MatMulCost::GetBackwardCommCost(const std::vector& inputs, co // Return the per device memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double MatMulCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t&) const { +double MatMulCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) + (0 or 1) allreduce(slice(C)) double result = 0.0; TensorInfo output0 = outputs[0]; @@ -93,8 +93,8 @@ double MatMulCost::GetForwardMemoryCost(const std::vector& inputs, c // Return the per device memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double MatMulCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double MatMulCost::GetBackwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t& stage_id) const { // In backward phase, the memory cost = (0 or 1) allreduce(slice(B)) double result = 0.0; if (is_parameter_[1]) { @@ -147,8 +147,8 @@ double ActivationCost::GetBackwardCommCost(const std::vector& inputs // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double ActivationCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double ActivationCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -156,8 +156,8 @@ double ActivationCost::GetForwardMemoryCost(const std::vector& input // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double ActivationCost::GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const { +double ActivationCost::GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const { return 0.0; } @@ -191,8 +191,8 @@ double SoftmaxCost::GetBackwardCommCost(const std::vector& inputs, c // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double SoftmaxCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In the forward phase, the memory cost = slice(A) TensorInfo input0 = inputs[0]; Shape input0_slice_shape = input0.slice_shape(); @@ -201,8 +201,9 @@ double SoftmaxCost::GetForwardMemoryCost(const std::vector& inputs, // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double SoftmaxCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double SoftmaxCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -222,9 +223,9 @@ double TmpIdentityCost::GetBackwardCommCost(const std::vector& inputs, - const std::vector&, - const int32_t&) const { +double TmpIdentityCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -232,15 +233,15 @@ double TmpIdentityCost::GetForwardMemoryCost(const std::vector&, - const std::vector&, - const int32_t&) const { +double TmpIdentityCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } -double BatchParallelCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector&, - const int32_t&) const { +double BatchParallelCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { double cost = 0.0; for (size_t i = 0; i < inputs.size(); ++i) { cost += ListProduct(inputs[i].slice_shape()) * static_cast(inputs_type_lengths_[i]); @@ -248,9 +249,9 @@ double BatchParallelCost::GetForwardMemoryCost(const std::vector&, - const std::vector&, - const int32_t&) const { +double BatchParallelCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -285,8 +286,8 @@ double PReLUCost::GetBackwardCommCost(const std::vector& inputs, con // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double PReLUCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double PReLUCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) Shape input0_slice_shape = inputs[0].slice_shape(); Shape input1_slice_shape = inputs[1].slice_shape(); @@ -297,9 +298,9 @@ double PReLUCost::GetForwardMemoryCost(const std::vector& inputs, co // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double PReLUCost::GetBackwardMemoryCost(const std::vector& inputs, - const std::vector&, - const int32_t& stage_id) const { +double PReLUCost::GetBackwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t& stage_id) const { // In backward phase, the memory cost = (0 or 1) allreduce(slice(B)) double result = 0.0; if (is_parameter_[1]) { @@ -338,8 +339,8 @@ double OneHotCost::GetBackwardCommCost(const std::vector&, const std // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double OneHotCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double OneHotCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In onehot's forward phase, the memory cost = slice(A) Shape input0_slice_shape = inputs[0].slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -347,8 +348,8 @@ double OneHotCost::GetForwardMemoryCost(const std::vector& inputs, c // Return the per memory cost in the backward phase. 
The cost is calculated according to the bytes // this operator uses -double OneHotCost::GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const { +double OneHotCost::GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const { return 0.0; } @@ -368,8 +369,9 @@ double SoftmaxCrossEntropyWithLogitsCost::GetBackwardCommCost(const std::vector< // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCrossEntropyWithLogitsCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector&, const int32_t&) const { +double SoftmaxCrossEntropyWithLogitsCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) Shape input0_slice_shape = inputs[0].slice_shape(); Shape input1_slice_shape = inputs[1].slice_shape(); @@ -380,8 +382,9 @@ double SoftmaxCrossEntropyWithLogitsCost::GetForwardMemoryCost(const std::vector // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCrossEntropyWithLogitsCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double SoftmaxCrossEntropyWithLogitsCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -409,8 +412,8 @@ double ReshapeCost::GetBackwardCommCost(const std::vector&, const st // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double ReshapeCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const { +double ReshapeCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const { CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); RankList dev_list = g_device_manager->GetDeviceListByStageId(stage_id); @@ -421,26 +424,27 @@ double ReshapeCost::GetForwardMemoryCost(const std::vector& inputs, if (tensor_redistribution.ComputeCost() == FAILED) { MS_LOG(EXCEPTION) << "Failure: tensor_redistribution ComputeCost failed."; } - return (inputs_type_lengths_[0] * tensor_redistribution.mem_cost()); + return (inputs_type_lengths_[0] * tensor_redistribution.computation_cost()); } // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double ReshapeCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double ReshapeCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } -double ArithmeticCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double ArithmeticCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { double result; result = ListProduct(inputs[0].slice_shape()) * static_cast(inputs_type_lengths_[0]) + ListProduct(inputs[1].slice_shape()) * static_cast(inputs_type_lengths_[1]); return result; } -double ArithmeticCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double ArithmeticCost::GetBackwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t& stage_id) const { double result = 0.0; CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); @@ 
-533,15 +537,15 @@ double L2NormalizeCost::GetBackwardCommCost(const std::vector& input return result; } -double L2NormalizeCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double L2NormalizeCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); } -double L2NormalizeCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double L2NormalizeCost::GetBackwardComputationCost(const std::vector& inputs, + const std::vector&, const int32_t& stage_id) const { double result = 0.0; if (is_parameter_[0]) { @@ -618,8 +622,9 @@ double ReduceMethodCost::GetBackwardCommCost(const std::vector& inpu return result; } -double ReduceMethodCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector& outputs, const int32_t& stage_id) const { +double ReduceMethodCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, + const int32_t& stage_id) const { double result = 0.0; TensorInfo input0 = inputs[0]; TensorInfo output0 = outputs[0]; @@ -640,8 +645,9 @@ double ReduceMethodCost::GetForwardMemoryCost(const std::vector& inp return result; } -double ReduceMeanCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector& outputs, const int32_t& stage_id) const { +double ReduceMeanCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, + const int32_t& stage_id) const { double result = 0.0; TensorInfo input0 = inputs[0]; TensorInfo output0 = outputs[0]; diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h index 9fb86d467e..8f0099bba3 100644 --- 
a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h @@ -65,12 +65,12 @@ class OperatorCost { virtual double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const = 0; // per device computation cost - virtual double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; - virtual double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; - virtual double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; + virtual double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const = 0; + virtual double GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const = 0; + virtual double GetBackwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const = 0; protected: // for each input in 'inputs_', there is a bool variable indicating whether that the corresponding input is parameter @@ -96,14 +96,14 @@ class MatMulCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + 
return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using MatMulCostPtr = std::shared_ptr; @@ -121,14 +121,14 @@ class ActivationCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ActivationCostPtr = std::shared_ptr; @@ -146,14 +146,14 @@ class SoftmaxCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const 
std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t&) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t&) const override; }; using SoftmaxCostPtr = std::shared_ptr; @@ -171,14 +171,14 @@ class TmpIdentityCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const 
std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using TmpIdentityCostPtr = std::shared_ptr; @@ -199,14 +199,14 @@ class BatchParallelCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using BatchParallelCostPtr = std::shared_ptr; @@ -227,16 +227,16 @@ class VirtualDatasetCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + 
GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; @@ -259,18 +259,18 @@ class GeneratorBaseCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } // Inputs vector is empty for generator ops. - double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } // Generator ops don't have backward steps. 
- double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; @@ -292,14 +292,14 @@ class PReLUCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using PReLUCostPtr = std::shared_ptr; @@ -319,14 +319,14 @@ class OneHotCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& 
outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using OneHotCostPtr = std::shared_ptr; @@ -346,14 +346,14 @@ class SoftmaxCrossEntropyWithLogitsCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using SoftmaxCrossEntropyWithLogitsCostPtr = std::shared_ptr; @@ -376,16 +376,16 @@ class ReshapeCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double 
GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ReshapeCostPtr = std::shared_ptr; @@ -405,14 +405,14 @@ class ArithmeticCost : public OperatorCost { double GetBackwardCommCost(const std::vector&, const std::vector&, const int32_t&) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& 
inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ArithmeticCostPtr = std::shared_ptr; @@ -431,14 +431,14 @@ class L2NormalizeCost : public OperatorCost { } double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using L2NormalizeCostPtr = std::shared_ptr; @@ -455,14 +455,14 @@ class ReduceMethodCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); - } - double 
GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } void set_cross_batch(bool cb) { cross_batch_ = cb; } @@ -477,8 +477,8 @@ class ReduceMeanCost : public ReduceMethodCost { ReduceMeanCost() = default; ~ReduceMeanCost() override = default; - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ReduceMeanCostPtr = std::shared_ptr; @@ -499,18 +499,18 @@ class GetNextCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } // Inputs vector is empty for generator ops. 
- double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } // Generator ops don't have backward steps. - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc index ad6409be0a..2b02dc100d 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc @@ -592,10 +592,10 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& int32_t stage_id = strategy->GetInputStage(); // Here, we use the origin outputs_, because we only use the slice size of the output tensor. // It does not matter whether the output tensor is transposed or not. 
- double memory_cost = - matmulcost_ptr->GetForwardMemoryCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + double computation_cost = + matmulcost_ptr->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); double communication_cost = matmulcost_ptr->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); - std::shared_ptr result = std::make_shared(memory_cost, communication_cost); + std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = matmulcost_ptr->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = @@ -604,7 +604,7 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& // Breaking ties for preferring data parallelization BreakingTiesForPerferringDataParallel(strategy, result); - MS_LOG(DEBUG) << name_ << " : memory_cost: " << result->memory_cost_ + MS_LOG(DEBUG) << name_ << " : computation_cost: " << result->computation_cost_ << ", communication_cost: " << result->communication_cost_ << ", communication_without_parameter_: " << result->communication_without_parameter_ << ", communication_with_partial_para_: " << result->communication_with_partial_para_; diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/parallel/ops_info/operator_info.cc index 8b96425bf7..11c518d844 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.cc @@ -1034,9 +1034,10 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return FAILED; } int32_t stage_id = strategy->GetInputStage(); - double memory_cost = GetOperatorCost()->GetForwardMemoryCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double computation_cost = + GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); double 
communication_cost = GetOperatorCost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); - std::shared_ptr result = std::make_shared(memory_cost, communication_cost); + std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = GetOperatorCost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = @@ -1056,22 +1057,6 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return SUCCESS; } -Status OperatorInfo::CorrectStrategyCostForMultiOutputUse(size_t input_index) { - for (auto& swc : strategy_cost_) { - double parameter_memory_cost = ListProduct(swc->inputs_ptr[input_index].slice_shape()) * - static_cast(GetOperatorCost()->inputs_type_lengths()[input_index]); - // remove the parameter memory cost - swc->cost_list[0]->memory_cost_ -= parameter_memory_cost; - if (swc->cost_list[0]->memory_cost_ < -1) { - MS_LOG(ERROR) << "The memory cost after correction is " << swc->cost_list[0]->memory_cost_ - << ", the parameter_memory_cost is " << parameter_memory_cost; - return FAILED; - } - } - corrected_input_indices_.push_back(input_index); - return SUCCESS; -} - int OperatorInfo::ComputeOpAndPrevEdgeParameterInvolved() { if (is_output_parameter_involve_ != -1) { return is_output_parameter_involve_; @@ -1217,7 +1202,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra CheckGlobalDeviceManager(); auto total_device_num = g_device_manager->GetDeviceListByStageId(stra->GetInputStage()).size(); if (IntToSize(stra->GetInputDim()[0][0]) == total_device_num) { - cost->memory_cost_ -= 1.0; + cost->computation_cost_ -= 1.0; cost->communication_cost_ -= 1.0; cost->communication_with_partial_para_ -= 1.0; cost->communication_without_parameter_ -= 1.0; @@ -1226,7 +1211,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra } double 
OperatorInfo::GetForwardMemoryCostFromCNode() { - return GetOperatorCost()->GetForwardMemoryCost(inputs_tensor_info_, outputs_tensor_info_, 0); + return GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); } } // namespace parallel diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h index cc70f1b870..e7b8af0a7e 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h @@ -87,13 +87,9 @@ class OperatorInfo { // is checked Status SetCostUnderStrategyBase(const StrategyPtr& strategy); std::vector> GetStrategyCost() { return strategy_cost_; } - // In the case of a Parameter (or a output) being used by multiple operators, the memory cost induced by - // the parameter (or a output) should be calculated only once. This method is used to - // remove this part from the 'strategy_cost_'. - Status CorrectStrategyCostForMultiOutputUse(size_t input_index); // When the input of a operator contains WEIGHT or a output from other operators involving WEIGHT, then these input // should stay in memory until it is used in the backward phase, which is kept in memory at the end of forward phase. 
- Status CorrectStrategyCostForMemoryReuse() const { return SUCCESS; } + Status CalculateMemoryCost() const { return SUCCESS; } int ComputeOpAndPrevEdgeParameterInvolved(); ForwardOp forward_op() const { return forward_op_; } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 50e6a1e84e..d7d48c35bb 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -387,7 +387,7 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & operator_info->set_outputs_dtype(cnode->Type()); operator_info->set_cnode(cnode); // If no strategy has been configured for this operator, then candidate strategies are generated for - // auto-strategy searchingm if this primitive is Cast, we ignore the user-specified strategy + // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy if (!StrategyFound(attrs) || prim->name() == CAST) { // Compute split_flag_list_, indicating which input has batch dimension. This is ONLY used for preparation for // BatchParallelInfo operator @@ -600,13 +600,7 @@ void ConstructCostGraphEdges(const std::vector &all_nodes) { } MS_LOG(INFO) << "Successfully created " << edge_count << " edges for: " << cnode->operator_info()->name(); } - // For the case of a output being used by multiple subsequent operators, the output induced memory cost should be - // calculated only once. This method is for correct the operators' memory cost calculation. 
- if (entire_costgraph->CorrectOpsStrategyCostForMultiOutputUse() != SUCCESS) { - MS_LOG(EXCEPTION) << "Correcting strategy_cost_ for operators failed."; - } else { - MS_LOG(INFO) << "Correcting strategy_cost_ for operators succeeded."; - } + MS_LOG(INFO) << "Constructing edges for cost graph ends."; } @@ -803,14 +797,6 @@ void AugmentCostGraph(const std::vector &all_nodes) { std::shared_ptr edge_ptr = std::make_shared( edge_name, tmp_identity_ptr, target_cnode->operator_info(), 0, input_index - 1, false, true); - // Correct the memory calculation for a parameter being used by multiple operators. The parameter is calculated - // only once - if (target_cnode->operator_info()->CorrectStrategyCostForMultiOutputUse(IntToSize(input_index - 1)) != SUCCESS) { - MS_LOG(EXCEPTION) << "Correcting strategy_cost_ failed : " << prim->name(); - } else { - MS_LOG(INFO) << "Correcting strategy_cost_ succeeded. " << prim->name(); - } - if (edge_ptr->InitEdgeCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Edge cost initialization failed"; } @@ -840,7 +826,7 @@ Status ParallelStrategySearch(const std::vector &all_nodes, const Fu // taking care for the case of a single Parameter being used by multiple operators. Create a TmpIdentity // operator for this Parameter, and add an edge for the use of this Parameter by each // subsequent operator; - // Step 3.1: Correct the memory calculation for memory reuse + // Step 3.1: Calculate memory usage // Step 4: Run the Dynamic Programming algorithm: // in this process, cost is calculated based on not only the operators, but also the edges. 
Here, the edge // cost is caused by the redistribution of a operator's output tensor layout to the next operator's input @@ -867,14 +853,14 @@ Status ParallelStrategySearch(const std::vector &all_nodes, const Fu MS_LOG(INFO) << "After the augmenting procedure, there are " << entire_costgraph->GetOperators().size() << " operators, and " << entire_costgraph->GetNumPairs() << " edges."; - // Step 3.1: Correcting calculation for memory reuse + // Step 3.1: Calculate the memory usage if (entire_costgraph->ComputeOpsAndEdgesParameterInvolved() == SUCCESS) { - // Correcting operators' memory usage - if (entire_costgraph->CorrectOpsStrategyCostForMemoryReuse() != SUCCESS) { + // Calculate operators' memory usage + if (entire_costgraph->CalculateOpsMemoryCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Correcting operators' cost for memory reuse failed."; } - // Correcting edges' memory usage - if (entire_costgraph->CorrectEdgesStrategyCostForMemoryReuse() != SUCCESS) { + // Calculate edges' memory usage + if (entire_costgraph->CalculateEdgesMemoryCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Correcting edges' cost for memory reuse failed."; } } else { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc index 93bda5da81..55e6a300e0 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc +++ b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc @@ -144,7 +144,7 @@ Status TensorRedistribution::ComputeCost() { MS_LOG(ERROR) << "Failure: InferTensorRedistribution failed"; return Status::FAILED; } - // Compute redistribution communication cost and memory cost + // Compute redistribution communication cost and computation cost for (auto& op_cost : operator_list_) { OperatorR op = op_cost.first; Shape slice_shape = op_cost.second; @@ -154,14 +154,14 @@ Status TensorRedistribution::ComputeCost() { if (str == PERMUTE_BY_AXIS) { // The shape does not change after 
PermuteByAxis operation. // communication cost = all_to_all + all_to_all = 2 * slice_shape - // memory cost = slice_shape + // computation cost = slice_shape forward_comm_cost_ += prod; backward_comm_cost_ += prod; comm_cost_ += 2.0 * prod; - mem_cost_ += prod; + computation_cost_ += prod; } else if (str == CONCAT_BY_AXIS) { // communication cost = all_gather + reduce_scatter = before_slice_shape + after_slice_shape - // memory cost = before_slice_shape + // computation cost = before_slice_shape if (op.second.size() < 3) { MS_LOG(ERROR) << "op.second size should not be less than 3!"; return Status::FAILED; @@ -173,22 +173,22 @@ Status TensorRedistribution::ComputeCost() { comm_cost_ += prod * (dev_num + 1.0); int32_t concat_dim = op.second[0]; if (concat_dim == 0) { - // memory cost = all_gather - mem_cost_ += prod; + // computation cost = all_gather + computation_cost_ += prod; } else { - // memory cost = all_gather + split + concat - mem_cost_ += (prod + prod * dev_num + prod * dev_num); + // computation cost = all_gather + split + concat + computation_cost_ += (prod + prod * dev_num + prod * dev_num); } } else { - // There is only memory cost in SplitByAxis. - // memory cost = before_slice_shape - mem_cost_ += prod; + // There is only computation cost in SplitByAxis. 
+ // computation cost = before_slice_shape + computation_cost_ += prod; } } if (reshape_flag()) { Shape prev_slice_shape = from_.slice_shape().array(); double prev_prod = std::accumulate(prev_slice_shape.begin(), prev_slice_shape.end(), 1, std::multiplies()); - mem_cost_ += 2.0 * prev_prod; + computation_cost_ += 2.0 * prev_prod; } return Status::SUCCESS; } diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h index 38fb5959ad..e933b9b8eb 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h +++ b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h @@ -41,7 +41,7 @@ class TensorRedistribution { comm_cost_(0.0), forward_comm_cost_(0.0), backward_comm_cost_(0.0), - mem_cost_(0.0), + computation_cost_(0.0), construct_op_flag_(construct_op_flag), keep_reshape_(keep_reshape) {} Status Init(const TensorLayout& from, const TensorLayout& to, const RankList& dev_list); @@ -51,7 +51,7 @@ class TensorRedistribution { bool reshape_flag() const { return reshape_flag_; } Status ComputeCost(); double comm_cost() const { return comm_cost_; } - double mem_cost() const { return mem_cost_; } + double computation_cost() const { return computation_cost_; } double forward_comm_cost() const { return forward_comm_cost_; } double backward_comm_cost() const { return backward_comm_cost_; } @@ -66,10 +66,13 @@ class TensorRedistribution { RankList dev_list_; OperatorList operator_list_; bool reshape_flag_; + // communication cost double comm_cost_; + // forward communication cost double forward_comm_cost_; + // backward communication cost double backward_comm_cost_; - double mem_cost_; + double computation_cost_; bool construct_op_flag_; bool keep_reshape_; }; diff --git a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc index 83a9eceacc..415a1fdd55 100644 --- 
a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc @@ -322,8 +322,8 @@ TEST_F(TestCostGraph, test_SelectCostListWithMinTrainingTimeMultiple) { auto ret_list = entire_cost_graph.SelectCostListWithMinTrainingTimeMultiple(all_list, memory); ASSERT_EQ(ret_list.size(), 2); - ASSERT_DOUBLE_EQ(ret_list[0]->memory_cost_, 10); - ASSERT_DOUBLE_EQ(ret_list[1]->memory_cost_, 1010); + ASSERT_DOUBLE_EQ(ret_list[0]->computation_cost_, 10); + ASSERT_DOUBLE_EQ(ret_list[1]->computation_cost_, 1010); } TEST_F(TestCostGraph, test_CheckOpElimination) { diff --git a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc index 3bd65c049c..919c5b43ec 100644 --- a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc @@ -76,8 +76,8 @@ TEST_F(TestMatMulCost, test_CostGeneration) { mmcost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); mmcost_.GetForwardCommCost(inputs, outputs, 0); mmcost_.GetBackwardCommCost(inputs, outputs, 0); - mmcost_.GetForwardMemoryCost(inputs, outputs, 0); - mmcost_.GetBackwardMemoryCost(inputs, outputs, 0); + mmcost_.GetForwardComputationCost(inputs, outputs, 0); + mmcost_.GetForwardComputationCost(inputs, outputs, 0); } class TestActivationCost : public UT::Common { @@ -128,8 +128,8 @@ TEST_F(TestActivationCost, test_CostGeneration) { std::vector inputs_length = {4, 4}; std::vector outputs_length = {4}; ac_cost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); - ac_cost_.GetForwardMemoryCost(inputs, outputs, 0); - ac_cost_.GetBackwardMemoryCost(inputs, outputs, 0); + ac_cost_.GetForwardComputationCost(inputs, outputs, 0); + ac_cost_.GetBackwardComputationCost(inputs, outputs, 0); } class TestPReLUCost : public UT::Common { @@ -184,8 +184,8 @@ TEST_F(TestPReLUCost, test_CostGeneration) { 
prelu_cost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); double BCC, FMC, GMC; BCC = prelu_cost_.GetBackwardCommCost(inputs, outputs, 0); - FMC = prelu_cost_.GetForwardMemoryCost(inputs, outputs, 0); - GMC = prelu_cost_.GetBackwardMemoryCost(inputs, outputs, 0); + FMC = prelu_cost_.GetForwardComputationCost(inputs, outputs, 0); + GMC = prelu_cost_.GetBackwardComputationCost(inputs, outputs, 0); ASSERT_EQ(BCC, 32 * 4); ASSERT_EQ(FMC, 8 * 32 * 8 * 8 * 4 + 32 * 4); ASSERT_EQ(GMC, 128); diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 149aa9d5af..5d18c5372f 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -84,8 +84,8 @@ TEST_F(TestActivation, test_activation_strategies) { act_ptr_->InitForCostModel(sp); std::vector inputs_info = act_ptr_->inputs_tensor_info(); std::vector outputs_info = act_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } @@ -109,8 +109,8 @@ TEST_F(TestActivation, test_softmax_strategies) { soft_ptr_->InitForCostModel(sp); std::vector inputs_info = soft_ptr_->inputs_tensor_info(); std::vector outputs_info = soft_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, 
sp->GetInputStage()), cost.communication_cost_); } diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index 978b792a0c..99ca9f8e0e 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -569,8 +569,8 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies1) { matmul1->InitForCostModel(sp); std::vector inputs_info = matmul1->inputs_tensor_info(); std::vector outputs_info = matmul1->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); break; } } @@ -599,8 +599,8 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies2) { TensorInfo replica_input1_info(tly, input1_shape, input1_slice_shape); replica_inputs_info.push_back(replica_input1_info); - ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetMemoryCost(replica_inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); break; } } diff --git a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index e7736a4b3e..6cb9739b1c 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -188,8 +188,8 @@ TEST_F(TestTensorAddInfo, GenerateStrategies) { tensor_add->InitForCostModel(sp); std::vector inputs_info = tensor_add->inputs_tensor_info(); std::vector outputs_info = tensor_add->outputs_tensor_info(); - double memory_cost0 = tensor_add->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()); - double memory_cost1 = cost.memory_cost_; + double 
memory_cost0 = tensor_add->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; double comm_cost0 = tensor_add->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); @@ -210,8 +210,8 @@ TEST_F(TestTensorAddInfo, GenerateStrategies1) { tensor_add1->InitForCostModel(sp); std::vector inputs_info = tensor_add1->inputs_tensor_info(); std::vector outputs_info = tensor_add1->outputs_tensor_info(); - double memory_cost0 = tensor_add1->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()); - double memory_cost1 = cost.memory_cost_; + double memory_cost0 = tensor_add1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; double comm_cost0 = tensor_add1->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index ce1238baeb..043746498f 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -145,8 +145,8 @@ TEST_F(TestTmpIdentityInfo, test_generate_strategies) { identity_ptr->Init(sp); std::vector inputs_info = identity_ptr->inputs_tensor_info(); std::vector outputs_info = identity_ptr->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } From 
89f0b3b1bb7af5e638dd9e76cd444edec4260856 Mon Sep 17 00:00:00 2001 From: jojobugfree Date: Tue, 7 Apr 2020 19:59:36 +0800 Subject: [PATCH 29/58] profiling feature enhancement --- .../device/ascend/ascend_stream_assign.cc | 2 +- .../ascend/profiling/profiling_manager.h | 4 - .../ascend/profiling/profiling_utils.cc | 229 ++++++++---------- .../device/ascend/profiling/profiling_utils.h | 93 ++++--- mindspore/ccsrc/device/kernel_adjust.cc | 31 +-- mindspore/ccsrc/device/kernel_adjust.h | 6 +- .../ascend/ascend_backend_optimization.cc | 2 +- mindspore/ccsrc/session/ascend_session.cc | 2 +- .../tasksink/ascend_stream_assign_stub.cc | 2 +- 9 files changed, 190 insertions(+), 181 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 4f16c596c7..8c4d1f4a8f 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr(primitive->GetAttr(kAttrEventId)) << "]"; } else { - MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id[" + MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h index de8f6a7d0a..b826c4cf36 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h @@ -29,10 +29,6 @@ namespace ascend { // PROFILING_CUSTOM_LOGID_START 3 const uint64_t kProfilingFpStartLogId = 1; const uint64_t kProfilingBpEndLogId = 2; -const uint64_t kProfilingAllReduce1Start = 3; -const uint64_t kProfilingAllReduce1End = 4; -const uint64_t 
kProfilingAllReduce2Start = 5; -const uint64_t kProfilingAllReduce2End = 6; const uint64_t kProfilingIterEndLogId = 255; class ProfilingEngineImpl; diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 0d7088300e..aa71aa0566 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -14,10 +14,8 @@ * limitations under the License. */ -#include "device/ascend/profiling/profiling_utils.h" - #include - +#include "device/ascend/profiling/profiling_utils.h" #include "kernel/kernel.h" #include "device/ascend/profiling/profiling_manager.h" #include "session/anf_runtime_algorithm.h" @@ -27,82 +25,61 @@ namespace mindspore { namespace device { namespace ascend { -const char ProfilingUtils::kProfiling[] = "Profiling"; -const char ProfilingUtils::kNotify[] = "notify"; -const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id"; -const char ProfilingUtils::kFlags[] = "flags"; +constexpr uint32_t kMaxProfilingNodeNum = 100; +constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; +constexpr char kFpStartNode[] = "PROFILING_FP_START"; +constexpr char kBpEndNode[] = "PROFILING_BP_END"; +constexpr char kIterEndNode[] = "PROFILING_ITER_END"; std::unordered_map> ProfilingUtils::graph_kernel_name_; -bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info) { - MS_EXCEPTION_IF_NULL(profiling_trace_info); - MS_EXCEPTION_IF_NULL(graph_ptr); - bool find_begin = false; - bool first_allreduce = true; - for (const auto &anf_node : graph_ptr->execution_order()) { - if (anf_node->isa()) { - const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node); - if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) { - profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope(); - find_begin = true; - } - if (kernel_name == 
"Conv2DBackpropFilter") { - profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope(); - } - if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) { - profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope(); - } - if (kernel_name == kAllReduceOpName) { - if (first_allreduce) { - profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope(); - first_allreduce = false; - } else { - profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope(); - } - } +uint32_t ProfilingUtils::custom_node_index_ = 1; + +ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { + MS_LOG(INFO) << "get env start"; + custom_node_index_ = 1; + auto &cnode_exec_order = graph_ptr->execution_order(); + ProfilingTraceInfo profiling_trace; + profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order); + profiling_trace.trace_bp_end = GetTraceBpEnd(); + profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order); + + MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin + << " trace_bp_end:" << profiling_trace.trace_bp_end + << " trace_netoutput:" << profiling_trace.trace_netoutput; + + for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { + std::string env_str = std::string(kCustomNode) + std::to_string(i); + const char *node_full_name = std::getenv(env_str.c_str()); + if (node_full_name == nullptr) { + break; } + MS_LOG(INFO) << "Get profiling node:" << node_full_name; + profiling_trace.trace_custom_node.insert(node_full_name); } - MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin - << ", net_output:" << profiling_trace_info->profiling_trace_netoutput - << ", end:" << profiling_trace_info->profiling_trace_bp_end - << ", 
allreduce1:" << profiling_trace_info->profiling_allreduce1_start - << ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start; - return profiling_trace_info->IsValid(); + MS_LOG(INFO) << "get env end"; + return profiling_trace; } -bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(profiling_trace_net_output); - MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope(); - if (!profiling_trace_net_output->empty()) { - MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str(); - return true; - } - - if (AnfAlgo::IsRealKernel(anf_node)) { - *profiling_trace_net_output = anf_node->fullname_with_scope(); - return true; - } +std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { + const char *trace_begin = std::getenv(kFpStartNode); + auto &first_cnode = cnode_exec_order.front(); + MS_EXCEPTION_IF_NULL(first_cnode); + return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); +} - auto cnode = anf_node->cast(); - if (cnode == nullptr) { - MS_LOG(ERROR) << "[profiling]Anf node should be a CNode"; - return false; - } +std::string ProfilingUtils::GetTraceBpEnd() { + const char *trace_bp_end = std::getenv(kBpEndNode); + return trace_bp_end == nullptr ? 
"" : std::string(trace_bp_end); +} - auto inputs = cnode->inputs(); - auto input_size = inputs.size(); - if (input_size < 2) { - MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node."; - return false; - } - return GetNetOutput(inputs[1], profiling_trace_net_output); +std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { + const char *trace_netoutput = std::getenv(kIterEndNode); + auto &last_cnode = cnode_exec_order.back(); + MS_EXCEPTION_IF_NULL(last_cnode); + return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); } -CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags) { - MS_EXCEPTION_IF_NULL(graph_ptr); +NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr) { kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder; selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32}); @@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptrset_abstract(type_none_abstract); // set attr - ValuePtr notify_value = MakeValue(notify); - ValuePtr trace_id_value = MakeValue(profiler_trace_id); - ValuePtr flags_value = MakeValue(flags); + ValuePtr notify_value = MakeValue(profiling_content.notify); + ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id); + ValuePtr flags_value = MakeValue(profiling_content.flags); AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr); - return cnode_ptr; + return NOT_NULL(cnode_ptr); } -void 
ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) { - if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) { - MS_LOG(ERROR) << "[profiling]input param invalid"; - return; - } +void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node, + const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { + if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { auto job_id = ProfilingManager::GetInstance().GetJobId(); - // job task info - CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get()); - // fp task info - CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get()); - kernel_list->emplace_back(job_kernel_ptr); - kernel_list->emplace_back(start_kernel_ptr); + ProfilingContent job_profiling_context = {false, job_id, 0}; + auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); + kernel_list->emplace_back(job_profiling_node); + + ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; + auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); + kernel_list->emplace_back(fp_profiling_node); } } -void ProfilingUtils::ProfilingAllReduce(const 
std::shared_ptr &graph_ptr, - const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, + const ProfilingContent &profiling_content, + NotNull graph_ptr) { + CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr); + AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get()); + AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get()); + return profiling_node; +} + +void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - auto full_scope_name = anf_node->fullname_with_scope(); - if (profiling_node_name == full_scope_name) { - CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get()); - kernel_list->emplace_back(allreduce_kernel_ptr); + auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope()); + if (iter == profiling_trace_info.trace_custom_node.end()) { + return; } + // custom op profiling job start from 3. 
+ ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; + CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end() - 1, front_node); + + ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0}; + CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end(), back_node); + ++custom_node_index_; } -void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - if (profiling_trace_info.IsValid()) { - auto full_scope_name = anf_node->fullname_with_scope(); - if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) { - CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get()); - kernel_list->emplace_back(bp_kernel_ptr); - } + if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) { + ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; + CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_end_node); + } +} - if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) { - CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0); - 
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get()); - kernel_list->emplace_back(end_task_info); - } +void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto full_scope_name = anf_node->fullname_with_scope(); + if (profiling_trace_info.trace_netoutput == full_scope_name) { + ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; + CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_kernel_ptr); } } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index ca0ef6f1f0..c59e856249 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -19,63 +19,102 @@ #include #include #include +#include #include #include "session/kernel_graph.h" +#include "utils/contract.h" namespace mindspore { namespace device { namespace ascend { struct ProfilingTraceInfo { // execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...) - std::string profiling_trace_begin; + std::string trace_begin; // get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp - std::string profiling_trace_bp_end; + std::string trace_bp_end; // execute order's end execute (like: Conv2DBackpropFilter) - std::string profiling_trace_netoutput; + std::string trace_netoutput; - std::string profiling_allreduce1_start; - - std::string profiling_allreduce1_end; - - std::string profiling_allreduce2_start; - - std::string profiling_allreduce2_end; + // profiling specific op, such as AllReduce; + std::set trace_custom_node; // 1. 
insert profiling_trace_begin if profiling_trace_bp_end is not empty. // 2. op lanuch get task info with callback func. // 3. insert profiling_trace_bp_end. // 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty. - bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); } + bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); } +}; + +struct ProfilingContent { + // true -send data from device to host and finish profiling + bool notify; + uint64_t profiler_trace_id; + uint32_t flags; }; class ProfilingUtils { public: ProfilingUtils() = default; ~ProfilingUtils() = default; - static bool GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info); - static void ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); - static void ProfilingAllReduce(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - int job_id, const std::string &profiling_node_name, - std::vector *kernel_list); - static void ProfilingTraceEnd(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); + + // Insert job_id profiling node and fp_start profiling node. + // Job_id is got from envs, which shound be a number greater than 255 + // Fp_start node should been inserted in the start of a network, and the log_id is hard code to 1. + static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert net output profiling node, which tells the device to stop profiling. + // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. 
+ static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert bp_end profiling node, which should been inserted after the last backpropagation CNode in the network. + static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Mapping graph id and the kernels' name in the graph static void SetGraphKernelName(uint32_t graph_id, const std::vector &kernel_names); + + // Mapping task_id and kernel name for device to generate the time cost of specific kernel. + // Device calculate the time cost of the task which is marked by task id. + // But we need data of (kernel name , time cost) static void ReportProfilingData(uint32_t graph_id, const std::vector &task_ids); - static const char kProfiling[]; - static const char kNotify[]; - static const char kProfilerTraceId[]; - static const char kFlags[]; + // Get profiling trace point from envs. + // export PROFILING_FP_START='full name of the first cnode to execute' + // export PROFILING_BP_END='full name of the last backpropagation cnode to execute' + // export PROFILING_ITER_END='full name of last cnode in graph to execute' + // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' + // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode' + // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. 
+ static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); + + // Insert two profiling trace points, one in front and one behind + static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + inline static constexpr char kProfiling[] = "Profiling"; + inline static constexpr char kNotify[] = "notify"; + inline static constexpr char kProfilerTraceId[] = "profiler_trace_id"; + inline static constexpr char kFlags[] = "flags"; private: - static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output); - static CNodePtr CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags); + static NotNull CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr); + static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content, + NotNull graph_ptr); + static std::string GetTraceBegin(const std::vector &cnode_exec_order); + static std::string GetTraceBpEnd(); + static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_; + static uint32_t custom_node_index_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index c422d50b51..9a6f48025f 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { if (!ascend::ProfilingManager::GetInstance().IsProfiling()) { MS_LOG(INFO) << "No need to profiling"; return; } - 
ProfilingTraceInfo profiling_trace_info; - if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) { - InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info); - } else { - MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed"; + ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr); + if (!profiling_trace_info.IsValid()) { + MS_LOG(WARNING) << "[profiling] no profiling node found!"; + return; } + InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr); } -void KernelAdjust::InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info) { +void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr) { MS_LOG(INFO) << "[profiling] Insert profiling kernel start"; - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); if (!profiling_trace_info.IsValid()) { MS_LOG(WARNING) << "Profiling trace point not found"; return; @@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr new_cnode_list; std::vector cnode_ptr_list = kernel_graph_ptr->execution_order(); for (const auto &cnode_ptr : cnode_ptr_list) { - ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start, - profiling_trace_info.profiling_allreduce1_start, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start, - profiling_trace_info.profiling_allreduce2_start, &new_cnode_list); + ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); new_cnode_list.emplace_back(cnode_ptr); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End, - profiling_trace_info.profiling_allreduce1_end, 
&new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End, - profiling_trace_info.profiling_allreduce2_end, &new_cnode_list); - ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); + ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); } kernel_graph_ptr->set_execution_order(new_cnode_list); } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h index 62c64d98b9..ca01d51e54 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -48,7 +48,7 @@ class KernelAdjust { void SetStreamSwitchOps(const std::shared_ptr &kernel_graph_ptr); bool StepLoadCtrlInputs(const std::shared_ptr &context, const std::shared_ptr &kernel_graph_ptr); - void Profiling(const std::shared_ptr &kernel_graph_ptr); + void Profiling(NotNull kernel_graph_ptr); static bool NeedInsertSwitch(); CNodePtr CreateSteamActiveOp(const std::shared_ptr &kernel_graph_ptr); @@ -66,8 +66,8 @@ class KernelAdjust { kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector &formats, const std::vector &type_ids); void LoadSwitchInputs(std::vector *inputs); - void InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info); + void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr); }; } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 8212d64c27..432d88e7a4 100644 
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr &kern kernel_graph->SetExecOrderByDefault(); if (save_graphs) { std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir"; - DumpIR(file_path, kernel_graph); + DumpIR(file_path, kernel_graph, true); DumpIRProto(kernel_graph, "after_hwopt"); } } diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 9d351f3199..34c05aed08 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { // Assign streams for control sink and hccl and so on AssignStream(graph); - device::KernelAdjust::GetInstance().Profiling(graph); + device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get())); // build kernel if node is cnode BuildKernel(graph); auto ms_context = MsContext::GetInstance(); diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index ebd2ac8b46..e0b5ab0d61 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &c return true; } bool KernelAdjust::NeedInsertSwitch() { return true; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { return; } +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { return; } } // namespace device } // namespace mindspore From 0ba72a6885df59659e5a6162805941f42a428476 Mon Sep 17 00:00:00 2001 From: guohongzilong <2713219276@qq.com> Date: Wed, 8 Apr 2020 18:00:33 +0800 Subject: [PATCH 30/58] unified tensor and mindspore.type --- mindspore/common/tensor.py | 8 ++++---- mindspore/ops/operations/math_ops.py | 6 
+++--- mindspore/ops/operations/random_ops.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index d17661595f..709b2ae280 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -42,14 +42,14 @@ class Tensor(Tensor_): Examples: >>> # init a tensor with input data - >>> t1 = mindspore.Tensor(np.zeros([1, 2, 3]), mindspore.float32) - >>> assert isinstance(t1, mindspore.Tensor) + >>> t1 = Tensor(np.zeros([1, 2, 3]), mindspore.float32) + >>> assert isinstance(t1, Tensor) >>> assert t1.shape() == (1, 2, 3) >>> assert t1.dtype() == mindspore.float32 >>> >>> # init a tensor with a float scalar - >>> t2 = mindspore.Tensor(0.1) - >>> assert isinstance(t2, mindspore.Tensor) + >>> t2 = Tensor(0.1) + >>> assert isinstance(t2, Tensor) >>> assert t2.dtype() == mindspore.float64 """ diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 47b9e490f1..d003f6ee8b 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1208,7 +1208,7 @@ class Acosh(PrimitiveWithInfer): Examples: >>> acosh = Acosh() - >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), ms.float32) + >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) >>> output = acosh(X) """ @@ -1752,7 +1752,7 @@ class Cos(PrimitiveWithInfer): Examples: >>> cos = P.Cos() - >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), ms.float32) + >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) >>> output = cos(X) """ @@ -1808,7 +1808,7 @@ class Sin(PrimitiveWithInfer): Examples: >>> sin = P.Sin() - >>> input_x = Tensor(np.array([0.62, 0.28, 0.43, 0.62]), ms.float32) + >>> input_x = Tensor(np.array([0.62, 0.28, 0.43, 0.62]), mindspore.float32) >>> output = sin(input_x) """ diff --git a/mindspore/ops/operations/random_ops.py b/mindspore/ops/operations/random_ops.py index 9ef5b301f9..95692a622e 100644 --- 
a/mindspore/ops/operations/random_ops.py +++ b/mindspore/ops/operations/random_ops.py @@ -45,7 +45,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer): Examples: >>> rnd_choice_mask = RandomChoiceWithMask() - >>> input_x = Tensor(np.ones(shape=[240000, 4]), ms.bool_) + >>> input_x = Tensor(np.ones(shape=[240000, 4]), mindspore.bool_) >>> output_y, output_mask = rnd_choice_mask(input_x) """ From ee519aa0d188695caf61a277ddb2d3c915e67046 Mon Sep 17 00:00:00 2001 From: yoonlee666 Date: Wed, 8 Apr 2020 14:31:18 +0800 Subject: [PATCH 31/58] use TFRecordDataset in bert ci script and add absolute position embedding code in bert model --- mindspore/model_zoo/Bert_NEZHA/bert_model.py | 14 ++++++++++++++ .../networks/models/bert/bert_tdt_no_lossscale.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/mindspore/model_zoo/Bert_NEZHA/bert_model.py b/mindspore/model_zoo/Bert_NEZHA/bert_model.py index f20c57dd75..d7f9355b3c 100644 --- a/mindspore/model_zoo/Bert_NEZHA/bert_model.py +++ b/mindspore/model_zoo/Bert_NEZHA/bert_model.py @@ -165,6 +165,7 @@ class EmbeddingPostprocessor(nn.Cell): def __init__(self, embedding_size, embedding_shape, + use_relative_positions=False, use_token_type=False, token_type_vocab_size=16, use_one_hot_embeddings=False, @@ -192,6 +193,13 @@ class EmbeddingPostprocessor(nn.Cell): self.layernorm = nn.LayerNorm(embedding_size) self.dropout = nn.Dropout(1 - dropout_prob) self.gather = P.GatherV2() + self.use_relative_positions = use_relative_positions + self.slice = P.Slice() + self.full_position_embeddings = Parameter(initializer + (TruncatedNormal(initializer_range), + [max_position_embeddings, + embedding_size]), + name='full_position_embeddings') def construct(self, token_type_ids, word_embeddings): output = word_embeddings @@ -206,6 +214,11 @@ class EmbeddingPostprocessor(nn.Cell): token_type_embeddings = self.gather(self.embedding_table, flat_ids, 0) token_type_embeddings = self.reshape(token_type_embeddings, self.shape) output 
+= token_type_embeddings + if not self.use_relative_positions: + _, seq, width = self.shape + position_embeddings = self.slice(self.full_position_embeddings, [0, 0], [seq, width]) + position_embeddings = self.reshape(position_embeddings, (1, seq, width)) + output += position_embeddings output = self.layernorm(output) output = self.dropout(output) return output @@ -853,6 +866,7 @@ class BertModel(nn.Cell): self.bert_embedding_postprocessor = EmbeddingPostprocessor( embedding_size=self.embedding_size, embedding_shape=output_embedding_shape, + use_relative_positions=config.use_relative_positions, use_token_type=True, token_type_vocab_size=config.type_vocab_size, use_one_hot_embeddings=use_one_hot_embeddings, diff --git a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py b/tests/st/networks/models/bert/bert_tdt_no_lossscale.py index 9cc11997e6..5b6268505b 100644 --- a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py +++ b/tests/st/networks/models/bert/bert_tdt_no_lossscale.py @@ -103,9 +103,9 @@ def me_de_train_dataset(): """test me de train dataset""" # apply repeat operations repeat_count = 1 - ds = de.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids", + ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", "masked_lm_positions", - "masked_lm_ids", "masked_lm_weights"]) + "masked_lm_ids", "masked_lm_weights"], shuffle=False) type_cast_op = C.TypeCast(mstype.int32) ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) From 599543932c566fd7f8b7a9245e455d53c81b396d Mon Sep 17 00:00:00 2001 From: liuxiao Date: Mon, 6 Apr 2020 10:22:47 +0800 Subject: [PATCH 32/58] Add pack and unpack --- mindspore/ccsrc/operator/ops.h | 1 + mindspore/ccsrc/transform/convert.cc | 3 +- mindspore/ops/_grad/grad_array_ops.py | 24 +++++ mindspore/ops/operations/__init__.py | 4 +- 
mindspore/ops/operations/array_ops.py | 144 ++++++++++++++++++++++++++ tests/ut/python/ops/test_ops.py | 53 ++++++++++ 6 files changed, 227 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h index 727d66dfb3..5fbf2b7067 100644 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/operator/ops.h @@ -135,6 +135,7 @@ extern const PrimitivePtr kPrimGatherV2; extern const PrimitivePtr kPrimSize; extern const PrimitivePtr kPrimArgMax; extern const PrimitivePtr kPrimPack; +extern const PrimitivePtr kPrimUnpack; extern const PrimitivePtr kPrimUnsortedSegmentSum; extern const PrimitivePtr kPrimConcatOffset; extern const PrimitivePtr kPrimReshape; diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 8eed207f59..8b14a8f338 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -148,7 +148,8 @@ const char kNameSlice[] = "Slice"; const char kNameAddN[] = "AddN"; const char kNameLess[] = "Less"; const char kNameGreater[] = "Greater"; -const char kNamePack[] = "Stack"; +const char kNameStack[] = "Stack"; +const char kNameUnstack[] = "Unstack"; const char kNameMerge[] = "Merge"; const char kNameGeSwitch[] = "GeSwitch"; diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 81d38a1e1e..0a0caf471e 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -266,6 +266,30 @@ def get_bprop_gather_v2(self): return bprop +@bprop_getters.register(P.Stack) +def get_bprop_stack(self): + """Generate bprop for Stack""" + axis = self.axis + + def bprop(x, out, dout): + stack_grad = P.Unstack(axis) + out = stack_grad(dout) + return (out,) + return bprop + + +@bprop_getters.register(P.Unstack) +def get_bprop_unstack(self): + """Generate bprop for Unstack""" + axis = self.axis + + def bprop(x, out, dout): + unstack_grad = P.Stack(axis) + out = unstack_grad(dout) + return 
(out,) + return bprop + + @bprop_getters.register(P.StridedSlice) def get_bprop_strided_slice(self): """Generate bprop for StridedSlice""" diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index d255796bae..5fd3f07876 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -19,7 +19,7 @@ Primitive operator classes. A collection of operators to build nerual networks or computing functions. """ -from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, +from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Stack, Unstack, Diag, DiagPart, DType, ExpandDims, Eye, Fill, GatherNd, GatherV2, InvertPermutation, IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, @@ -112,6 +112,8 @@ __all__ = [ 'OneHot', 'GatherV2', 'Concat', + 'Stack', + 'Unstack', 'Tile', 'BiasAdd', 'Gelu', diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index b91c2cbc7d..59d3083c5d 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1350,6 +1350,150 @@ class Concat(PrimitiveWithInfer): return out +def _get_stack_shape(x_shape, x_type, axis): + """for satck output shape""" + validator.check_type("shape", x_shape, [tuple]) + validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT) + validator.check_subclass("shape0", x_type[0], mstype.tensor) + validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT) + rank_base = len(x_shape[0]) + N = len(x_shape) + out_shape = x_shape[0] + validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH) + if axis < 0: + axis = axis + rank_base + 1 + for i in range(1, N): + v = x_shape[i] + validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base) + validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0]) + for j in range(rank_base): + if v[j] != x_shape[0][j]: + raise ValueError("Stack 
evaluator element %d shape in input can not stack with first element" % i) + out_shape.insert(axis, N) + return out_shape + +class Stack(PrimitiveWithInfer): + r""" + Stacks a list of rank-`R` tensors into one rank-`(R+1)` tensor. + + Packs the list of tensors in `input_x` into a tensor with rank one higher than + each tensor in `input_x`, by packing them along the `axis` dimension. + Given a list of length `N` of tensors of shape `(A, B, C)`; + + If `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. + + If `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. Etc. + + Args: + axis (int): The axis to stack along. Negative values wrap around, + so the valid range is [-(R+1), R+1). Default: 0. + + Inputs: + - **input_x** (Union[tuple, list]) - A Tuple or list of Tensor objects with the same shape and type. + + Outputs: + Tensor. A stacked Tensor with the same type as values. + + Examples: + >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) + >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) + >>> op = P.Stack() + >>> output = op([data1, data2]) + [[0, 1], [2, 3]] + """ + + @prim_attr_register + def __init__(self, axis=0): + """init Stack""" + self.__setattr_flag__ = True + validator.check_type("axis", axis, [int]) + self.axis = axis + + def __infer__(self, value): + x_shape = value['shape'] + x_type = value['dtype'] + self.add_prim_attr('num', len(x_shape)) + all_shape = _get_stack_shape(x_shape, x_type, self.axis) + out = {'shape': all_shape, + 'dtype': x_type[0], + 'value': None} + return out + + +class Unstack(PrimitiveWithInfer): + r""" + Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. + + Unpacks num tensors from value by chipping it along the axis dimension. + If num is not specified (the default), it is inferred from value's shape. + If value.shape[axis] is not known, ValueError is raised. 
+ + For example, given a tensor of shape (A, B, C, D); + + If axis == 0 then the i'th tensor in output is the slice value[i, :, :, :] and + each tensor in output will have shape (B, C, D). (Note that the dimension unpacked along is gone, unlike split). + + If axis == 1 then the i'th tensor in output is the slice value[:, i, :, :] and + each tensor in output will have shape (A, C, D). Etc. + + This is the opposite of stack. + + Args: + axis (int): The axis to unstack along. Defaults to the first dimension. + Negative values wrap around, so the valid range is [-R, R). + + Inputs: + - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. + A rank R > 0 Tensor to be unstacked. + + Outputs: + A tuple of Tensors, the shape of each objects is same. + + Raises: + ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())), + or if len(input_x.shape[axis]) not equal to num. + + Examples: + >>> unstack = P.Unstack() + >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) + >>> output = unstack(x) + ([1, 1, 1, 1], [2, 2, 2, 2]) + """ + + @prim_attr_register + def __init__(self, axis=0): + """init Unstack""" + self.__setattr_flag__ = True + validator.check_type("axis", axis, [int]) + self.axis = axis + + def __infer__(self, x): + validator.check_subclass("x", x['dtype'], mstype.tensor) + x_shape = list(x['shape']) + dim = len(x_shape) + validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT) + if self.axis < 0: + self.axis = self.axis + dim + output_num = x_shape[self.axis] + validator.check_type("num", output_num, [int]) + validator.check_integer("output_num", output_num, 0, Rel.GT) + self.add_prim_attr('num', output_num) + output_valid_check = x_shape[self.axis] - output_num + validator.check_integer("the dimension which to unstack divides output_num", output_valid_check, 0, Rel.EQ) + out_shapes = [] + out_dtypes = [] + out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] + for _ in range(output_num): + 
out_shapes.append(tuple(out_shape)) + out_dtypes.append(x['dtype']) + out_shapes = tuple(out_shapes) + out_dtypes = tuple(out_dtypes) + out = {'shape': out_shapes, + 'dtype': out_dtypes, + 'value': None} + return out + + class Slice(PrimitiveWithInfer): """ Slice a tensor in specified shape. diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 3e4acb12f3..97481e69a2 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -80,6 +80,29 @@ class NetForConcat1(nn.Cell): return self.concat((x1, x2)) +class NetForStackInput(nn.Cell): + def __init__(self, op): + super(NetForStackInput, self).__init__() + self.op = op + self.mul = P.Mul() + + def construct(self, *args): + t = () + for i in range(len(args)): + t = t + (self.mul(args[i], args[i]),) + return self.op(t) + + +class NetForUnstackInput(nn.Cell): + def __init__(self, op): + super(NetForUnstackInput, self).__init__() + self.op = op + self.mul = P.Mul() + + def construct(self, x1): + return self.op((self.mul(x1, x1))) + + class NetForFlatten(nn.Cell): def __init__(self): super(NetForFlatten, self).__init__() @@ -973,6 +996,36 @@ test_case_array_ops = [ Tensor(np.array([1], np.float32)), Tensor(np.array([1], np.float32)))], 'desc_bprop': [[3,]]}), + ('StackV2_0', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[2, 2], [2, 2], [2, 2]], + 'desc_bprop':[[3, 2, 2]], + }), + ('StackV2_1', { + 'block': NetForStackInput(P.Stack(axis=-2)), + 'desc_inputs':[[3, 2, 3], [3, 2, 3], [3, 2, 3]], + 'desc_bprop':[[3, 2, 3, 3]], + }), + ('StackV2_2', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[2, 2]], + 'desc_bprop':[[2, 2, 2]], + }), + ('StackV2_3', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[128, 128], [128, 128]], + 'desc_bprop':[[2, 128, 128]], + }), + ('UnstackV2_0', { + 'block': NetForUnstackInput(P.Unstack(axis=0)), + 'desc_inputs':[[2, 4]], + 'desc_bprop':[[4], [4]], + }), + ('UnstackV2_1', { + 'block': 
NetForUnstackInput(P.Unstack(axis=-1)), + 'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))], + 'desc_bprop':[[1], [1], [1]], + }), ('Diag', { 'block': P.Diag(), 'desc_inputs': [[4]], From e01df479151f16782f6b7865d220ec0e647c3e0a Mon Sep 17 00:00:00 2001 From: jinyaohui Date: Tue, 7 Apr 2020 17:23:17 +0800 Subject: [PATCH 33/58] modify set_dataset_mode_config api param --- example/yolov3_coco2017/train.py | 6 +++--- mindspore/ccsrc/transform/convert.cc | 12 ++++++------ mindspore/ccsrc/utils/config_manager.cc | 4 ++-- mindspore/ccsrc/utils/config_manager.h | 4 ++-- mindspore/common/api.py | 4 ++-- mindspore/nn/wrap/loss_scale.py | 2 +- tests/ut/python/utils/test_callback.py | 16 +++++++++------- 7 files changed, 25 insertions(+), 23 deletions(-) diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index 3ac3816f4a..0a32a6d30d 100644 --- a/example/yolov3_coco2017/train.py +++ b/example/yolov3_coco2017/train.py @@ -67,7 +67,7 @@ if __name__ == '__main__': parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") - parser.add_argument("--mode", type=str, default="graph", help="Run graph mode or feed mode, default is graph") + parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or non-sink mode, default is sink") parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") @@ -150,8 +150,8 @@ if __name__ == '__main__': model = Model(net) dataset_sink_mode = False - if args_opt.mode == "graph": - print("In graph mode, one epoch return a loss.") + if args_opt.mode == "sink": 
+ print("In sink mode, one epoch return a loss.") dataset_sink_mode = True print("Start train YOLOv3, the first epoch will be slower because of the graph compilation.") model.train(args_opt.epoch_size, dataset, callbacks=callback, dataset_sink_mode=dataset_sink_mode) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 8b14a8f338..59985c8ae3 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -446,10 +446,10 @@ void DfGraphConvertor::InitLoopVar(std::vector *init_input) { int64_t value = 0; auto const_iter_num = std::make_shared("const/npu_runconfig/iterations_per_loop"); - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { value = ConfigManager::GetInstance().iter_num(); } else { - MS_LOG(INFO) << "Run with feed mode, the iterator number will always be 1"; + MS_LOG(INFO) << "Run with non-sink mode, the iterator number will always be 1"; value = 1; ConfigManager::GetInstance().set_iter_num(value); } @@ -580,7 +580,7 @@ void DfGraphConvertor::SetupParamInitSubGraph(const TensorOrderMap &tensors, std void DfGraphConvertor::MakeDatasetHandler(const std::string &name, const size_t &input_idx, const AnfNodePtr &it) { MS_LOG(INFO) << "The " << name << " is the " << input_idx << "(st/nd/th) input"; - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { auto getnext_idx = static_cast(input_idx); DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); if (!param.input_indexes().empty() && input_idx <= param.input_indexes().size()) { @@ -872,7 +872,7 @@ DfGraphConvertor &DfGraphConvertor::ConvertAllNode() { } // Create dataset iterator and iterator_getnext node - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { 
DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); MS_LOG(INFO) << "Dataset param is " << param.ToString() << "."; // GetNext @@ -981,7 +981,7 @@ void DfGraphConvertor::TraceOutputFromParameter(const AnfNodePtr &anf_out) { } void SetupDatasetIterGetNextNode(const OperatorPtr &op) { - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); size_t output_num = param.ge_types().size(); MS_LOG(INFO) << "Set iterator_getnext op's output num = " << output_num << "."; @@ -1040,7 +1040,7 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { // set graph input according to the order from anf graph std::vector inputs; - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { inputs.push_back(*dataset_iter_getnext_); } else { auto params = anf_graph_->parameters(); diff --git a/mindspore/ccsrc/utils/config_manager.cc b/mindspore/ccsrc/utils/config_manager.cc index ac8a965878..6d66b37436 100644 --- a/mindspore/ccsrc/utils/config_manager.cc +++ b/mindspore/ccsrc/utils/config_manager.cc @@ -28,7 +28,7 @@ ConfigManager& ConfigManager::GetInstance() noexcept { } void ConfigManager::SetDatasetModeConfig(const std::string& mode) { - static const std::map mode_map = {{"feed", DS_FEED_MODE}, {"graph", DS_GRAPH_MODE}}; + static const std::map mode_map = {{"normal", DS_NORMAL_MODE}, {"sink", DS_SINK_MODE}}; if (mode_map.find(mode) == mode_map.end()) { MS_LOG(ERROR) << "Invalid dataset mode:" << mode; return; @@ -38,7 +38,7 @@ void ConfigManager::SetDatasetModeConfig(const std::string& mode) { void ConfigManager::ResetConfig() noexcept { parallel_strategy_ = ONE_DEVICE; - dataset_mode_ = DS_FEED_MODE; + dataset_mode_ = DS_NORMAL_MODE; dataset_param_ = DatasetGraphParam("", 0, 0, {}, {}, {}); iter_num_ = 1; } diff --git 
a/mindspore/ccsrc/utils/config_manager.h b/mindspore/ccsrc/utils/config_manager.h index 31137f6243..db7d7d0c14 100644 --- a/mindspore/ccsrc/utils/config_manager.h +++ b/mindspore/ccsrc/utils/config_manager.h @@ -33,7 +33,7 @@ enum ParallelStrategy { DISTRIBUTION, }; -enum DatasetMode { DS_FEED_MODE = 0, DS_GRAPH_MODE }; +enum DatasetMode { DS_NORMAL_MODE = 0, DS_SINK_MODE }; class DatasetGraphParam { public: @@ -106,7 +106,7 @@ class ConfigManager { ~ConfigManager() = default; ParallelStrategy parallel_strategy_{ONE_DEVICE}; - DatasetMode dataset_mode_{DS_FEED_MODE}; + DatasetMode dataset_mode_{DS_NORMAL_MODE}; DatasetGraphParam dataset_param_{"", 0, 0, {}, {}, {}}; int64_t iter_num_{1}; std::string dataset_phase_{""}; diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 7f0b2bfeaa..b5450bc5a3 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -378,9 +378,9 @@ class _Executor: if enable_ge: # decide whether to sink based on whether the inputs is virtual or not if args_list and isinstance(args_list[0], Tensor) and args_list[0].virtual_flag: - _set_dataset_mode_config('graph') + _set_dataset_mode_config('sink') else: - _set_dataset_mode_config('feed') + _set_dataset_mode_config('normal') self._build_data_graph(obj, params, phase) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 6a1f15a402..fd1c22be1f 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -51,7 +51,7 @@ class DynamicLossScaleUpdateCell(Cell): In every training step, the loss scaling value will be updated by loss scaling value/`scale_factor` when there is overflow. And it will be increased by loss scaling value * `scale_factor` if there is no overflow for a continuous `scale_window` steps. 
This cell is used for Graph mode training in which all - logic will be executed on device side(Another training mode is feed mode in which some logic will be + logic will be executed on device side(Another training mode is non-sink mode in which some logic will be executed on host). Args: diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 43cf827330..7e7b893e0c 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -24,11 +24,12 @@ from mindspore import context from mindspore.common.tensor import Tensor from mindspore.nn.optim import Momentum from mindspore.nn import TrainOneStepCell, WithLossCell -from mindspore.train.callback import ModelCheckpoint, _check_file_name_prefix, RunContext,_checkpoint_cb_for_save_op,\ - LossMonitor, _InternalCallbackParam, _chg_ckpt_file_name_if_same_exist,\ - _build_callbacks, CheckpointConfig, _set_cur_net +from mindspore.train.callback import ModelCheckpoint, _check_file_name_prefix, RunContext, _checkpoint_cb_for_save_op, \ + LossMonitor, _InternalCallbackParam, _chg_ckpt_file_name_if_same_exist, \ + _build_callbacks, CheckpointConfig, _set_cur_net from mindspore.common.api import ms_function + class Net(nn.Cell): """Net definition.""" @@ -52,6 +53,7 @@ class Net(nn.Cell): class LossNet(nn.Cell): """ LossNet definition """ + def __init__(self): super(LossNet, self).__init__() self.conv = nn.Conv2d(3, 64, 3, has_bias=False, weight_init='normal', pad_mode='valid') @@ -110,8 +112,8 @@ def test_save_checkpoint(): os.remove('./test_files/test_ckpt-model.pkl') -def test_loss_monitor_graph_model(): - """Test lossmonitor Graph model.""" +def test_loss_monitor_sink_model(): + """Test loss monitor sink model.""" cb_params = _InternalCallbackParam() cb_params.cur_epoch_num = 4 cb_params.cur_step_num = 2 @@ -129,8 +131,8 @@ def test_loss_monitor_graph_model(): callbacklist.end(run_context) -def test_Loss_Monitor_feed_feed_model(): - """Test Loss 
Monitor feed feed mode.""" +def test_loss_monitor_feed_model(): + """Test loss monitor non-sink mode.""" cb_params = _InternalCallbackParam() run_context = RunContext(cb_params) loss_cb = LossMonitor(1) From 82896411e06f7c5372ebef500553cd62b1701aa9 Mon Sep 17 00:00:00 2001 From: anzhengqi Date: Wed, 8 Apr 2020 14:47:16 +0800 Subject: [PATCH 34/58] modify part of comments --- .../ccsrc/dataset/kernels/image/image_utils.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/dataset/kernels/image/image_utils.h index d289f6f56e..a4ddef40d6 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h @@ -84,18 +84,8 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out // Returns Decoded image // Supported images: -// - Windows bitmaps - \*.bmp, \*.dib (always supported) -// - JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section) -// - JPEG 2000 files - \*.jp2 (see the *Note* section) -// - Portable Network Graphics - \*.png (see the *Note* section) -// - WebP - \*.webp (see the *Note* section) -// - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported) -// - PFM files - \*.pfm (see the *Note* section) -// - Sun rasters - \*.sr, \*.ras (always supported) -// - TIFF files - \*.tiff, \*.tif (see the *Note* section) -// - OpenEXR Image files - \*.exr (see the *Note* section) -// - Radiance HDR - \*.hdr, \*.pic (always supported) -// - Raster and Vector geospatial data supported by GDAL (see the *Note* section) +// BMP JPEG JPG PNG TIFF +// supported by opencv, if user need more image analysis capabilities, please compile opencv particularlly. // @param input: CVTensor containing the not decoded image 1D bytes // @param output: Decoded image Tensor of shape and type DE_UINT8. 
Pixel order is RGB Status Decode(const std::shared_ptr &input, std::shared_ptr *output); From 6690a7fd7a3108ce408220342bd750455067ccab Mon Sep 17 00:00:00 2001 From: jonyguo Date: Wed, 8 Apr 2020 15:52:53 +0800 Subject: [PATCH 35/58] fix: error info is not exactly when column list invalid --- .../engine/datasetops/source/mindrecord_op.cc | 3 +- .../engine/datasetops/source/mindrecord_op.h | 1 + .../ccsrc/mindrecord/common/shard_error.cc | 178 ++++++++++++++++++ .../ccsrc/mindrecord/include/shard_error.h | 102 +++++----- mindspore/ccsrc/mindrecord/io/shard_reader.cc | 2 +- tests/ut/cpp/dataset/mind_record_op_test.cc | 36 ++++ .../ut/cpp/mindrecord/ut_shard_reader_test.cc | 2 +- 7 files changed, 271 insertions(+), 53 deletions(-) create mode 100644 mindspore/ccsrc/mindrecord/common/shard_error.cc diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index b5bea5416c..cb0f135a0d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -103,7 +103,8 @@ Status MindRecordOp::Init() { shard_reader_ = mindspore::make_unique(); auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_); - CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, "MindRecordOp init failed."); + CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, + "MindRecordOp init failed. 
Error message: " + ErrnoToMessage(rc)); data_schema_ = mindspore::make_unique(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h index 0b16391b20..aca5c86c2c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h @@ -32,6 +32,7 @@ #include "dataset/engine/datasetops/source/io_block.h" #include "dataset/util/queue.h" #include "dataset/util/status.h" +#include "mindrecord/include/shard_error.h" #include "mindrecord/include/shard_reader.h" #include "mindrecord/include/common/shard_utils.h" #include "dataset/util/wait_post.h" diff --git a/mindspore/ccsrc/mindrecord/common/shard_error.cc b/mindspore/ccsrc/mindrecord/common/shard_error.cc new file mode 100644 index 0000000000..cf43dcb315 --- /dev/null +++ b/mindspore/ccsrc/mindrecord/common/shard_error.cc @@ -0,0 +1,178 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mindrecord/include/shard_error.h" + +namespace mindspore { +namespace mindrecord { +std::string ErrnoToMessage(MSRStatus status) { + switch (status) { + case FAILED: + return "operator failed"; + break; + case SUCCESS: + return "operator success"; + break; + case OPEN_FILE_FAILED: + return "open file failed"; + break; + case CLOSE_FILE_FAILED: + return "close file failed"; + break; + case WRITE_METADATA_FAILED: + return "write metadata failed"; + break; + case WRITE_RAWDATA_FAILED: + return "write rawdata failed"; + break; + case GET_SCHEMA_FAILED: + return "get schema failed"; + break; + case ILLEGAL_RAWDATA: + return "illegal raw data"; + break; + case PYTHON_TO_JSON_FAILED: + return "pybind: python object to json failed"; + break; + case DIR_CREATE_FAILED: + return "directory create failed"; + break; + case OPEN_DIR_FAILED: + return "open directory failed"; + break; + case INVALID_STATISTICS: + return "invalid statistics object"; + break; + case OPEN_DATABASE_FAILED: + return "open database failed"; + break; + case CLOSE_DATABASE_FAILED: + return "close database failed"; + break; + case DATABASE_OPERATE_FAILED: + return "database operate failed"; + break; + case BUILD_SCHEMA_FAILED: + return "build schema failed"; + break; + case DIVISOR_IS_ILLEGAL: + return "divisor is illegal"; + break; + case INVALID_FILE_PATH: + return "file path is invalid"; + break; + case SECURE_FUNC_FAILED: + return "secure function failed"; + break; + case ALLOCATE_MEM_FAILED: + return "allocate memory failed"; + break; + case ILLEGAL_FIELD_NAME: + return "illegal field name"; + break; + case ILLEGAL_FIELD_TYPE: + return "illegal field type"; + break; + case SET_METADATA_FAILED: + return "set metadata failed"; + break; + case ILLEGAL_SCHEMA_DEFINITION: + return "illegal schema definition"; + break; + case ILLEGAL_COLUMN_LIST: + return "illegal column list"; + break; + case SQL_ERROR: + return "sql error"; + break; + case ILLEGAL_SHARD_COUNT: + return "illegal shard 
count"; + break; + case ILLEGAL_SCHEMA_COUNT: + return "illegal schema count"; + break; + case VERSION_ERROR: + return "data version is not matched"; + break; + case ADD_SCHEMA_FAILED: + return "add schema failed"; + break; + case ILLEGAL_Header_SIZE: + return "illegal header size"; + break; + case ILLEGAL_Page_SIZE: + return "illegal page size"; + break; + case ILLEGAL_SIZE_VALUE: + return "illegal size value"; + break; + case INDEX_FIELD_ERROR: + return "add index fields failed"; + break; + case GET_CANDIDATE_CATEGORYFIELDS_FAILED: + return "get candidate category fields failed"; + break; + case GET_CATEGORY_INFO_FAILED: + return "get category information failed"; + break; + case ILLEGAL_CATEGORY_ID: + return "illegal category id"; + break; + case ILLEGAL_ROWNUMBER_OF_PAGE: + return "illegal row number of page"; + break; + case ILLEGAL_SCHEMA_ID: + return "illegal schema id"; + break; + case DESERIALIZE_SCHEMA_FAILED: + return "deserialize schema failed"; + break; + case DESERIALIZE_STATISTICS_FAILED: + return "deserialize statistics failed"; + break; + case ILLEGAL_DB_FILE: + return "illegal db file"; + break; + case OVERWRITE_DB_FILE: + return "overwrite db file"; + break; + case OVERWRITE_MINDRECORD_FILE: + return "overwrite mindrecord file"; + break; + case ILLEGAL_MINDRECORD_FILE: + return "illegal mindrecord file"; + break; + case PARSE_JSON_FAILED: + return "parse json failed"; + break; + case ILLEGAL_PARAMETERS: + return "illegal parameters"; + break; + case GET_PAGE_BY_GROUP_ID_FAILED: + return "get page by group id failed"; + break; + case GET_SYSTEM_STATE_FAILED: + return "get system state failed"; + break; + case IO_FAILED: + return "io operate failed"; + break; + default: + return "invalid error no"; + } +} +} // namespace mindrecord +} // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/include/shard_error.h b/mindspore/ccsrc/mindrecord/include/shard_error.h index 026ee836e3..b85eeb71c0 100644 --- 
a/mindspore/ccsrc/mindrecord/include/shard_error.h +++ b/mindspore/ccsrc/mindrecord/include/shard_error.h @@ -18,63 +18,65 @@ #define MINDRECORD_INCLUDE_SHARD_ERROR_H_ #include -#include "utils/error_code.h" +#include namespace mindspore { namespace mindrecord { -DE_ERRORNO_MINDRECORD(OPEN_FILE_FAILED, 0, "open file failed"); -DE_ERRORNO_MINDRECORD(CLOSE_FILE_FAILED, 1, "close file failed"); -DE_ERRORNO_MINDRECORD(WRITE_METADATA_FAILED, 2, "write metadata failed"); -DE_ERRORNO_MINDRECORD(WRITE_RAWDATA_FAILED, 3, "write rawdata failed"); -DE_ERRORNO_MINDRECORD(GET_SCHEMA_FAILED, 4, "get schema failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_RAWDATA, 5, "illegal raw data"); -DE_ERRORNO_MINDRECORD(PYTHON_TO_JSON_FAILED, 6, "pybind: python object to json failed"); -DE_ERRORNO_MINDRECORD(DIR_CREATE_FAILED, 7, "directory create failed"); -DE_ERRORNO_MINDRECORD(OPEN_DIR_FAILED, 8, "open directory failed"); -DE_ERRORNO_MINDRECORD(INVALID_STATISTICS, 9, "invalid statistics object"); -DE_ERRORNO_MINDRECORD(OPEN_DATABASE_FAILED, 10, "open database failed"); -DE_ERRORNO_MINDRECORD(CLOSE_DATABASE_FAILED, 11, "close database failed"); -DE_ERRORNO_MINDRECORD(DATABASE_OPERATE_FAILED, 12, "database operate failed"); -DE_ERRORNO_MINDRECORD(BUILD_SCHEMA_FAILED, 13, "build schema failed"); -DE_ERRORNO_MINDRECORD(DIVISOR_IS_ILLEGAL, 14, "divisor is illegal"); -DE_ERRORNO_MINDRECORD(INVALID_FILE_PATH, 15, "file path is invalid"); -DE_ERRORNO_MINDRECORD(SECURE_FUNC_FAILED, 16, "secure function failed"); -DE_ERRORNO_MINDRECORD(ALLOCATE_MEM_FAILED, 17, "allocate memory failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_FIELD_NAME, 18, "illegal field name"); -DE_ERRORNO_MINDRECORD(ILLEGAL_FIELD_TYPE, 19, "illegal field type"); -DE_ERRORNO_MINDRECORD(SET_METADATA_FAILED, 20, "set metadata failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_DEFINITION, 21, "illegal schema definition"); -DE_ERRORNO_MINDRECORD(ILLEGAL_COLUMN_LIST, 22, "illegal column list"); -DE_ERRORNO_MINDRECORD(SQL_ERROR, 23, "sql error"); 
-DE_ERRORNO_MINDRECORD(ILLEGAL_SHARD_COUNT, 24, "illegal shard count"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_COUNT, 25, "illegal schema count"); -DE_ERRORNO_MINDRECORD(VERSION_ERROR, 26, "data version is not matched"); -DE_ERRORNO_MINDRECORD(ADD_SCHEMA_FAILED, 27, "add schema failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_Header_SIZE, 28, "illegal header size"); -DE_ERRORNO_MINDRECORD(ILLEGAL_Page_SIZE, 29, "illegal page size"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SIZE_VALUE, 30, "illegal size value"); -DE_ERRORNO_MINDRECORD(INDEX_FIELD_FAILED, 31, "add index fields failed"); -DE_ERRORNO_MINDRECORD(GET_CANDIDATE_CATEGORYFIELDS_FAILED, 32, "get candidate categoryFields failed"); -DE_ERRORNO_MINDRECORD(GET_CATEGORY_INFO, 33, "get category information failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_CATEGORY_ID, 34, "illegal category id"); -DE_ERRORNO_MINDRECORD(ILLEGAL_ROWNUMBER_OF_PAGE, 35, "illegal row number of page"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_ID, 36, "illegal schema id"); -DE_ERRORNO_MINDRECORD(DESERIALIZE_SCHEMA_FAILED, 37, "deserialize schema failed"); -DE_ERRORNO_MINDRECORD(DESERIALIZE_STATISTICS_FAILED, 38, "deserialize statistics failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_DB_FILE, 39, "illegal db file."); -DE_ERRORNO_MINDRECORD(OVERWRITE_DB_FILE, 40, "overwrite db file."); -DE_ERRORNO_MINDRECORD(OVERWRITE_MINDRECORD_FILE, 41, "overwrite mindrecord file."); -DE_ERRORNO_MINDRECORD(ILLEGAL_MINDRECORD_FILE, 42, "illegal mindrecord file."); -DE_ERRORNO_MINDRECORD(PARSE_JSON_FAILED, 43, "parse json failed."); -DE_ERRORNO_MINDRECORD(ILLEGAL_PARAMETERS, 44, "illegal parameters."); -DE_ERRORNO_MINDRECORD(GET_PAGE_BY_GROUP_ID_FAILED, 46, "get page by group id failed."); -DE_ERRORNO_MINDRECORD(GET_SYSTEM_STATE_FAILED, 47, "get system state failed."); -DE_ERRORNO_MINDRECORD(IO_FAILED, 48, "io operate failed."); - enum MSRStatus { SUCCESS = 0, FAILED = 1, + OPEN_FILE_FAILED, + CLOSE_FILE_FAILED, + WRITE_METADATA_FAILED, + WRITE_RAWDATA_FAILED, + GET_SCHEMA_FAILED, + 
ILLEGAL_RAWDATA, + PYTHON_TO_JSON_FAILED, + DIR_CREATE_FAILED, + OPEN_DIR_FAILED, + INVALID_STATISTICS, + OPEN_DATABASE_FAILED, + CLOSE_DATABASE_FAILED, + DATABASE_OPERATE_FAILED, + BUILD_SCHEMA_FAILED, + DIVISOR_IS_ILLEGAL, + INVALID_FILE_PATH, + SECURE_FUNC_FAILED, + ALLOCATE_MEM_FAILED, + ILLEGAL_FIELD_NAME, + ILLEGAL_FIELD_TYPE, + SET_METADATA_FAILED, + ILLEGAL_SCHEMA_DEFINITION, + ILLEGAL_COLUMN_LIST, + SQL_ERROR, + ILLEGAL_SHARD_COUNT, + ILLEGAL_SCHEMA_COUNT, + VERSION_ERROR, + ADD_SCHEMA_FAILED, + ILLEGAL_Header_SIZE, + ILLEGAL_Page_SIZE, + ILLEGAL_SIZE_VALUE, + INDEX_FIELD_ERROR, + GET_CANDIDATE_CATEGORYFIELDS_FAILED, + GET_CATEGORY_INFO_FAILED, + ILLEGAL_CATEGORY_ID, + ILLEGAL_ROWNUMBER_OF_PAGE, + ILLEGAL_SCHEMA_ID, + DESERIALIZE_SCHEMA_FAILED, + DESERIALIZE_STATISTICS_FAILED, + ILLEGAL_DB_FILE, + OVERWRITE_DB_FILE, + OVERWRITE_MINDRECORD_FILE, + ILLEGAL_MINDRECORD_FILE, + PARSE_JSON_FAILED, + ILLEGAL_PARAMETERS, + GET_PAGE_BY_GROUP_ID_FAILED, + GET_SYSTEM_STATE_FAILED, + IO_FAILED }; + +// convert error no to string message +std::string ErrnoToMessage(MSRStatus status); } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index 32825fd9df..f91d28544e 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ -676,7 +676,7 @@ MSRStatus ShardReader::Open(const std::string &file_path, int n_consumer, if (CheckColumnList(selected_columns_) == FAILED) { MS_LOG(ERROR) << "Illegal column list"; - return FAILED; + return ILLEGAL_COLUMN_LIST; } // Initialize argument diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index 3d5c80b3f4..90f41fdeb9 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -21,6 +21,7 @@ #include "common/utils.h" #include "gtest/gtest.h" #include 
"mindrecord/include/shard_category.h" +#include "mindrecord/include/shard_error.h" #include "mindrecord/include/shard_sample.h" #include "mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" @@ -479,3 +480,38 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { row_count++; } } + +TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) { + // single MindRecord op and nothing else + // + // MindRecordOp + + MS_LOG(INFO) << "UT test TestMindRecordInvalidColumnList"; + + Status rc; + + // Start with an empty execution tree + auto my_tree = std::make_shared(); + + // Test info: + // Dataset from testDataset1 has 10 rows, 2 columns. + // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row + // only. 2 workers. + // Test a column selection instead of all columns as well. + + std::vector column_list; + std::string label_col_name("file_name_2"); + column_list.push_back(label_col_name); + label_col_name = "label"; + column_list.push_back(label_col_name); + + std::shared_ptr my_mindrecord_op; + MindRecordOp::Builder builder; + builder.SetDatasetFile(mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0") + .SetRowsPerBuffer(3) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); + rc = builder.Build(&my_mindrecord_op); + ASSERT_TRUE(rc.IsError()); + ASSERT_TRUE(rc.ToString().find_first_of("illegal column list") != std::string::npos); +} diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc index fd63373e20..f7ed39a006 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc @@ -155,7 +155,7 @@ TEST_F(TestShardReader, TestShardReaderColumnNotInSchema) { auto column_list = std::vector{"file_namex"}; ShardReader dataset; MSRStatus ret = dataset.Open(file_name, 4, column_list); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, ILLEGAL_COLUMN_LIST); } 
TEST_F(TestShardReader, TestShardVersion) { From 824d9e49565dfc3401e3c3fb54484bbf647daeb9 Mon Sep 17 00:00:00 2001 From: buxue Date: Thu, 2 Apr 2020 11:58:45 +0800 Subject: [PATCH 36/58] Develop op MaxPoolWithArgMax --- mindspore/nn/layer/pooling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py index 5d9b0ffa6c..bf90fcc9de 100644 --- a/mindspore/nn/layer/pooling.py +++ b/mindspore/nn/layer/pooling.py @@ -58,7 +58,7 @@ class _PoolNd(Cell): pass def extend_repr(self): - return 'kernel_size={kernel_size}, stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__) + return 'kernel_size={kernel_size}, strides={strides}, pad_mode={pad_mode}'.format(**self.__dict__) class MaxPool2d(_PoolNd): From 71b81c8f1b25ac9fd2431d3b9f292df793197a13 Mon Sep 17 00:00:00 2001 From: Zirui Wu Date: Wed, 1 Apr 2020 11:24:25 -0400 Subject: [PATCH 37/58] implemented multi-thread index writer for mindrecord num threads cannot be more than num shards minor fix clang style fix address review comments --- .../include/shard_index_generator.h | 10 +- .../mindrecord/io/shard_index_generator.cc | 98 ++++++++++++------- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h index f59dbe9bf0..1febd28fc2 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h @@ -85,14 +85,14 @@ class ShardIndexGenerator { /// \param sql /// \param data /// \return - MSRStatus BindParamaterExecuteSQL( + MSRStatus BindParameterExecuteSQL( sqlite3 *db, const std::string &sql, const std::vector>> &data); INDEX_FIELDS GenerateIndexFields(const std::vector &schema_detail); - MSRStatus ExcuteTransaction(const int &shard_no, const std::pair &db, - const std::vector &raw_page_ids, const std::map &blob_id_to_page_id); + MSRStatus 
ExecuteTransaction(const int &shard_no, const std::pair &db, + const std::vector &raw_page_ids, const std::map &blob_id_to_page_id); MSRStatus CreateShardNameTable(sqlite3 *db, const std::string &shard_name); @@ -103,12 +103,16 @@ class ShardIndexGenerator { void AddIndexFieldByRawData(const std::vector &schema_detail, std::vector> &row_data); + void DatabaseWriter(); // worker thread + std::string file_path_; bool append_; ShardHeader shard_header_; uint64_t page_size_; uint64_t header_size_; int schema_count_; + std::atomic_int task_; + std::atomic_bool write_success_; std::vector> fields_; }; } // namespace mindrecord diff --git a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc index 1c14d30f30..c0108241a1 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include #include "mindrecord/include/shard_index_generator.h" #include "common/utils.h" @@ -26,7 +27,13 @@ using mindspore::MsLogLevel::INFO; namespace mindspore { namespace mindrecord { ShardIndexGenerator::ShardIndexGenerator(const std::string &file_path, bool append) - : file_path_(file_path), append_(append), page_size_(0), header_size_(0), schema_count_(0) {} + : file_path_(file_path), + append_(append), + page_size_(0), + header_size_(0), + schema_count_(0), + task_(0), + write_success_(true) {} MSRStatus ShardIndexGenerator::Build() { ShardHeader header = ShardHeader(); @@ -284,7 +291,7 @@ std::pair ShardIndexGenerator::GenerateRawSQL( return {SUCCESS, sql}; } -MSRStatus ShardIndexGenerator::BindParamaterExecuteSQL( +MSRStatus ShardIndexGenerator::BindParameterExecuteSQL( sqlite3 *db, const std::string &sql, const std::vector>> &data) { sqlite3_stmt *stmt = nullptr; @@ -471,9 +478,9 @@ INDEX_FIELDS ShardIndexGenerator::GenerateIndexFields(const std::vector &s return {SUCCESS, std::move(fields)}; } -MSRStatus ShardIndexGenerator::ExcuteTransaction(const int &shard_no, const std::pair &db, - const std::vector &raw_page_ids, - const std::map &blob_id_to_page_id) { +MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, const std::pair &db, + const std::vector &raw_page_ids, + const std::map &blob_id_to_page_id) { // Add index data to database std::string shard_address = shard_header_.get_shard_address_by_id(shard_no); if (shard_address.empty()) { @@ -493,7 +500,7 @@ MSRStatus ShardIndexGenerator::ExcuteTransaction(const int &shard_no, const std: if (data.first != SUCCESS) { return FAILED; } - if (BindParamaterExecuteSQL(db.second, sql.second, data.second) == FAILED) { + if (BindParameterExecuteSQL(db.second, sql.second, data.second) == FAILED) { return FAILED; } MS_LOG(INFO) << "Insert " << data.second.size() << " rows to index db."; @@ -514,37 +521,62 @@ MSRStatus ShardIndexGenerator::WriteToDatabase() { page_size_ = 
shard_header_.get_page_size(); header_size_ = shard_header_.get_header_size(); schema_count_ = shard_header_.get_schema_count(); - if (shard_header_.get_shard_count() <= kMaxShardCount) { - // Create one database per shard - for (int shard_no = 0; shard_no < shard_header_.get_shard_count(); ++shard_no) { - // Create database - auto db = CreateDatabase(shard_no); - if (db.first != SUCCESS || db.second == nullptr) { - return FAILED; - } - MS_LOG(INFO) << "Init index db for shard: " << shard_no << " successfully."; - - // Pre-processing page information - auto total_pages = shard_header_.GetLastPageId(shard_no) + 1; - - std::map blob_id_to_page_id; - std::vector raw_page_ids; - for (uint64_t i = 0; i < total_pages; ++i) { - std::shared_ptr cur_page = shard_header_.GetPage(shard_no, i).first; - if (cur_page->get_page_type() == "RAW_DATA") { - raw_page_ids.push_back(i); - } else if (cur_page->get_page_type() == "BLOB_DATA") { - blob_id_to_page_id[cur_page->get_page_type_id()] = i; - } - } + if (shard_header_.get_shard_count() > kMaxShardCount) { + MS_LOG(ERROR) << "num shards: " << shard_header_.get_shard_count() << " exceeds max count:" << kMaxSchemaCount; + return FAILED; + } + task_ = 0; // set two atomic vars to initial value + write_success_ = true; - if (ExcuteTransaction(shard_no, db, raw_page_ids, blob_id_to_page_id) != SUCCESS) { - return FAILED; + // spawn half the physical threads or total number of shards whichever is smaller + const unsigned int num_workers = + std::min(std::thread::hardware_concurrency() / 2 + 1, static_cast(shard_header_.get_shard_count())); + + std::vector threads; + threads.reserve(num_workers); + + for (size_t t = 0; t < threads.capacity(); t++) { + threads.emplace_back(std::thread(&ShardIndexGenerator::DatabaseWriter, this)); + } + + for (size_t t = 0; t < threads.capacity(); t++) { + threads[t].join(); + } + return write_success_ ? 
SUCCESS : FAILED; +} + +void ShardIndexGenerator::DatabaseWriter() { + int shard_no = task_++; + while (shard_no < shard_header_.get_shard_count()) { + auto db = CreateDatabase(shard_no); + if (db.first != SUCCESS || db.second == nullptr || write_success_ == false) { + write_success_ = false; + return; + } + + MS_LOG(INFO) << "Init index db for shard: " << shard_no << " successfully."; + + // Pre-processing page information + auto total_pages = shard_header_.GetLastPageId(shard_no) + 1; + + std::map blob_id_to_page_id; + std::vector raw_page_ids; + for (uint64_t i = 0; i < total_pages; ++i) { + std::shared_ptr cur_page = shard_header_.GetPage(shard_no, i).first; + if (cur_page->get_page_type() == "RAW_DATA") { + raw_page_ids.push_back(i); + } else if (cur_page->get_page_type() == "BLOB_DATA") { + blob_id_to_page_id[cur_page->get_page_type_id()] = i; } - MS_LOG(INFO) << "Generate index db for shard: " << shard_no << " successfully."; } + + if (ExecuteTransaction(shard_no, db, raw_page_ids, blob_id_to_page_id) != SUCCESS) { + write_success_ = false; + return; + } + MS_LOG(INFO) << "Generate index db for shard: " << shard_no << " successfully."; + shard_no = task_++; } - return SUCCESS; } } // namespace mindrecord } // namespace mindspore From 6d1ea7af8e13c10795ddfe8bdd59db4767bf48bf Mon Sep 17 00:00:00 2001 From: Alexey Shevlyakov Date: Tue, 7 Apr 2020 13:32:52 -0400 Subject: [PATCH 38/58] remove make_unique.h --- mindspore/ccsrc/dataset/api/de_pipeline.cc | 7 ++- .../ccsrc/dataset/core/global_context.cc | 6 +-- mindspore/ccsrc/dataset/core/tensor.cc | 5 +-- mindspore/ccsrc/dataset/engine/data_buffer.cc | 2 +- mindspore/ccsrc/dataset/engine/data_schema.cc | 8 ++-- .../dataset/engine/datasetops/batch_op.cc | 16 +++---- .../dataset/engine/datasetops/dataset_op.cc | 10 ++--- .../engine/datasetops/device_queue_op.cc | 2 +- .../ccsrc/dataset/engine/datasetops/map_op.cc | 2 +- .../dataset/engine/datasetops/parallel_op.cc | 2 +- .../dataset/engine/datasetops/project_op.cc | 
2 +- .../dataset/engine/datasetops/rename_op.cc | 4 +- .../dataset/engine/datasetops/shuffle_op.cc | 12 +++--- .../engine/datasetops/source/celeba_op.cc | 24 +++++------ .../engine/datasetops/source/cifar_op.cc | 24 +++++------ .../engine/datasetops/source/generator_op.cc | 8 ++-- .../datasetops/source/image_folder_op.cc | 26 +++++------ .../engine/datasetops/source/manifest_op.cc | 22 +++++----- .../engine/datasetops/source/mindrecord_op.cc | 43 ++++++++++--------- .../engine/datasetops/source/mnist_op.cc | 26 +++++------ .../source/sampler/distributed_sampler.cc | 6 +-- .../datasetops/source/sampler/pk_sampler.cc | 6 +-- .../source/sampler/random_sampler.cc | 8 ++-- .../datasetops/source/sampler/sampler.cc | 2 +- .../datasetops/source/sampler/sampler.h | 1 - .../source/sampler/sequential_sampler.cc | 6 +-- .../source/sampler/subset_random_sampler.cc | 6 +-- .../source/sampler/weighted_random_sampler.cc | 10 ++--- .../datasetops/source/storage_client.cc | 9 ++-- .../engine/datasetops/source/storage_op.cc | 8 ++-- .../engine/datasetops/source/tf_buffer.cc | 7 ++- .../engine/datasetops/source/tf_reader_op.cc | 37 ++++++++-------- .../engine/datasetops/source/voc_op.cc | 22 +++++----- .../ccsrc/dataset/engine/datasetops/zip_op.cc | 14 +++--- mindspore/ccsrc/dataset/engine/db_connector.h | 2 +- .../ccsrc/dataset/engine/execution_tree.cc | 2 +- .../dataset/kernels/image/image_utils.cc | 3 +- mindspore/ccsrc/dataset/kernels/py_func_op.cc | 1 - mindspore/ccsrc/dataset/util/arena.cc | 1 - mindspore/ccsrc/dataset/util/circular_pool.cc | 4 +- mindspore/ccsrc/dataset/util/de_error.h | 7 +++ mindspore/ccsrc/dataset/util/list.h | 3 +- mindspore/ccsrc/dataset/util/lock.cc | 1 + mindspore/ccsrc/dataset/util/lock.h | 1 - mindspore/ccsrc/dataset/util/make_unique.h | 37 ---------------- mindspore/ccsrc/dataset/util/queue.h | 2 +- mindspore/ccsrc/dataset/util/task.h | 1 - mindspore/ccsrc/device/gpu/blocking_queue.cc | 3 +- .../kernel/gpu/math/bias_add_gpu_kernel.h | 5 +-- 
.../kernel/gpu/nn/bias_add_grad_gpu_kenel.h | 5 +-- .../ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h | 5 +-- .../kernel/gpu/nn/lstm_grad_data_gpu_kernel.h | 7 ++- .../gpu/nn/lstm_grad_weight_gpu_kernel.h | 5 +-- tests/ut/cpp/dataset/celeba_op_test.cc | 2 +- tests/ut/cpp/dataset/cifar_op_test.cc | 2 +- tests/ut/cpp/dataset/image_folder_op_test.cc | 14 +++--- tests/ut/cpp/dataset/manifest_op_test.cc | 2 +- tests/ut/cpp/dataset/project_op_test.cc | 2 +- .../cpp/dataset/stand_alone_samplers_test.cc | 2 +- tests/ut/cpp/dataset/tfReader_op_test.cc | 20 ++++----- 60 files changed, 241 insertions(+), 291 deletions(-) delete mode 100644 mindspore/ccsrc/dataset/util/make_unique.h diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index 65ec8d30f2..1812c0421a 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -23,7 +23,6 @@ #include "dataset/engine/datasetops/source/image_folder_op.h" #include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/util/make_unique.h" #include "dataset/core/tensor.h" #include "dataset/engine/dataset_iterator.h" #include "dataset/engine/datasetops/source/manifest_op.h" @@ -119,7 +118,7 @@ Status DEPipeline::AssignRootNode(const DsOpPtr &dataset_op) { return (tree_->As Status DEPipeline::LaunchTreeExec() { RETURN_IF_NOT_OK(tree_->Prepare()); RETURN_IF_NOT_OK(tree_->Launch()); - iterator_ = make_unique(tree_); + iterator_ = std::make_unique(tree_); if (iterator_ == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create an Iterator."); return Status::OK(); } @@ -307,7 +306,7 @@ Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptrSetSchemaFile(ToString(args["schema"])); } else if (!args["schema_json_string"].is_none()) { - std::unique_ptr schema = make_unique(); + std::unique_ptr schema = std::make_unique(); std::string s = ToString(args["schema_json_string"]); 
RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector())); (void)builder->SetNumRows(schema->num_rows()); @@ -683,7 +682,7 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr schema = make_unique(); + std::unique_ptr schema = std::make_unique(); if (args.contains("schema_file_path")) { RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load)); } else { diff --git a/mindspore/ccsrc/dataset/core/global_context.cc b/mindspore/ccsrc/dataset/core/global_context.cc index 7e361a1f2c..3de8e0fcd8 100644 --- a/mindspore/ccsrc/dataset/core/global_context.cc +++ b/mindspore/ccsrc/dataset/core/global_context.cc @@ -55,9 +55,9 @@ Status GlobalContext::Init() { // For testing we can use Dummy pool instead // Create some tensor allocators for the different types and hook them into the pool. - tensor_allocator_ = mindspore::make_unique>(mem_pool_); - cv_tensor_allocator_ = mindspore::make_unique>(mem_pool_); - int_allocator_ = mindspore::make_unique(mem_pool_); + tensor_allocator_ = std::make_unique>(mem_pool_); + cv_tensor_allocator_ = std::make_unique>(mem_pool_); + int_allocator_ = std::make_unique(mem_pool_); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/dataset/core/tensor.cc index 6aa34fa342..8f0eae459a 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ b/mindspore/ccsrc/dataset/core/tensor.cc @@ -28,7 +28,6 @@ #include "dataset/core/global_context.h" #include "dataset/core/pybind_support.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" namespace py = pybind11; namespace mindspore { @@ -53,7 +52,7 @@ namespace dataset { Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape), type_(type), data_(nullptr) { // grab the mem pool from global context and create the allocator for char data area std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); - data_allocator_ = 
mindspore::make_unique>(global_pool); + data_allocator_ = std::make_unique>(global_pool); } Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) { @@ -137,7 +136,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, py::array arr) { if ((*ptr)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type."); std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); - (*ptr)->data_allocator_ = mindspore::make_unique>(global_pool); + (*ptr)->data_allocator_ = std::make_unique>(global_pool); static_cast((*ptr)->StartAddr()); int64_t byte_size = (*ptr)->SizeInBytes(); unsigned char *data = static_cast(arr.request().ptr); diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.cc b/mindspore/ccsrc/dataset/engine/data_buffer.cc index a0f47512ec..4aed994d3c 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.cc +++ b/mindspore/ccsrc/dataset/engine/data_buffer.cc @@ -40,7 +40,7 @@ Status DataBuffer::CreateDataBuffer( case DatasetType::kTf: { // This type of buffer is for TF record data. // Allocate derived class version for a TF buffers - new_data_buffer = mindspore::make_unique(id, kDeBFlagNone, storage_client); + new_data_buffer = std::make_unique(id, kDeBFlagNone, storage_client); break; } default: { diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/dataset/engine/data_schema.cc index 68666796be..4fe5d665c6 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/dataset/engine/data_schema.cc @@ -26,8 +26,8 @@ #include "common/utils.h" #include "dataset/util/status.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" #include "utils/log_adapter.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { @@ -58,7 +58,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten // our shape. Otherwise, set our shape to be empty. 
if (in_shape != nullptr) { // Create a shape and copy construct it into our column's shape. - tensor_shape_ = mindspore::make_unique(*in_shape); + tensor_shape_ = std::make_unique(*in_shape); } else { tensor_shape_ = nullptr; } @@ -75,7 +75,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; } // Assignment overload @@ -86,7 +86,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) { tensor_impl_ = in_cd.tensor_impl_; col_name_ = in_cd.col_name_; // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? 
std::make_unique(in_cd.shape()) : nullptr; } return *this; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc index 7c5d4bd4c8..8778fe1b45 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc @@ -59,8 +59,8 @@ Status BatchOp::operator()() { TaskManager::FindMe()->Post(); int32_t epoch_num = 0, batch_num = 0, cnt = 0; TensorRow new_row; - std::unique_ptr table = make_unique(); - child_iterator_ = mindspore::make_unique(this, 0, 0); + std::unique_ptr table = std::make_unique(); + child_iterator_ = std::make_unique(this, 0, 0); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); column_name_map_ = child_iterator_->col_name_id_map(); int32_t cur_batch_size = 0; @@ -72,7 +72,7 @@ Status BatchOp::operator()() { if (table->size() == static_cast(cur_batch_size)) { RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); - table = make_unique(); + table = std::make_unique(); RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(epoch_num, batch_num, cnt - epoch_num))); } RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); @@ -82,7 +82,7 @@ Status BatchOp::operator()() { RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); } - table = make_unique(); // this drops when drop == true + table = std::make_unique(); // this drops when drop == true // end of the current epoch, batch_num should start from 0 again batch_num = 0; epoch_num++; @@ -153,9 +153,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair)); while (table_pair.second.ctrl_ != batchCtrl::kQuit) { if (table_pair.second.ctrl_ == batchCtrl::kEOE) { - 
RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique(0, DataBuffer::kDeBFlagEOE))); } else if (table_pair.second.ctrl_ == batchCtrl::kEOF) { - RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) { std::unique_ptr db = nullptr; RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &db)); @@ -170,8 +170,8 @@ Status BatchOp::MakeBatchedBuffer(std::pair, CBatc std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); if (!input_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc - (*db) = make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); - std::unique_ptr dest_table = make_unique(); + (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); + std::unique_ptr dest_table = std::make_unique(); RETURN_IF_NOT_OK(BatchRows(&table_pair.first, &dest_table, table_pair.first->size())); (*db)->set_tensor_table(std::move(dest_table)); (*db)->set_column_name_map(column_name_map_); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc index d3b85b84fb..f51c2a1539 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc @@ -80,9 +80,9 @@ void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { MS_LOG(INFO) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers << ". 
Consumer: " << num_consumers << "."; if (oc_queue_size_ > 0) { - out_connector_ = mindspore::make_unique(num_producers, // The number of producers - num_consumers, // Only one consumer (the training App) - oc_queue_size_); + out_connector_ = std::make_unique(num_producers, // The number of producers + num_consumers, // Only one consumer (the training App) + oc_queue_size_); } else { // Some op's may choose not to have an output connector MS_LOG(INFO) << "Bypassed connector creation for tree operator: " << operator_id_ << "."; @@ -149,7 +149,7 @@ Status DatasetOp::GetNextInput(std::unique_ptr *p_buffer, int32_t wo // The base class implementation simply flows the eoe message to output. Derived classes // may override if they need to perform special eoe handling. Status DatasetOp::EoeReceived(int32_t worker_id) { - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); return (out_connector_->Add(static_cast(worker_id), std::move(eoe_buffer))); } @@ -157,7 +157,7 @@ Status DatasetOp::EoeReceived(int32_t worker_id) { // The base class implementation simply flows the eof message to output. Derived classes // may override if they need to perform special eof handling. 
Status DatasetOp::EofReceived(int32_t worker_id) { - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); return (out_connector_->Add(static_cast(worker_id), std::move(eof_buffer))); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc index 3c2eea16ee..71e4ce64a4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc @@ -225,7 +225,7 @@ Status DeviceQueueOp::SendDataToCPU() { MS_LOG(INFO) << "Device queue, sending data to CPU."; int64_t total_batch = 0; - std::unique_ptr child_iterator = mindspore::make_unique(this, 0, 0); + std::unique_ptr child_iterator = std::make_unique(this, 0, 0); while (!(child_iterator->eof_handled())) { TensorRow curr_row; RETURN_IF_NOT_OK(child_iterator->FetchNextTensorRow(&curr_row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc index c9f1f98ae0..b6d603bac9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc @@ -179,7 +179,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(WorkerEntryInit(in_buffer.get(), &keep_input_columns, &to_process_indices, &final_col_name_id_map, &input_columns, &output_columns)); - std::unique_ptr new_tensor_table(mindspore::make_unique()); + std::unique_ptr new_tensor_table(std::make_unique()); // Perform the compute function of TensorOp(s) and store the result in new_tensor_table. 
RETURN_IF_NOT_OK(WorkerCompute(in_buffer.get(), to_process_indices, new_tensor_table.get(), keep_input_columns, &input_columns, &output_columns)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc index d9792312a3..4b2af2250a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc @@ -48,7 +48,7 @@ Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) { // Instantiate the worker connector. This is the internal connector, not the operators // output connector. It has single master consuming from it (num producers is 1), and the number // of workers is the defined count from the op. - worker_connector_ = mindspore::make_unique(num_workers_, num_producers_, worker_connector_size); + worker_connector_ = std::make_unique(num_workers_, num_producers_, worker_connector_size); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc index 11296f84f4..b87967dde8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc @@ -79,7 +79,7 @@ Status ProjectOp::Project(std::unique_ptr *data_buffer) { new_column_name_mapping[current_column] = i; projected_column_indices.push_back(column_name_mapping[current_column]); } - std::unique_ptr new_tensor_table = mindspore::make_unique(); + std::unique_ptr new_tensor_table = std::make_unique(); while ((*data_buffer)->NumRows() > 0) { TensorRow current_row; RETURN_IF_NOT_OK((*data_buffer)->PopRow(¤t_row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc index c09f56141e..725476bf91 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc @@ -84,13 +84,13 @@ 
Status RenameOp::operator()() { // we got eoe, now try again until we get eof MS_LOG(INFO) << "Rename operator EOE Received."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); MS_LOG(DEBUG) << "Rename operator fetching buffer after EOE."; RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); } // end of while eof loop MS_LOG(INFO) << "Rename opeerator EOF Received."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc index 5dae48ad73..2afafe2128 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc @@ -70,7 +70,7 @@ ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_con rng_(shuffle_seed), buffer_counter_(0), rows_per_buffer_(rows_per_buffer), - shuffle_buffer_(mindspore::make_unique()), + shuffle_buffer_(std::make_unique()), shuffle_last_row_idx_(0), shuffle_buffer_state_(kShuffleStateInit) {} @@ -90,7 +90,7 @@ Status ShuffleOp::SelfReset() { shuffle_seed_ = distribution(random_device); rng_ = std::mt19937_64(shuffle_seed_); } - shuffle_buffer_ = mindspore::make_unique(); + shuffle_buffer_ = std::make_unique(); buffer_counter_ = 0; shuffle_last_row_idx_ = 0; shuffle_buffer_state_ = kShuffleStateInit; @@ -142,7 +142,7 @@ Status ShuffleOp::operator()() { // Create the child iterator to fetch our data from. 
int32_t worker_id = 0; int32_t child_idx = 0; - child_iterator_ = mindspore::make_unique(this, worker_id, child_idx); + child_iterator_ = std::make_unique(this, worker_id, child_idx); // Main operator loop while (true) { @@ -161,7 +161,7 @@ Status ShuffleOp::operator()() { // Step 1) // Create an output tensor table if one is not created yet. if (!new_buffer_table) { - new_buffer_table = mindspore::make_unique(); + new_buffer_table = std::make_unique(); } // Step 2) @@ -176,7 +176,7 @@ Status ShuffleOp::operator()() { // and send this buffer on it's way up the pipeline. Special case is if this is the // last row then we also send it. if (new_buffer_table->size() == rows_per_buffer_ || shuffle_last_row_idx_ == 0) { - auto new_buffer = mindspore::make_unique(buffer_counter_, DataBuffer::kDeBFlagNone); + auto new_buffer = std::make_unique(buffer_counter_, DataBuffer::kDeBFlagNone); new_buffer->set_tensor_table(std::move(new_buffer_table)); new_buffer->set_column_name_map(column_name_map_); buffer_counter_++; @@ -218,7 +218,7 @@ Status ShuffleOp::operator()() { // Since we overloaded eoeReceived function, we are responsible to flow the EOE up the // pipepline manually now that we are done draining the shuffle buffer MS_LOG(INFO) << "Shuffle operator sending EOE."; - auto eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); // Do not wait for any reset to be flown down from operators above us. 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc index 570fc9f454..0c2e20729e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc @@ -40,7 +40,7 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); // label is like this:0 1 0 0 1...... @@ -83,7 +83,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri col_name_map_[data_schema_->column(index).name()] = index; } - attr_info_queue_ = make_unique>>(queue_size); + attr_info_queue_ = std::make_unique>>(queue_size); io_block_queues_.Init(num_workers_, queue_size); } @@ -311,7 +311,7 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { row_count++; if (row_count % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buff_count++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -320,21 +320,21 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { if (!keys.empty()) { RETURN_IF_NOT_OK(io_block_queues_[(buff_count++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % 
num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(std::move(make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(data_buffer)); @@ -349,17 +349,17 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty()) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); 
buffer_id += num_workers_; @@ -370,7 +370,7 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { } Status CelebAOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow row; RETURN_IF_NOT_OK(LoadTensorRow(image_labels_vec_[key], &row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc index 260a4a4dc5..3e64c8a3e6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc @@ -47,7 +47,7 @@ Status CifarOp::Builder::Build(std::shared_ptr *ptr) { if (sampler_ == nullptr) { sampler_ = std::make_shared(); } - schema_ = make_unique(); + schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); if (cifar_type_ == kCifar10) { @@ -91,7 +91,7 @@ CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const col_name_map_[data_schema_->column(i).name()] = i; } constexpr uint64_t kUtilQueueSize = 512; - cifar_raw_data_block_ = make_unique>>(kUtilQueueSize); + cifar_raw_data_block_ = std::make_unique>>(kUtilQueueSize); io_block_queues_.Init(num_workers_, queue_size); } @@ -114,7 +114,7 @@ Status CifarOp::operator()() { if (row_cnt_ >= num_samples_) break; // enough row read, break for loop if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -122,21 +122,21 @@ Status CifarOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, 
IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -169,17 +169,17 @@ Status CifarOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } 
else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -213,7 +213,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer Status CifarOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const int64_t &key : keys) { TensorRow trow; RETURN_IF_NOT_OK(LoadTensorRow(key, &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc index ceb88ceb0e..37a74f019a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc @@ -173,9 +173,9 @@ Status GeneratorOp::operator()() { bool eof = false; while (!eof) { // Create new buffer each iteration - fetched_buffer = mindspore::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + fetched_buffer = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); fetched_buffer->set_column_name_map(column_names_map_); - std::unique_ptr fetched_table = mindspore::make_unique(); + std::unique_ptr fetched_table = std::make_unique(); bool eoe = false; { py::gil_scoped_acquire gil_acquire; @@ -201,12 +201,12 @@ Status GeneratorOp::operator()() { if (eoe) { // Push out EOE upon StopIteration exception from generator MS_LOG(INFO) << "Generator operator sends out EOE."; - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = 
std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { // If last repeat or not repeated, push out EOF and exit master loop MS_LOG(INFO) << "Generator operator sends out EOF."; - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); MS_LOG(INFO) << "Generator operator main execution loop complete."; eof = true; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc index b8044fb38a..f6cf377666 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc @@ -39,7 +39,7 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); @@ -82,8 +82,8 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { col_name_map_[data_schema_->column(i).name()] = i; } - folder_name_queue_ = make_unique>(num_wkrs * queue_size); - image_name_queue_ = make_unique>(num_wkrs * queue_size); + folder_name_queue_ = std::make_unique>(num_wkrs * queue_size); + image_name_queue_ = std::make_unique>(num_wkrs * queue_size); io_block_queues_.Init(num_workers_, queue_size); } @@ -143,7 +143,7 @@ Status ImageFolderOp::operator()() { row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { 
RETURN_IF_NOT_OK( - io_block_queues_[buf_cnt_++ % num_workers_]->Add(make_unique(keys, IOBlock::kDeIoBlockNone))); + io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique(keys, IOBlock::kDeIoBlockNone))); keys.clear(); } } @@ -151,21 +151,21 @@ Status ImageFolderOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(keys, IOBlock::kDeIoBlockNone))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(keys, IOBlock::kDeIoBlockNone))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { - std::unique_ptr eoe_block = make_unique(IOBlock::kDeIoBlockFlagEoe); - std::unique_ptr eof_block = make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eoe_block = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eof_block = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); for (int32_t i = 0; i < num_workers_; ++i) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. 
Sleep master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -182,15 +182,15 @@ Status ImageFolderOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -231,7 +231,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { // Looping over LoadTensorRow to make 1 DataBuffer. 
1 function call produces 1 buffer Status ImageFolderOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc index 52db199e5b..6907647952 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc @@ -40,7 +40,7 @@ Status ManifestOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( @@ -105,7 +105,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -113,21 +113,21 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - 
io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(sampler_buffer)); @@ -160,17 +160,17 @@ Status ManifestOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty()) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -227,7 +227,7 @@ Status ManifestOp::LoadTensorRow(const std::pair 
&keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow trow; RETURN_IF_NOT_OK(LoadTensorRow(image_labelname_[static_cast(key)], &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index cb0f135a0d..fbb772af59 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -28,7 +28,6 @@ #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/execution_tree.h" -#include "dataset/util/make_unique.h" #include "utils/log_adapter.h" namespace mindspore { @@ -94,19 +93,19 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf io_blk_queues_.Init(num_workers_, op_connector_queue_size); if (!block_reader_) return; for (int32_t i = 0; i < num_workers_; ++i) { - block_buffer_.emplace_back(make_unique>(std::vector{})); + block_buffer_.emplace_back(std::make_unique>(std::vector{})); } } // Private helper method to encapsulate some common construction/reset tasks Status MindRecordOp::Init() { - shard_reader_ = mindspore::make_unique(); + shard_reader_ = std::make_unique(); auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_); CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, "MindRecordOp init failed. 
Error message: " + ErrnoToMessage(rc)); - data_schema_ = mindspore::make_unique(); + data_schema_ = std::make_unique(); std::vector> schema_vec = shard_reader_->get_shard_header()->get_schemas(); // check whether schema exists, if so use the first one @@ -143,7 +142,7 @@ Status MindRecordOp::Init() { } if (!load_all_cols) { - std::unique_ptr tmp_schema = make_unique(); + std::unique_ptr tmp_schema = std::make_unique(); for (std::string colname : columns_to_load_) { CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), colname + ": doesn't exist"); RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); @@ -297,7 +296,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr *arr RETURN_IF_NOT_OK(GetFloat(&value, columns_json[column_name], use_double)); *new_shape = TensorShape::CreateScalar(); - *array_data = mindspore::make_unique(1); + *array_data = std::make_unique(1); (*array_data)[0] = value; } else { if (column.hasShape()) { @@ -308,7 +307,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr *arr } int idx = 0; - *array_data = mindspore::make_unique(new_shape->NumOfElements()); + *array_data = std::make_unique(new_shape->NumOfElements()); for (auto &element : columns_json[column_name]) { T value = 0; RETURN_IF_NOT_OK(GetFloat(&value, element, use_double)); @@ -349,7 +348,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr *array RETURN_IF_NOT_OK(GetInt(&value, columns_json[column_name])); *new_shape = TensorShape::CreateScalar(); - *array_data = mindspore::make_unique(1); + *array_data = std::make_unique(1); (*array_data)[0] = value; } else { if (column.hasShape()) { @@ -360,7 +359,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr *array } int idx = 0; - *array_data = mindspore::make_unique(new_shape->NumOfElements()); + *array_data = std::make_unique(new_shape->NumOfElements()); for (auto &element : 
columns_json[column_name]) { T value = 0; RETURN_IF_NOT_OK(GetInt(&value, element)); @@ -430,12 +429,14 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK( + out_connector_->Add(worker_id, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); continue; } if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK( + out_connector_->Add(worker_id, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); continue; } @@ -485,9 +486,9 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { Status MindRecordOp::GetBufferFromReader(std::unique_ptr *fetched_buffer, int64_t buffer_id, int32_t worker_id) { - *fetched_buffer = mindspore::make_unique(buffer_id, DataBuffer::kDeBFlagNone); + *fetched_buffer = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); (*fetched_buffer)->set_column_name_map(column_name_mapping_); - std::unique_ptr tensor_table = mindspore::make_unique(); + std::unique_ptr tensor_table = std::make_unique(); for (int32_t i = 0; i < rows_per_buffer_; ++i) { ShardTuple tupled_buffer; if (block_reader_) { @@ -596,22 +597,22 @@ Status MindRecordOp::operator()() { for (int32_t i = 0; i < buffers_needed_; ++i) { if (block_reader_) RETURN_IF_NOT_OK(FetchBlockBuffer(i)); std::vector keys(1, i); - RETURN_IF_NOT_OK( - io_blk_queues_[buf_cnt_++ % num_workers_]->Add(make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + RETURN_IF_NOT_OK(io_blk_queues_[buf_cnt_++ % num_workers_]->Add( + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, 
kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { - RETURN_IF_NOT_OK( - io_blk_queues_[i]->Add(std::move(make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); + RETURN_IF_NOT_OK(io_blk_queues_[i]->Add( + std::move(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); // reset our buffer count and go to loop again. 
RETURN_IF_NOT_OK(shard_reader_wait_post_.Wait()); @@ -655,7 +656,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() { } Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *count) { - std::unique_ptr shard_reader = mindspore::make_unique(); + std::unique_ptr shard_reader = std::make_unique(); MSRStatus rc = shard_reader->CountTotalRows(dataset_path, count); if (rc == MSRStatus::FAILED) { RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed."); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc index f76fb9314d..3431e58aea 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc @@ -43,7 +43,7 @@ Status MnistOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); TensorShape scalar = TensorShape::CreateScalar(); @@ -89,7 +89,7 @@ Status MnistOp::TraversalSampleIds(const std::shared_ptr &sample_ids, st row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); keys->clear(); } } @@ -115,21 +115,21 @@ Status MnistOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % 
num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; ++i) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -145,15 +145,15 @@ Status MnistOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock)); while (iOBlock != nullptr) { if (iOBlock->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (iOBlock->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(iOBlock->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); 
RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -178,7 +178,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow) // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer Status MnistOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); @@ -309,8 +309,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "num_images != num_labels"); // The image size of the Mnist dataset is fixed at [28,28] int64_t size = kMnistImageRows * kMnistImageCols; - auto images_buf = mindspore::make_unique(size * num_images); - auto labels_buf = mindspore::make_unique(num_images); + auto images_buf = std::make_unique(size * num_images); + auto labels_buf = std::make_unique(num_images); if (images_buf == nullptr || labels_buf == nullptr) { std::string err_msg = "Fail to allocate memory for MNIST Buffer."; MS_LOG(ERROR) << err_msg.c_str(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index 51ad71e8cf..28a5705648 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -52,9 +52,9 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer if (cnt_ > samples_per_buffer_) { RETURN_STATUS_UNEXPECTED("Distributed Sampler Error"); } else if (cnt_ == samples_per_buffer_) { - (*out_buffer) = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, 
DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = mindspore::make_unique(cnt_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(cnt_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); int64_t *id_ptr = reinterpret_cast(sample_ids->StartAddr()); @@ -63,7 +63,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast(next_id)] : next_id; } TensorRow row(1, sample_ids); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc index 04a6ad17a2..8c8c12fce2 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -53,9 +53,9 @@ Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { if (next_id_ > num_pk_samples_ || num_pk_samples_ == 0) { RETURN_STATUS_UNEXPECTED("Index out of bound in PKSampler"); } else if (next_id_ == num_pk_samples_) { - (*out_buffer) = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = mindspore::make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; int64_t last_id = (samples_per_buffer_ + next_id_ > num_pk_samples_) ? 
num_pk_samples_ : samples_per_buffer_ + next_id_; @@ -68,7 +68,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { *(id_ptr++) = samples[rnd_ind]; } TensorRow row(1, sample_ids); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc index de4d89d950..216f322052 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -32,9 +32,9 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED("RandomSampler Internal Error"); } else if (next_id_ == num_samples_) { - (*out_buffer) = make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? 
num_samples_ : samples_per_buffer_ + next_id_; RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); @@ -44,7 +44,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { } next_id_ = last_id; TensorRow row(1, sampleIds); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } @@ -61,7 +61,7 @@ Status RandomSampler::Init(const RandomAccessOp *op) { } std::shuffle(shuffled_ids_.begin(), shuffled_ids_.end(), rnd_); } else { - dist = make_unique>(0, num_rows_ - 1); + dist = std::make_unique>(0, num_rows_ - 1); } rnd_.seed(seed_++); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc index 9818cd8a17..aa3838f8b5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc @@ -35,7 +35,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr *sample_ids, int64_t } if (col_desc_ == nullptr) { // a ColDescriptor for Tensor that holds SampleIds - col_desc_ = make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); + col_desc_ = std::make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); } TensorShape shape(std::vector(1, num_elements)); RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h index d9a20f9170..801565508b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h @@ -27,7 +27,6 @@ #include "dataset/engine/data_buffer.h" #include "dataset/engine/data_schema.h" #include 
"dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 71c3dd07c4..72131a6de1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -25,9 +25,9 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED("Sequential Sampler Internal Error"); } else if (next_id_ == num_samples_) { - (*out_buffer) = make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? 
num_samples_ : samples_per_buffer_ + next_id_; RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); @@ -36,7 +36,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) *(idPtr++) = next_id_++; } TensorRow row(1, sampleIds); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index 4f727fcd04..16603939b3 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -64,9 +64,9 @@ Status SubsetRandomSampler::Reset() { Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { // All samples have been drawn if (sample_id_ == indices_.size()) { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); std::shared_ptr outputIds; int64_t last_id = sample_id_ + samples_per_buffer_; @@ -92,7 +92,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffe } // Create a TensorTable from that single tensor and push into DataBuffer - (*out_buffer)->set_tensor_table(make_unique(1, TensorRow(1, outputIds))); + (*out_buffer)->set_tensor_table(std::make_unique(1, TensorRow(1, outputIds))); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index f305474182..f2957e74be 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -46,10 +46,10 @@ Status WeightedRandomSampler::Init(const RandomAccessOp *op) { CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && samples_per_buffer_ > 0, "Fail to init WeightedRandomSampler"); if (!replacement_) { - exp_dist_ = mindspore::make_unique>(1); + exp_dist_ = std::make_unique>(1); InitOnePassSampling(); } else { - discrete_dist_ = mindspore::make_unique>(weights_.begin(), weights_.end()); + discrete_dist_ = std::make_unique>(weights_.begin(), weights_.end()); } return Status::OK(); @@ -96,9 +96,9 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buf } if (sample_id_ == num_samples_) { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); std::shared_ptr outputIds; int64_t last_id = sample_id_ + samples_per_buffer_; @@ -132,7 +132,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buf } // Create a TensorTable from that single tensor and push into DataBuffer - (*out_buffer)->set_tensor_table(make_unique(1, TensorRow(1, outputIds))); + (*out_buffer)->set_tensor_table(std::make_unique(1, TensorRow(1, outputIds))); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc index 95720a97be..862edcf63a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc @@ -24,7 +24,6 @@ #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/engine/datasetops/source/storage_op.h" #include 
"dataset/engine/datasetops/source/tf_client.h" -#include "dataset/util/make_unique.h" #include "dataset/util/status.h" namespace mindspore { @@ -57,7 +56,7 @@ static Status CreateStorageClientSwitch( case DatasetType::kTf: { // Construct the derived class TFClient, stored as base class StorageClient store_op->set_rows_per_buffer(32); - *out_client = mindspore::make_unique(std::move(schema), store_op); + *out_client = std::make_unique(std::move(schema), store_op); break; } case DatasetType::kUnknown: @@ -83,7 +82,7 @@ Status StorageClient::CreateStorageClient( std::shared_ptr *out_client) { // Out: the created storage client // Make a new schema first. This only assigns the dataset type. It does not // create the columns yet. - auto new_schema = mindspore::make_unique(); + auto new_schema = std::make_unique(); RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path)); RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); return Status::OK(); @@ -99,7 +98,7 @@ Status StorageClient::CreateStorageClient( std::shared_ptr *out_client) { // Out: the created storage client // The dataset type is passed in by the user. Create an empty schema with only // only the dataset type filled in and then create the client with it. - auto new_schema = mindspore::make_unique(); + auto new_schema = std::make_unique(); new_schema->set_dataset_type(in_type); RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); return Status::OK(); @@ -147,7 +146,7 @@ Status StorageClient::AssignDatasetLayout(uint32_t num_rows, // In: Th // The current schema was just an empty one with only the dataset field populated. // Let's copy construct a new one that will be a copy of the input schema (releasing the old // one) and then set the number of rows that the user requested. 
- data_schema_ = mindspore::make_unique(schema); + data_schema_ = std::make_unique(schema); CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647"); num_rows_in_dataset_ = num_rows; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc index 9da27eac04..2ca957ae6d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc @@ -303,7 +303,7 @@ Status StorageOp::init() { // For simplicity, we'll make both of them 3 so they are the same size. int32_t action_queue_size = (buffers_needed / num_workers_) + 1; for (int32_t i = 0; i < num_workers_; ++i) { - auto new_queue = mindspore::make_unique>(action_queue_size); + auto new_queue = std::make_unique>(action_queue_size); action_queue_.push_back(std::move(new_queue)); } } @@ -483,10 +483,10 @@ Status StorageOp::operator()() { // Post the control message to tell the workers to stop waiting on action queue // because we are done! RETURN_IF_NOT_OK(this->PostEndOfData()); - std::unique_ptr eoeBuffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoeBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoeBuffer))); MS_LOG(INFO) << "StorageOp master: Flow end-of-data eof message."; - std::unique_ptr eofBuffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eofBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eofBuffer))); MS_LOG(INFO) << "StorageOp master: Main execution loop complete."; done = true; // while loop exit @@ -496,7 +496,7 @@ Status StorageOp::operator()() { // RepeatOp above us somewhere in the tree will re-init us with the data to fetch again // once it gets the end-of-epoch message. 
MS_LOG(INFO) << "StorageOp master: Flow end-of-epoch eoe message."; - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); // reset our buffer count and go to loop again. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc index 766c2149c4..372dcd2c1c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc @@ -27,7 +27,6 @@ #include "dataset/core/data_type.h" #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/engine/data_schema.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { @@ -72,7 +71,7 @@ Status TFBuffer::Load() { } // Construct the Tensor table for this buffer. - tensor_table_ = mindspore::make_unique(); + tensor_table_ = std::make_unique(); // At each position in the tensor table, instantiate the shared pointer to it's Tensor. 
uint32_t row = 0; @@ -272,7 +271,7 @@ Status TFBuffer::LoadFloatList(const ColDescriptor ¤t_col, const dataengin // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = float_list.value_size(); - *float_array = mindspore::make_unique(*num_elements); + *float_array = std::make_unique(*num_elements); for (int i = 0; i < float_list.value_size(); i++) { (*float_array)[i] = float_list.value(i); } @@ -294,7 +293,7 @@ Status TFBuffer::LoadIntList(const ColDescriptor ¤t_col, const dataengine: // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = int64_list.value_size(); - *int_array = mindspore::make_unique(*num_elements); + *int_array = std::make_unique(*num_elements); for (int i = 0; i < int64_list.value_size(); i++) { (*int_array)[i] = int64_list.value(i); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc index c872c02015..0764d7e0ad 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc @@ -36,7 +36,6 @@ #include "dataset/engine/db_connector.h" #include "dataset/engine/execution_tree.h" #include "dataset/engine/jagged_connector.h" -#include "dataset/util/make_unique.h" #include "dataset/util/path.h" #include "dataset/util/queue.h" #include "dataset/util/random.h" @@ -54,7 +53,7 @@ TFReaderOp::Builder::Builder() builder_op_connector_size_ = config_manager->op_connector_size(); builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_shuffle_files_ = false; - builder_data_schema_ = make_unique(); + builder_data_schema_ = std::make_unique(); } Status TFReaderOp::Builder::ValidateInputs() const { @@ -103,7 +102,7 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64 finished_reading_dataset_(false), 
shuffle_files_(shuffle_files), data_schema_(std::move(data_schema)), - filename_index_(make_unique()), + filename_index_(std::make_unique()), load_io_block_queue_(true), load_jagged_connector_(true), num_rows_(0), @@ -129,7 +128,7 @@ Status TFReaderOp::Init() { // parallel op base. RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_connector_size_)); - jagged_buffer_connector_ = mindspore::make_unique(num_workers_, 1, worker_connector_size_); + jagged_buffer_connector_ = std::make_unique(num_workers_, 1, worker_connector_size_); // temporary: make size large enough to hold all files + EOE to avoid hangs int32_t safe_queue_size = static_cast(std::ceil(dataset_files_list_.size() / num_workers_)) + 1; @@ -229,7 +228,7 @@ Status TFReaderOp::operator()() { } // all workers finished reading for this epoch, and we have read all the data from all workers - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { @@ -241,7 +240,7 @@ Status TFReaderOp::operator()() { } } - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); RETURN_IF_NOT_OK(PostEndOfData()); @@ -274,7 +273,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { MS_LOG(INFO) << "TFReader operator worker " << worker_id << " loaded file " << filename << "."; } } else { - std::unique_ptr eoe_buffer = mindspore::make_unique(1, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(1, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(eoe_buffer))); } @@ -288,7 +287,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { // 
When the worker pops this control indicator, it will shut itself down gracefully. Status TFReaderOp::PostEndOfData() { for (int i = 0; i < num_workers_; ++i) { - std::unique_ptr eof = mindspore::make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eof = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(PushIoBlockQueue(i, std::move(eof))); } @@ -299,7 +298,7 @@ Status TFReaderOp::PostEndOfData() { // pops this control indicator, it will wait until the next epoch starts and then resume execution. Status TFReaderOp::PostEndOfEpoch(int32_t queue_index) { for (int i = 0; i < num_workers_; ++i) { - std::unique_ptr eoe = mindspore::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eoe = std::make_unique(IOBlock::kDeIoBlockFlagEoe); RETURN_IF_NOT_OK(PushIoBlockQueue((queue_index + i) % num_workers_, std::move(eoe))); } @@ -358,7 +357,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector &i_keys) { } if (!equal_rows_per_shard_) { if (key_index++ % num_devices_ == device_id_) { - auto ioBlock = make_unique(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } @@ -367,7 +366,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector &i_keys) { auto file_it = filename_index_->Search(*it); std::string file_name = file_it.value(); if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { - auto ioBlock = make_unique(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); MS_LOG(DEBUG) << "File name " << *it << " start offset " << start_offset << " end_offset " << end_offset; queue_index = (queue_index + 1) % num_workers_; @@ -404,14 
+403,15 @@ Status TFReaderOp::FillIOBlockNoShuffle() { } if (!equal_rows_per_shard_) { if (key_index++ % num_devices_ == device_id_) { - auto ioBlock = make_unique(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); + auto ioBlock = + std::make_unique(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } } else { std::string file_name = it.value(); if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { - auto ioBlock = make_unique(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } @@ -490,14 +490,13 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off int64_t rows_read = 0; int64_t rows_total = 0; - std::unique_ptr current_buffer = - mindspore::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + std::unique_ptr current_buffer = std::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); std::unordered_map column_name_map; for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { column_name_map[data_schema_->column(i).name()] = i; } current_buffer->set_column_name_map(column_name_map); - std::unique_ptr new_tensor_table = make_unique(); + std::unique_ptr new_tensor_table = std::make_unique(); while (reader.peek() != EOF) { if (!load_jagged_connector_) { @@ -532,9 +531,9 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off current_buffer->set_tensor_table(std::move(new_tensor_table)); RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(current_buffer))); - current_buffer = make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + current_buffer = std::make_unique(0, 
DataBuffer::BufferFlags::kDeBFlagNone); current_buffer->set_column_name_map(column_name_map); - new_tensor_table = make_unique(); + new_tensor_table = std::make_unique(); rows_read = 0; } } @@ -742,7 +741,7 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = float_list.value_size(); - *float_array = mindspore::make_unique(*num_elements); + *float_array = std::make_unique(*num_elements); for (int i = 0; i < float_list.value_size(); ++i) { (*float_array)[i] = float_list.value(i); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc index d3785f4660..71b4c47cf5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc @@ -38,7 +38,7 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( @@ -85,7 +85,7 @@ Status VOCOp::TraverseSampleIds(const std::shared_ptr &sample_ids, std:: row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); keys->clear(); } } @@ -110,21 +110,21 @@ Status VOCOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, 
kDeOpLastRepeat)) { - std::unique_ptr eoe_block = make_unique(IOBlock::kDeIoBlockFlagEoe); - std::unique_ptr eof_block = make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eoe_block = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eof_block = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -164,7 +164,7 @@ Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { } Status VOCOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const uint64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); @@ -182,15 +182,15 @@ Status VOCOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (make_unique(0, DataBuffer::kDeBFlagEOF)))); + 
RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique(0, DataBuffer::kDeBFlagEOF)))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc index 716c853488..ec771740c1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc @@ -65,13 +65,13 @@ Status ZipOp::operator()() { // initialize the iterators for (int32_t i = 0; i < children_num_; ++i) { // magic number 0 since Zip is not a parallel Op - child_iterators_.push_back(mindspore::make_unique(this, 0, i)); + child_iterators_.push_back(std::make_unique(this, 0, i)); } // Loop until eof is true while (!eof_) { // Create tensor table and prepare it by fetching and packing the first zipped row into it. - std::unique_ptr curr_table = mindspore::make_unique(); + std::unique_ptr curr_table = std::make_unique(); RETURN_IF_NOT_OK(prepare(curr_table.get())); // If an eof got picked up during the above prepare, then we're done @@ -81,7 +81,7 @@ Status ZipOp::operator()() { while (!draining_) { // 1. If a previous loop iteration sent the current table out, then create a new one. if (curr_table == nullptr) { - curr_table = mindspore::make_unique(); + curr_table = std::make_unique(); } // 2 fill the table. 
Note: draining mode might get turned on if any of the child inputs were done @@ -89,8 +89,7 @@ Status ZipOp::operator()() { // 3 create and update buffer and send it to the out connector if (!curr_table->empty()) { - std::unique_ptr curr_buffer = - mindspore::make_unique(buffer_id_, DataBuffer::kDeBFlagNone); + std::unique_ptr curr_buffer = std::make_unique(buffer_id_, DataBuffer::kDeBFlagNone); curr_buffer->set_tensor_table(std::move(curr_table)); curr_buffer->set_column_name_map(col_name_id_map_); MS_LOG(DEBUG) << "Zip operator finished one buffer, pushing, rows " << curr_buffer->NumRows() << ", cols " @@ -105,15 +104,14 @@ Status ZipOp::operator()() { MS_LOG(DEBUG) << "Zip operator is now draining child inputs."; RETURN_IF_NOT_OK(drainPipeline()); // Now that we have drained child inputs, send the eoe up. - RETURN_IF_NOT_OK( - out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); } } // 5 handle eof // propagate eof here. MS_LOG(INFO) << "Zip operator got EOF, propagating."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/db_connector.h b/mindspore/ccsrc/dataset/engine/db_connector.h index 243e77e6ac..7ea9837c44 100644 --- a/mindspore/ccsrc/dataset/engine/db_connector.h +++ b/mindspore/ccsrc/dataset/engine/db_connector.h @@ -65,7 +65,7 @@ class DbConnector : public Connector> { RETURN_IF_NOT_OK(cv_.Wait(&lk, [this, worker_id]() { return expect_consumer_ == worker_id; })); // Once an EOF message is encountered this flag will be set and we can return early. 
if (end_of_file_) { - *result = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + *result = std::make_unique(0, DataBuffer::kDeBFlagEOF); } else { RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); if (*result == nullptr) { diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/dataset/engine/execution_tree.cc index 3dbeaa5ed1..20fcb836c5 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/dataset/engine/execution_tree.cc @@ -24,7 +24,7 @@ namespace mindspore { namespace dataset { // Constructor ExecutionTree::ExecutionTree() : id_count_(0) { - tg_ = mindspore::make_unique(); + tg_ = std::make_unique(); tree_state_ = kDeTStateInit; prepare_flags_ = kDePrepNone; } diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc index 0412f07636..8735cf7a05 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc @@ -24,7 +24,6 @@ #include "dataset/core/cv_tensor.h" #include "dataset/core/tensor.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" #include "dataset/util/random.h" #define MAX_INT_PRECISION 16777216 // float int precision is 16777216 @@ -376,7 +375,7 @@ Status HwcToChw(std::shared_ptr input, std::shared_ptr *output) int width = input_cv->shape()[1]; int num_channels = input_cv->shape()[2]; - auto output_cv = mindspore::make_unique(TensorShape{num_channels, height, width}, input_cv->type()); + auto output_cv = std::make_unique(TensorShape{num_channels, height, width}, input_cv->type()); for (int i = 0; i < num_channels; ++i) { cv::Mat mat; RETURN_IF_NOT_OK(output_cv->Mat({i}, &mat)); diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/dataset/kernels/py_func_op.cc index 69bd3443c4..c9e5d5b169 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/dataset/kernels/py_func_op.cc @@ -20,7 
+20,6 @@ #include "dataset/core/tensor.h" #include "dataset/kernels/tensor_op.h" -#include "dataset/util/make_unique.h" #include "dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/arena.cc b/mindspore/ccsrc/dataset/util/arena.cc index 856f7fef24..68673529ff 100644 --- a/mindspore/ccsrc/dataset/util/arena.cc +++ b/mindspore/ccsrc/dataset/util/arena.cc @@ -16,7 +16,6 @@ #include "dataset/util/arena.h" #include #include -#include "dataset/util/make_unique.h" #include "dataset/util/system_pool.h" #include "dataset/util/de_error.h" #include "./securec.h" diff --git a/mindspore/ccsrc/dataset/util/circular_pool.cc b/mindspore/ccsrc/dataset/util/circular_pool.cc index f6e43f35bf..92b169c94a 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.cc +++ b/mindspore/ccsrc/dataset/util/circular_pool.cc @@ -18,10 +18,8 @@ #include #include #include - #include "./securec.h" - -#include "dataset/util/make_unique.h" +#include "dataset/util/de_error.h" #include "dataset/util/system_pool.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/de_error.h b/mindspore/ccsrc/dataset/util/de_error.h index 07d7c125f7..d4988c58db 100644 --- a/mindspore/ccsrc/dataset/util/de_error.h +++ b/mindspore/ccsrc/dataset/util/de_error.h @@ -16,6 +16,13 @@ #ifndef DATASET_UTIL_DE_ERROR_H_ #define DATASET_UTIL_DE_ERROR_H_ +#ifdef DEBUG +#include +#define DS_ASSERT(f) assert(f) +#else +#define DS_ASSERT(f) ((void)0) +#endif + #include #include "utils/error_code.h" diff --git a/mindspore/ccsrc/dataset/util/list.h b/mindspore/ccsrc/dataset/util/list.h index f01201e34d..5a08f4514e 100644 --- a/mindspore/ccsrc/dataset/util/list.h +++ b/mindspore/ccsrc/dataset/util/list.h @@ -18,8 +18,7 @@ #include #include - -#include "dataset/util/make_unique.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/lock.cc b/mindspore/ccsrc/dataset/util/lock.cc index 7e92a1e53f..13a43e3e84 100644 --- 
a/mindspore/ccsrc/dataset/util/lock.cc +++ b/mindspore/ccsrc/dataset/util/lock.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "dataset/util/lock.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/lock.h b/mindspore/ccsrc/dataset/util/lock.h index 8fef6a143a..9492d34bdf 100644 --- a/mindspore/ccsrc/dataset/util/lock.h +++ b/mindspore/ccsrc/dataset/util/lock.h @@ -19,7 +19,6 @@ #include #include #include -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/make_unique.h b/mindspore/ccsrc/dataset/util/make_unique.h deleted file mode 100644 index 2fe0bf4550..0000000000 --- a/mindspore/ccsrc/dataset/util/make_unique.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef DATASET_UTIL_MAKE_UNIQUE_H_ -#define DATASET_UTIL_MAKE_UNIQUE_H_ - -#ifdef DEBUG -#include -#define DS_ASSERT(f) assert(f) -#else -#define DS_ASSERT(f) ((void)0) -#endif - -#include -#include -#include -#include "dataset/util/de_error.h" -#include "utils/log_adapter.h" - -namespace mindspore { -using std::make_unique; -} // namespace mindspore - -#endif // DATASET_UTIL_MAKE_UNIQUE_H_ diff --git a/mindspore/ccsrc/dataset/util/queue.h b/mindspore/ccsrc/dataset/util/queue.h index 4048deb86b..f0b087cf6d 100644 --- a/mindspore/ccsrc/dataset/util/queue.h +++ b/mindspore/ccsrc/dataset/util/queue.h @@ -212,7 +212,7 @@ class QueueList { void Init(int num_queues, int capacity) { queue_list_.reserve(num_queues); for (int i = 0; i < num_queues; i++) { - queue_list_.emplace_back(mindspore::make_unique>(capacity)); + queue_list_.emplace_back(std::make_unique>(capacity)); } } diff --git a/mindspore/ccsrc/dataset/util/task.h b/mindspore/ccsrc/dataset/util/task.h index aaf2f80a3d..d6149caec8 100644 --- a/mindspore/ccsrc/dataset/util/task.h +++ b/mindspore/ccsrc/dataset/util/task.h @@ -27,7 +27,6 @@ #include #include #include "dataset/util/de_error.h" -#include "dataset/util/make_unique.h" #include "dataset/util/intrp_resource.h" #include "dataset/util/list.h" #include "dataset/util/memory_pool.h" diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.cc b/mindspore/ccsrc/device/gpu/blocking_queue.cc index 7417115ae9..c36b1cdbf5 100644 --- a/mindspore/ccsrc/device/gpu/blocking_queue.cc +++ b/mindspore/ccsrc/device/gpu/blocking_queue.cc @@ -17,7 +17,6 @@ #include "device/gpu/blocking_queue.h" #include #include "device/gpu/gpu_common.h" -#include "dataset/util/make_unique.h" #include "common/utils.h" namespace mindspore { @@ -32,7 +31,7 @@ GpuQueue::GpuQueue(void *addr, size_t feature_size, size_t label_size, size_t ca stream_(0), node_info_(nullptr) { CHECK_CUDA_RET_WITH_ERROR(cudaStreamCreate(&stream_), "Cuda Create Stream Failed"); - node_info_ = 
mindspore::make_unique(capacity); + node_info_ = std::make_unique(capacity); } GpuQueue::~GpuQueue() { buffer_ = nullptr; } diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h index 265180afe6..90609c3be5 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h @@ -23,7 +23,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { @@ -74,8 +73,8 @@ class BiasAddGpuKernel : public GpuKernel { // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. auto cudnn_dims = std::max(num_dims, 4UL); - std::unique_ptr x_dims = mindspore::make_unique(cudnn_dims); - std::unique_ptr b_dims = mindspore::make_unique(cudnn_dims); + std::unique_ptr x_dims = std::make_unique(cudnn_dims); + std::unique_ptr b_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { x_dims[i] = (i < num_dims) ? SizeToInt(x_shape[i]) : 1; b_dims[i] = (i == pos) ? SizeToInt(x_shape[i]) : 1; diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h index b0e8102ee3..fd73f378d8 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h @@ -26,7 +26,6 @@ #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace kernel { @@ -84,8 +83,8 @@ class BiasAddGradGpuKernel : public GpuKernel { // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. 
auto cudnn_dims = std::max(num_dims, 4UL); - std::unique_ptr dy_dims = mindspore::make_unique(cudnn_dims); - std::unique_ptr db_dims = mindspore::make_unique(cudnn_dims); + std::unique_ptr dy_dims = std::make_unique(cudnn_dims); + std::unique_ptr db_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { dy_dims[i] = (i < num_dims) ? SizeToInt(dy_shape[i]) : 1; db_dims[i] = (i == pos) ? SizeToInt(dy_shape[i]) : 1; diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h index 51a2da8574..c3e839b9c5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h @@ -22,7 +22,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { @@ -144,8 +143,8 @@ class LstmGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - x_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); + x_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h index a60ab78f7d..b12fa3bea5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -23,7 +23,6 @@ #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace kernel { @@ -212,9 +211,9 @@ class LstmGradDataGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - dx_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); - dy_desc_ = mindspore::make_unique(seq_len_); + dx_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); + dy_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&dx_desc_[i]), "create x_desc failed"); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h index b28736cc96..e081b9d070 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -22,7 +22,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { namespace kernel { @@ -169,8 +168,8 @@ class LstmGradWeightGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - x_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); + x_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); diff --git a/tests/ut/cpp/dataset/celeba_op_test.cc b/tests/ut/cpp/dataset/celeba_op_test.cc index 69314771a3..35be4d7378 100644 --- a/tests/ut/cpp/dataset/celeba_op_test.cc +++ b/tests/ut/cpp/dataset/celeba_op_test.cc @@ -116,7 +116,7 @@ TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) { TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) { std::vector indices({1}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); uint32_t expect_labels[1][40] = {{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}}; std::string dir = datasets_root_path_ + "/testCelebAData/"; uint32_t count = 0; diff --git a/tests/ut/cpp/dataset/cifar_op_test.cc b/tests/ut/cpp/dataset/cifar_op_test.cc index 0cd1db65b5..dcbea83df4 100644 --- a/tests/ut/cpp/dataset/cifar_op_test.cc +++ b/tests/ut/cpp/dataset/cifar_op_test.cc @@ -92,7 +92,7 @@ TEST_F(MindDataTestCifarOp, TestSequentialSamplerCifar10) { TEST_F(MindDataTestCifarOp, TestRandomSamplerCifar10) { uint32_t original_seed = GlobalContext::config_manager()->seed(); GlobalContext::config_manager()->set_seed(0); - std::unique_ptr sampler = mindspore::make_unique(true, 12); + std::unique_ptr sampler = std::make_unique(true, 12); std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; auto tree = Build({Cifarop(16, 2, 32, folder_path, std::move(sampler), 100)}); tree->Prepare(); diff --git a/tests/ut/cpp/dataset/image_folder_op_test.cc b/tests/ut/cpp/dataset/image_folder_op_test.cc index 5b118a629a..e149e687c6 100644 --- a/tests/ut/cpp/dataset/image_folder_op_test.cc +++ 
b/tests/ut/cpp/dataset/image_folder_op_test.cc @@ -138,7 +138,7 @@ TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) { TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) { int32_t original_seed = GlobalContext::config_manager()->seed(); GlobalContext::config_manager()->set_seed(0); - std::unique_ptr sampler = mindspore::make_unique(true, 12); + std::unique_ptr sampler = std::make_unique(true, 12); int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -200,7 +200,7 @@ TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) { // id range 0 - 10 is label 0, and id range 11 - 21 is label 1 std::vector indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); std::string folder_path = datasets_root_path_ + "/testPK/data"; // Expect 6 samples for label 0 and 1 int res[2] = {6, 6}; @@ -238,7 +238,7 @@ TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) { // create sampler with replacement = replacement std::unique_ptr sampler = - mindspore::make_unique(weights, num_samples, true, samples_per_buffer); + std::make_unique(weights, num_samples, true, samples_per_buffer); std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -295,7 +295,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) { } TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { - std::unique_ptr sampler = mindspore::make_unique(11, 10, false); + std::unique_ptr sampler = std::make_unique(11, 10, false); std::string folder_path = datasets_root_path_ + 
"/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler)), Repeat(4)}); tree->Prepare(); @@ -322,7 +322,7 @@ TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { } TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) { - std::unique_ptr sampler = mindspore::make_unique(3, false, 4); + std::unique_ptr sampler = std::make_unique(3, false, 4); int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; // ground truth label std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -431,7 +431,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderDatasetSize) { } TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { - std::unique_ptr sampler = mindspore::make_unique(4, 0, false); + std::unique_ptr sampler = std::make_unique(4, 0, false); std::string folder_path = datasets_root_path_ + "/testPK/data"; // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {}, 5)}); @@ -460,7 +460,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { } TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) { - std::unique_ptr sampler = mindspore::make_unique(4, 3, false); + std::unique_ptr sampler = std::make_unique(4, 3, false); std::string folder_path = datasets_root_path_ + "/testPK/data"; // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {}, 12)}); diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc index 9e36f8c747..f662f98fc8 100644 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ b/tests/ut/cpp/dataset/manifest_op_test.cc @@ -86,7 +86,7 @@ TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) { TEST_F(MindDataTestManifest, 
TestSubsetRandomSamplerManifest) { std::vector indices({1}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; // Expect 6 samples for label 0 and 1 auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))}); diff --git a/tests/ut/cpp/dataset/project_op_test.cc b/tests/ut/cpp/dataset/project_op_test.cc index 1df2ce05bb..484396321c 100644 --- a/tests/ut/cpp/dataset/project_op_test.cc +++ b/tests/ut/cpp/dataset/project_op_test.cc @@ -45,7 +45,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) { .SetRowsPerBuffer(16) .SetWorkerConnectorSize(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); ASSERT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc index c686a9486b..48cc811615 100644 --- a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc +++ b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc @@ -74,7 +74,7 @@ TEST_F(MindDataTestStandAloneSampler, TestDistributedSampler) { std::unique_ptr db; std::shared_ptr tensor; for (int i = 0; i < 6; i++) { - std::unique_ptr sampler = mindspore::make_unique(3, i % 3, (i < 3 ? false : true)); + std::unique_ptr sampler = std::make_unique(3, i % 3, (i < 3 ? 
false : true)); sampler->Init(&mock); sampler->GetNextBuffer(&db); db->GetTensor(&tensor, 0, 0); diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index c70d5fb6ee..5fb1f4e909 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -48,7 +48,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -102,7 +102,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(500) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -156,7 +156,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(1) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -211,7 +211,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) { .SetWorkerConnectorSize(1) .SetRowsPerBuffer(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); 
builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -265,7 +265,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(16) .SetNumWorkers(1); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -321,7 +321,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) { .SetRowsPerBuffer(16) .SetWorkerConnectorSize(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc= builder.Build(&my_tfreader_op); @@ -379,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) { std::string dataset_path; dataset_path = datasets_root_path_ + "/testTFTestAllTypes"; - std::unique_ptr data_schema = mindspore::make_unique(); + std::unique_ptr data_schema = std::make_unique(); std::vector columns_to_load; columns_to_load.push_back("col_sint32"); columns_to_load.push_back("col_binary"); @@ -445,7 +445,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); builder.SetDataSchema(std::move(schema)); @@ -503,7 +503,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + 
"/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema5Rows.json", {}); builder.SetDataSchema(std::move(schema)); @@ -561,7 +561,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json", {}); builder.SetDataSchema(std::move(schema)); From d3757240551c610545b63efbc1f9c63ecafddd9e Mon Sep 17 00:00:00 2001 From: mxm <83028974@qq.com> Date: Tue, 31 Mar 2020 12:08:02 +0800 Subject: [PATCH 39/58] fixed: PrimitiveToInferImplMap map is global, and key of the map PrimitivePtr also a global variable. If key is initialized later than the map initialized during compilation, will cause the primitive map initialize failed. Variable initialization order is not guaranteed during compilation. 
--- .../ccsrc/pipeline/static_analysis/prim.cc | 248 +++++++++--------- .../static_analysis/static_analysis.cc | 49 ++-- 2 files changed, 148 insertions(+), 149 deletions(-) diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc index d71e098009..403bbdf433 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc @@ -42,92 +42,95 @@ namespace mindspore { namespace abstract { -PrimitiveEvalImplMap PrimitiveToInferImplMap = { - // Statements - {prim::kPrimReturn, {InferImplReturn, true}}, - {prim::kPrimTypeOf, {InferImplTypeof, false}}, - {prim::kPrimHasType, {InferImplHasType, false}}, - {prim::kPrimDot, {InferImplDot, true}}, - {prim::kPrimSwitch, {InferImplSwitch, true}}, - {prim::kPrimIs_, {InferImplIs_, true}}, - {prim::kPrimIsNot, {InferImplIsNot, true}}, - // Maths - {prim::kPrimMaximumGrad, {InferImplMinOrMaxGrad, true}}, - {prim::kPrimMinimumGrad, {InferImplMinOrMaxGrad, true}}, - // Array - {prim::kPrimScalarToArray, {InferImplScalarToArray, true}}, - {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}}, - {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}}, - {prim::kPrimShape, {InferImplShape, true}}, - {prim::kPrimPack, {InferImplPack, true}}, - // Structure - {prim::kPrimMakeTuple, {InferImplMakeTuple, true}}, - {prim::kPrimMakeList, {InferImplMakeList, true}}, - {prim::kPrimMakeDict, {InferImplMakeDict, true}}, - {prim::kPrimMakeSlice, {InferImplMakeSlice, true}}, - {prim::kPrimMakeKeywordArg, {InferImplMakeKwarg, true}}, - {prim::kPrimExtractKeywordArg, {InferImplExtractKwarg, true}}, - {prim::kPrimMakeRecord, {InferImplMakeRecord, false}}, - {prim::kPrimTupleGetItem, {InferImplTupleGetItem, true}}, - {prim::kPrimListGetItem, {InferImplListGetItem, true}}, - {prim::kPrimTupleSetItem, {InferImplTupleSetItem, true}}, - {prim::kPrimListSetItem, {InferImplListSetItem, true}}, - {prim::kPrimDictGetItem, {InferImplDictGetItem, 
true}}, - {prim::kPrimDictSetItem, {InferImplDictSetItem, true}}, - {prim::kPrimListAppend, {InferImplListAppend, true}}, - {prim::kPrimTupleLen, {InferImplTupleLen, true}}, - {prim::kPrimListLen, {InferImplListLen, true}}, - {prim::kPrimArrayLen, {InferImplArrayLen, true}}, - {prim::kPrimListMap, {InferImplListMap, false}}, - {prim::kPrimListReduce, {InferImplListReduce, false}}, - {prim::kPrimTupleReversed, {InferImplTupleReversed, false}}, - {prim::kPrimReducedShape, {InferImplReduceShape, false}}, - {prim::kPrimTupleDiv, {InferImplTupleDiv, false}}, - {prim::kPrimTupleToArray, {InferImplTuple2Array, false}}, - {prim::kPrimShapeMul, {InferImplShapeMul, false}}, - {prim::kPrimTupleEqual, {InferImplTupleEqual, false}}, - {prim::kPrimListEqual, {InferImplListEqual, false}}, - {prim::kPrimMakeRange, {InferImplMakeRange, false}}, - {prim::kPrimStopGradient, {InferImplStopGradient, false}}, - {prim::kPrimStringEqual, {InferImplStringEqual, false}}, - {prim::kPrimDictLen, {InferImplDictLen, false}}, - // NN - {prim::kPrimPooling, {InferImplPooling, true}}, - {prim::kPrimPoolingGrad, {InferImplPoolingGrad, true}}, - {prim::kPrimFusedBatchNorm, {InferImplFusedBatchNorm, true}}, - {prim::kPrimFusedBatchNormGrad, {InferImplFusedBatchNormGrad, true}}, - {prim::kPrimReluGrad, {InferImplReluGrad, true}}, - {prim::kPrimConv2DBackpropInput, {InferImplConv2DBackpropInput, true}}, - {prim::kPrimConv2DBackpropFilter, {InferImplConv2DBackpropFilter, true}}, - {prim::kPrimBiasAddGrad, {InferImplBiasAddGrad, true}}, - {prim::kPrimRelu, {InferImplRelu, true}}, - {prim::kPrimZerosLikeTensor, {InferImplZerosLikeTensor, true}}, - {prim::kPrimFakeBprop, {InferImplFakeBprop, false}}, - {prim::kPrimLayerNorm, {InferImplLayerNorm, true}}, - {prim::kPrimLayerNormGrad, {InferImplLayerNormGrad, true}}, - {prim::kPrimDropoutGenMask, {InferImplDropoutGenMask, true}}, - // Others - {prim::kPrimIdentity, {InferImplIdentity, true}}, - // Set impl to null as it will use PartialEvaluator; - 
{prim::kPrimPartial, {nullptr, true}}, - {prim::kPrimJ, {InferImplJ, false}}, - {prim::kPrimEnvGetItem, {InferImplEnvGetItem, true}}, - {prim::kPrimEnvSetItem, {InferImplEnvSetItem, true}}, - {prim::kPrimEnvAdd, {InferImplEnvAdd, true}}, - {prim::kPrimMakeRefKey, {InferImplMakeRefKey, true}}, - {prim::kPrimMakeRef, {InferImplMakeRef, true}}, - {prim::kPrimGetRefKey, {InferImplGetRefKey, true}}, - {prim::kPrimGetRefValue, {InferImplGetRefValue, true}}, - {prim::kPrimGetRefOrigin, {InferImplGetRefOrigin, true}}, - {prim::kPrimStateSetItem, {InferImplStateSetItem, true}}, - {prim::kPrimDepend, {InferImplDepend, true}}, - {prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}}, - {prim::kPrimControlDepend, {InferImplControlDepend, true}}, - // Debug - {prim::kPrimScalarSummary, {InferImplScalarSummary, true}}, - {prim::kPrimImageSummary, {InferImplTensorSummary, true}}, - {prim::kPrimTensorSummary, {InferImplTensorSummary, true}}, -}; +PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { + static PrimitiveEvalImplMap prim_eval_implement_map = { + // Statements + {prim::kPrimReturn, {InferImplReturn, true}}, + {prim::kPrimTypeOf, {InferImplTypeof, false}}, + {prim::kPrimHasType, {InferImplHasType, false}}, + {prim::kPrimDot, {InferImplDot, true}}, + {prim::kPrimSwitch, {InferImplSwitch, true}}, + {prim::kPrimIs_, {InferImplIs_, true}}, + {prim::kPrimIsNot, {InferImplIsNot, true}}, + // Maths + {prim::kPrimMaximumGrad, {InferImplMinOrMaxGrad, true}}, + {prim::kPrimMinimumGrad, {InferImplMinOrMaxGrad, true}}, + // Array + {prim::kPrimScalarToArray, {InferImplScalarToArray, true}}, + {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}}, + {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}}, + {prim::kPrimShape, {InferImplShape, true}}, + {prim::kPrimPack, {InferImplPack, true}}, + // Structure + {prim::kPrimMakeTuple, {InferImplMakeTuple, true}}, + {prim::kPrimMakeList, {InferImplMakeList, true}}, + {prim::kPrimMakeDict, 
{InferImplMakeDict, true}}, + {prim::kPrimMakeSlice, {InferImplMakeSlice, true}}, + {prim::kPrimMakeKeywordArg, {InferImplMakeKwarg, true}}, + {prim::kPrimExtractKeywordArg, {InferImplExtractKwarg, true}}, + {prim::kPrimMakeRecord, {InferImplMakeRecord, false}}, + {prim::kPrimTupleGetItem, {InferImplTupleGetItem, true}}, + {prim::kPrimListGetItem, {InferImplListGetItem, true}}, + {prim::kPrimTupleSetItem, {InferImplTupleSetItem, true}}, + {prim::kPrimListSetItem, {InferImplListSetItem, true}}, + {prim::kPrimDictGetItem, {InferImplDictGetItem, true}}, + {prim::kPrimDictSetItem, {InferImplDictSetItem, true}}, + {prim::kPrimListAppend, {InferImplListAppend, true}}, + {prim::kPrimTupleLen, {InferImplTupleLen, true}}, + {prim::kPrimListLen, {InferImplListLen, true}}, + {prim::kPrimArrayLen, {InferImplArrayLen, true}}, + {prim::kPrimListMap, {InferImplListMap, false}}, + {prim::kPrimListReduce, {InferImplListReduce, false}}, + {prim::kPrimTupleReversed, {InferImplTupleReversed, false}}, + {prim::kPrimReducedShape, {InferImplReduceShape, false}}, + {prim::kPrimTupleDiv, {InferImplTupleDiv, false}}, + {prim::kPrimTupleToArray, {InferImplTuple2Array, false}}, + {prim::kPrimShapeMul, {InferImplShapeMul, false}}, + {prim::kPrimTupleEqual, {InferImplTupleEqual, false}}, + {prim::kPrimListEqual, {InferImplListEqual, false}}, + {prim::kPrimMakeRange, {InferImplMakeRange, false}}, + {prim::kPrimStopGradient, {InferImplStopGradient, false}}, + {prim::kPrimStringEqual, {InferImplStringEqual, false}}, + {prim::kPrimDictLen, {InferImplDictLen, false}}, + // NN + {prim::kPrimPooling, {InferImplPooling, true}}, + {prim::kPrimPoolingGrad, {InferImplPoolingGrad, true}}, + {prim::kPrimFusedBatchNorm, {InferImplFusedBatchNorm, true}}, + {prim::kPrimFusedBatchNormGrad, {InferImplFusedBatchNormGrad, true}}, + {prim::kPrimReluGrad, {InferImplReluGrad, true}}, + {prim::kPrimConv2DBackpropInput, {InferImplConv2DBackpropInput, true}}, + {prim::kPrimConv2DBackpropFilter, 
{InferImplConv2DBackpropFilter, true}}, + {prim::kPrimBiasAddGrad, {InferImplBiasAddGrad, true}}, + {prim::kPrimRelu, {InferImplRelu, true}}, + {prim::kPrimZerosLikeTensor, {InferImplZerosLikeTensor, true}}, + {prim::kPrimFakeBprop, {InferImplFakeBprop, false}}, + {prim::kPrimLayerNorm, {InferImplLayerNorm, true}}, + {prim::kPrimLayerNormGrad, {InferImplLayerNormGrad, true}}, + {prim::kPrimDropoutGenMask, {InferImplDropoutGenMask, true}}, + // Others + {prim::kPrimIdentity, {InferImplIdentity, true}}, + // Set impl to null as it will use PartialEvaluator; + {prim::kPrimPartial, {nullptr, true}}, + {prim::kPrimJ, {InferImplJ, false}}, + {prim::kPrimEnvGetItem, {InferImplEnvGetItem, true}}, + {prim::kPrimEnvSetItem, {InferImplEnvSetItem, true}}, + {prim::kPrimEnvAdd, {InferImplEnvAdd, true}}, + {prim::kPrimMakeRefKey, {InferImplMakeRefKey, true}}, + {prim::kPrimMakeRef, {InferImplMakeRef, true}}, + {prim::kPrimGetRefKey, {InferImplGetRefKey, true}}, + {prim::kPrimGetRefValue, {InferImplGetRefValue, true}}, + {prim::kPrimGetRefOrigin, {InferImplGetRefOrigin, true}}, + {prim::kPrimStateSetItem, {InferImplStateSetItem, true}}, + {prim::kPrimDepend, {InferImplDepend, true}}, + {prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}}, + {prim::kPrimControlDepend, {InferImplControlDepend, true}}, + // Debug + {prim::kPrimScalarSummary, {InferImplScalarSummary, true}}, + {prim::kPrimImageSummary, {InferImplTensorSummary, true}}, + {prim::kPrimTensorSummary, {InferImplTensorSummary, true}}, + }; + return prim_eval_implement_map; +} using mindspore::parse::PyObjectWrapper; @@ -961,10 +964,7 @@ class PartialEvaluator : public Evaluator { new_nodes_inputs[1] = NewValueNode(new_signature_value); FuncGraphPtr func_graph = cnode->func_graph(); - ScopePtr scope = kDefaultScope; - if (out_conf != nullptr) { - scope = out_conf->node()->scope(); - } + ScopePtr scope = out_conf->node()->scope(); ScopeGuard scope_guard(scope); CNodePtr new_cnode = 
func_graph->NewCNode(new_nodes_inputs); @@ -981,39 +981,41 @@ struct PrimitiveImplInferValue { }; using PrimitiveToImplMap = std::unordered_map; - -PrimitiveToImplMap UniformPrimitiveToImplMapValue = { - {prim::kPrimScalarAdd, {prim::ScalarAdd, true, nullptr, true}}, - {prim::kPrimScalarSub, {prim::ScalarSub, true, nullptr, true}}, - {prim::kPrimScalarMul, {prim::ScalarMul, true, nullptr, true}}, - {prim::kPrimScalarDiv, {prim::ScalarDiv, true, nullptr, true}}, - {prim::kPrimScalarMod, {prim::ScalarMod, true, nullptr, true}}, - {prim::kPrimScalarUadd, {prim::ScalarUAdd, true, nullptr, true}}, - {prim::kPrimScalarUsub, {prim::ScalarUSub, true, nullptr, true}}, - {prim::kPrimScalarLog, {prim::ScalarLog, true, nullptr, true}}, - {prim::kPrimScalarEq, {prim::ScalarEq, true, std::make_shared(), true}}, - {prim::kPrimScalarLt, {prim::ScalarLt, true, std::make_shared(), true}}, - {prim::kPrimScalarGt, {prim::ScalarGt, true, std::make_shared(), true}}, - {prim::kPrimScalarNe, {prim::ScalarNe, true, std::make_shared(), true}}, - {prim::kPrimScalarLe, {prim::ScalarLe, true, std::make_shared(), true}}, - {prim::kPrimScalarGe, {prim::ScalarGe, true, std::make_shared(), true}}, - {prim::kPrimBoolNot, {prim::BoolNot, true, std::make_shared(), true}}, - {prim::kPrimBoolAnd, {prim::BoolAnd, true, std::make_shared(), true}}, - {prim::kPrimBoolEq, {prim::BoolEq, true, std::make_shared(), true}}, - {prim::kPrimBoolOr, {prim::BoolOr, true, std::make_shared(), true}}, -}; +PrimitiveToImplMap &GetUniformPrimitiveToImplMap() { + static PrimitiveToImplMap uniform_prim_implement_map = { + {prim::kPrimScalarAdd, {prim::ScalarAdd, true, nullptr, true}}, + {prim::kPrimScalarSub, {prim::ScalarSub, true, nullptr, true}}, + {prim::kPrimScalarMul, {prim::ScalarMul, true, nullptr, true}}, + {prim::kPrimScalarDiv, {prim::ScalarDiv, true, nullptr, true}}, + {prim::kPrimScalarMod, {prim::ScalarMod, true, nullptr, true}}, + {prim::kPrimScalarUadd, {prim::ScalarUAdd, true, nullptr, true}}, + 
{prim::kPrimScalarUsub, {prim::ScalarUSub, true, nullptr, true}}, + {prim::kPrimScalarLog, {prim::ScalarLog, true, nullptr, true}}, + {prim::kPrimScalarEq, {prim::ScalarEq, true, std::make_shared(), true}}, + {prim::kPrimScalarLt, {prim::ScalarLt, true, std::make_shared(), true}}, + {prim::kPrimScalarGt, {prim::ScalarGt, true, std::make_shared(), true}}, + {prim::kPrimScalarNe, {prim::ScalarNe, true, std::make_shared(), true}}, + {prim::kPrimScalarLe, {prim::ScalarLe, true, std::make_shared(), true}}, + {prim::kPrimScalarGe, {prim::ScalarGe, true, std::make_shared(), true}}, + {prim::kPrimBoolNot, {prim::BoolNot, true, std::make_shared(), true}}, + {prim::kPrimBoolAnd, {prim::BoolAnd, true, std::make_shared(), true}}, + {prim::kPrimBoolEq, {prim::BoolEq, true, std::make_shared(), true}}, + {prim::kPrimBoolOr, {prim::BoolOr, true, std::make_shared(), true}}, + }; + return uniform_prim_implement_map; +} PrimEvaluatorMap PrimEvaluatorConstructors = PrimEvaluatorMap(); std::mutex PrimEvaluatorConstructorMutex; -void InitPrimEvaluatorConstructors(const PrimitiveEvalImplMap &prim_eval_impl_map) { +void InitPrimEvaluatorConstructors() { PrimEvaluatorMap &constructor = PrimEvaluatorConstructors; - for (const auto &iter : prim_eval_impl_map) { + for (const auto &iter : GetPrimitiveToEvalImplMap()) { constructor[iter.first] = InitStandardPrimEvaluator(iter.first, iter.second.impl_); } - for (const auto &iter : UniformPrimitiveToImplMapValue) { + for (const auto &iter : GetUniformPrimitiveToImplMap()) { constructor[iter.first] = InitUniformPrimEvaluator(iter.first, iter.second.impl_, iter.second.eval_value_, iter.second.specify_out_type_); } @@ -1028,20 +1030,20 @@ void InitPrimEvaluatorConstructors(const PrimitiveEvalImplMap &prim_eval_impl_ma void ClearPrimEvaluatorMap() { PrimEvaluatorConstructors.clear(); - PrimitiveToInferImplMap.clear(); - UniformPrimitiveToImplMapValue.clear(); + GetPrimitiveToEvalImplMap().clear(); + GetUniformPrimitiveToImplMap().clear(); } bool 
IsInWhiteList(const PrimitivePtr primitive) { MS_EXCEPTION_IF_NULL(primitive); - auto iter = PrimitiveToInferImplMap.find(primitive); - if (iter != PrimitiveToInferImplMap.end()) { + auto iter = GetPrimitiveToEvalImplMap().find(primitive); + if (iter != GetPrimitiveToEvalImplMap().end()) { return iter->second.in_white_list_; } - auto uni_iter = UniformPrimitiveToImplMapValue.find(primitive); - if (uni_iter != UniformPrimitiveToImplMapValue.end()) { + auto uni_iter = GetUniformPrimitiveToImplMap().find(primitive); + if (uni_iter != GetUniformPrimitiveToImplMap().end()) { return uni_iter->second.in_white_list_; } @@ -1050,8 +1052,8 @@ bool IsInWhiteList(const PrimitivePtr primitive) { StandardPrimitiveEvalImpl GetPrimitiveInferImpl(const PrimitivePtr &primitive) { MS_EXCEPTION_IF_NULL(primitive); - auto iter = PrimitiveToInferImplMap.find(primitive); - if (iter == PrimitiveToInferImplMap.end()) { + auto iter = GetPrimitiveToEvalImplMap().find(primitive); + if (iter == GetPrimitiveToEvalImplMap().end()) { return nullptr; } return iter->second.impl_; @@ -1064,7 +1066,7 @@ PrimEvaluatorMap &GetPrimEvaluatorConstructors() { } std::lock_guard initLock(PrimEvaluatorConstructorMutex); if (constructor.empty()) { - InitPrimEvaluatorConstructors(PrimitiveToInferImplMap); + InitPrimEvaluatorConstructors(); } return constructor; diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc index 4ab7b9d20b..6230df44a5 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc @@ -296,38 +296,35 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr if (prim->HasPyEvaluator()) { auto prim_py = dyn_cast(prim); if (prim_py != nullptr) { - evaluator = std::make_shared(prim_py); - } else { - MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; + return 
std::make_shared(prim_py); + } + MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; + } + + if (prim->isa() || prim->HasAttr()) { + if (engine == nullptr) { + (void)GetPrimEvaluatorConstructors(); } - } else if (prim->isa() || prim->HasAttr()) { // If a primitive may have attr, try to create a new evaluator. StandardPrimitiveEvalImpl eval_impl = GetPrimitiveInferImpl(prim); if (eval_impl != nullptr) { - std::shared_ptr standard_evaluator = - std::make_shared(prim, eval_impl); - evaluator = standard_evaluator; + return std::make_shared(prim, eval_impl); } } - if (evaluator == nullptr) { - if (engine == nullptr) { - // If engine is nullptr, get constructor from default. - const PrimEvaluatorMap &prim_evaluator_map = GetPrimEvaluatorConstructors(); - auto iter = prim_evaluator_map.find(prim); - if (iter == prim_evaluator_map.end()) { - evaluator = nullptr; - } else { - evaluator = iter->second; - } - } else { - // If engine is given, get constructor from engine resource. - const PrimEvaluatorMap &prim_evaluator_map = engine->PrimConstructors(); - auto iter = prim_evaluator_map.find(prim); - if (iter == prim_evaluator_map.end()) { - evaluator = nullptr; - } else { - evaluator = iter->second; - } + + if (engine == nullptr) { + // If engine is nullptr, get constructor from default. + const PrimEvaluatorMap &prim_evaluator_map = GetPrimEvaluatorConstructors(); + auto iter = prim_evaluator_map.find(prim); + if (iter != prim_evaluator_map.end()) { + evaluator = iter->second; + } + } else { + // If engine is given, get constructor from engine resource. 
+ const PrimEvaluatorMap &prim_evaluator_map = engine->PrimConstructors(); + auto iter = prim_evaluator_map.find(prim); + if (iter != prim_evaluator_map.end()) { + evaluator = iter->second; } } if (evaluator == nullptr) { From d84cfb01080b650c16fed895f7717ba667b60ca3 Mon Sep 17 00:00:00 2001 From: kswang Date: Wed, 8 Apr 2020 17:05:17 +0800 Subject: [PATCH 40/58] add mem manager --- mindspore/ccsrc/CMakeLists.txt | 1 + .../device/ascend/ascend_kernel_runtime.cc | 57 +---- .../device/ascend/ascend_kernel_runtime.h | 5 +- .../device/ascend/ascend_memory_manager.cc | 65 +++++ .../device/ascend/ascend_memory_manager.h | 35 +++ mindspore/ccsrc/device/device_address.h | 5 + .../ccsrc/device/gpu/gpu_kernel_runtime.cc | 108 ++------ .../ccsrc/device/gpu/gpu_kernel_runtime.h | 9 +- .../ccsrc/device/gpu/gpu_memory_manager.cc | 88 +++++++ .../ccsrc/device/gpu/gpu_memory_manager.h | 40 +++ mindspore/ccsrc/device/kernel_runtime.cc | 231 +++--------------- mindspore/ccsrc/device/kernel_runtime.h | 38 +-- mindspore/ccsrc/device/memory_manager.cc | 170 +++++++++++++ mindspore/ccsrc/device/memory_manager.h | 71 ++++++ .../ccsrc/session/anf_runtime_algorithm.cc | 10 + .../ccsrc/session/anf_runtime_algorithm.h | 1 + mindspore/ccsrc/session/gpu_session.cc | 4 - tests/ut/cpp/CMakeLists.txt | 2 + 18 files changed, 562 insertions(+), 378 deletions(-) create mode 100644 mindspore/ccsrc/device/ascend/ascend_memory_manager.cc create mode 100644 mindspore/ccsrc/device/ascend/ascend_memory_manager.h create mode 100644 mindspore/ccsrc/device/gpu/gpu_memory_manager.cc create mode 100644 mindspore/ccsrc/device/gpu/gpu_memory_manager.h create mode 100644 mindspore/ccsrc/device/memory_manager.cc create mode 100644 mindspore/ccsrc/device/memory_manager.h diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 9f559a51eb..1d104148c3 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -132,6 +132,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST 
RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/kash/*.cc" "device/kernel_info.cc" "device/kernel_runtime.cc" + "device/memory_manager.cc" "device/kernel_runtime_manager.cc" "device/convert_tensor_utils.cc" "pre_activate/common/*.cc" diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index dc7eb5449b..0c2a97a5a6 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -37,6 +37,7 @@ #include "kernel/tbe/tbe_utils.h" #include "kernel/tbe/tbe_python_funcs.h" #include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "device/ascend/ascend_memory_manager.h" using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; @@ -47,8 +48,6 @@ using std::vector; namespace mindspore { namespace device { namespace ascend { -static const uint64_t ASCEND_MEM_SIZE = 20; -static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); static const size_t PRAMATER_OUTPUT_INDEX = 0; AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } @@ -86,7 +85,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() { MS_EXCEPTION(DeviceProcessError) << "rtSetDevice, ret[" << static_cast(ret) << "]"; } - FreeDeviceMemory(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->FreeDeviceMemory(); (void)DestroyHccl(); (void)ResetDevice(); (void)ProfilingManager::GetInstance().StopProfiling(); @@ -109,11 +109,9 @@ bool AscendKernelRuntime::Init() { if (!ret) { return ret; } - - ret = MallocDeviceMemory(); - if (!ret) { - return ret; - } + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->MallocDeviceMemory(); ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { @@ -239,13 +237,6 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size return std::make_shared(device_ptr, device_size, format, type_id); } 
-void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { - auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size); - MS_EXCEPTION_IF_NULL(device_ptr); - address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; -} - bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -474,42 +465,6 @@ bool AscendKernelRuntime::DestroyHccl() { context_ptr->set_enable_hccl(false); return true; } - -bool AscendKernelRuntime::MallocDeviceMemory() { - device_mem_size_ = ASCEND_MEM_SIZE_BYTE; - static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); - auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; - } - device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); - ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); - return true; -} - -void AscendKernelRuntime::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - auto ret = rtFree(device_mem_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_base_ = nullptr; - } - if (device_mem_pool_base_ != nullptr) { - auto ret = rtFree(device_mem_pool_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << 
"]"; - } - device_mem_pool_base_ = nullptr; - } -} - -void AscendKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h index dbd1460d24..0eedad3d2b 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h @@ -39,13 +39,11 @@ class AscendKernelRuntime : public KernelRuntime { bool GenTask(const session::KernelGraph *graph) override; bool RunTask(const session::KernelGraph *graph) override; bool LoadTask(const session::KernelGraph *graph) override; - void FreeHostMemory() override; protected: DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) override; bool SyncStream() override; - void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; private: bool InitDevice(); @@ -53,8 +51,7 @@ class AscendKernelRuntime : public KernelRuntime { bool HcclInit(); bool NeedDestroyHccl(); bool DestroyHccl(); - bool MallocDeviceMemory(); - void FreeDeviceMemory(); + void ClearGraphModelMap(); void ReleaseDeviceRes() override; uint32_t GetGraphModelId(const session::KernelGraph *kernel_graph); diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc new file mode 100644 index 0000000000..f033d81d82 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -0,0 +1,65 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/ascend_memory_manager.h" +#include "device/ascend/ascend_memory_allocator.h" +#include "utils/context/ms_context.h" +#include "runtime/mem.h" +namespace mindspore { +namespace device { +namespace ascend { +static const uint64_t ASCEND_MEM_SIZE = 20; +static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); + +void AscendMemoryManager::MallocDeviceMemory() { + device_mem_size_ = ASCEND_MEM_SIZE_BYTE; + static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); + auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); + ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); +} + +void AscendMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + auto ret = rtFree(device_mem_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_base_ = 
nullptr; + } + if (device_mem_pool_base_ != nullptr) { + auto ret = rtFree(device_mem_pool_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_base_ = nullptr; + } +} + +void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) { + return AscendMemoryAllocator::GetInstance().AllocTensorMem(size); +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h new file mode 100644 index 0000000000..8639fb5c72 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -0,0 +1,35 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ +#include "device/memory_manager.h" +namespace mindspore { +namespace device { +namespace ascend { +class AscendMemoryManager : public MemoryManager { + public: + AscendMemoryManager() = default; + virtual ~AscendMemoryManager() = default; + + void MallocDeviceMemory() override; + void FreeDeviceMemory() override; + void *AllocTensorMemDynamic(size_t size) override; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index 1610d43372..cb022427e3 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -33,12 +33,14 @@ class CPUKernelRuntime; } // namespace cpu namespace ascend { class AscendKernelRuntime; +class AscendMemoryManager; namespace tasksink { class TaskGenerator; } // namespace tasksink } // namespace ascend namespace gpu { class GPUKernelRuntime; +class GPUMemoryManager; } // namespace gpu } // namespace device } // namespace mindspore @@ -70,12 +72,15 @@ class DeviceAddress { TypeId type_id_{kNumberTypeFloat16}; bool mem_dynamic_alloc_{false}; friend class KernelRuntime; + friend class MemoryManager; friend class mindspore::device::ascend::tasksink::TaskGenerator; friend class mindspore::device::cpu::CPUSimpleMemPlan; friend class mindspore::device::cpu::CPUResourceManager; friend class mindspore::device::cpu::CPUKernelRuntime; friend class mindspore::device::gpu::GPUKernelRuntime; + friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; + friend class mindspore::device::ascend::AscendMemoryManager; }; using DeviceAddressPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc 
b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 9eeb1062f7..597e188e9d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -26,6 +26,7 @@ #include "device/kernel_runtime_manager.h" #include "device/gpu/gpu_common.h" #include "common/utils.h" +#include "device/gpu/gpu_memory_manager.h" namespace mindspore { namespace device { @@ -36,26 +37,14 @@ bool GPUKernelRuntime::Init() { if (device_init_ == true) { return true; } - auto ret = InitDevice(); if (!ret) { MS_LOG(ERROR) << "InitDevice error."; return ret; } - - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - // If use the dynamic memory pool, then alloc the first memory block to init. - if (context_ptr->enable_dynamic_mem_pool()) { - auto device_addr = AllocTensorMemDynamic(1); - if (!device_addr) { - MS_LOG(ERROR) << "Dynamic memory pool init error."; - return false; - } - } else { - MallocDeviceMemory(); - } - + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->MallocDeviceMemory(); const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); bool collective_inited = CollectiveInitializer::instance().collective_inited(); if (collective_inited && collective_handle_ != nullptr) { @@ -101,16 +90,6 @@ bool GPUKernelRuntime::InitDevice() { return true; } -void GPUKernelRuntime::MallocDeviceMemory() { - // Need to reserve 20% space for dynamic memory - const float init_gpu_mem_ratio = 0.8; - size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); - auto alloc_size = - GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast(&device_mem_base_)); - device_mem_size_ = alloc_size; - static_mem_offset_ = device_mem_size_; -} - void GPUKernelRuntime::ReleaseDeviceRes() { // For dataset mode. 
if (GpuBufferMgr::GetInstance().IsInit()) { @@ -122,39 +101,22 @@ void GPUKernelRuntime::ReleaseDeviceRes() { CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue."); } GPUDeviceManager::GetInstance().ReleaseDevice(); - if (device_mem_base_ != nullptr) { - if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { - MS_LOG(EXCEPTION) << "Could not free gpu device memory."; - } - } - GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); -} - -void GPUKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } - -void *GPUKernelRuntime::AllocTensorMemDynamic(size_t size) { - return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); -} - -void GPUKernelRuntime::FreeTensorMemDynamic(void *device_ptr) { - GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->FreeDeviceMemory(); } void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->ResetDynamicMemory(); AssignStaticMemory(graph); - bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool(); if (is_enable_dynamic_mem) { // Use the dynamic memory pool. InitKernelRefCount(graph); InitKernelOutputAddress(graph); - } else if (is_enable_mem_reuse) { - // Use the memory reuse. - ReuseAssignDynamicMemory(graph); } else { - // Normal way. 
AssignDynamicMemory(graph); } } @@ -179,32 +141,6 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) { return ret; } -uint8_t *GPUKernelRuntime::MallocStaticMem(size_t size, bool) { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->enable_dynamic_mem_pool()) { - auto device_ptr = AllocTensorMemDynamic(size); - MS_EXCEPTION_IF_NULL(device_ptr); - return AddressOffset(device_ptr, 0); - } - - auto align_size = GetCommonAlignSize(size); - if (static_mem_offset_ < align_size) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - auto offset = static_mem_offset_ - align_size; - if (dynamic_mem_offset_ > offset) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_static_size_ += align_size; - static_mem_offset_ = offset; - return device_mem_base_ + offset; -} - void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); @@ -273,6 +209,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(kernel_inputs); MS_EXCEPTION_IF_NULL(kernel_workspaces); MS_EXCEPTION_IF_NULL(kernel_outputs); + MS_EXCEPTION_IF_NULL(mem_manager_); for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); @@ -290,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(device_address); auto device_ptr = device_address->ptr_; if (device_ptr == nullptr) { - device_ptr = AllocTensorMemDynamic(output_sizes[i]); + 
device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); device_address->ptr_ = device_ptr; } @@ -307,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod kernel_workspaces->emplace_back(nullptr); continue; } - auto device_ptr = AllocTensorMemDynamic(workspace_sizes[i]); + auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); @@ -333,6 +270,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The reference count of communication kernel input is not 0. if (communication_op_input_ref_count_ != 0) { MS_LOG(ERROR) << "The reference count of communication kernel input is not 0."; @@ -354,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN addr_size.emplace_back(device_address.get(), output_size); } - auto device_mem_ptr = AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -366,6 +304,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The reference count of communication kernel output is not 0. 
if (communication_op_output_ref_count_ != 0) { MS_LOG(ERROR) << "The reference count of communication kernel output is not 0."; @@ -389,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf addr_size.emplace_back(device_address.get(), output_sizes[i]); } - auto device_mem_ptr = AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -402,6 +341,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); auto cnode = kernel->cast(); MS_EXCEPTION_IF_NULL(cnode); // Free the input of kernel by reference count. @@ -421,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } } @@ -432,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto workspace = kernel_workspaces[i]; if (workspace != nullptr) { MS_EXCEPTION_IF_NULL(workspace->addr); - FreeTensorMemDynamic(workspace->addr); + mem_manager_->FreeTensorMemDynamic(workspace->addr); workspace->addr = nullptr; } } @@ -441,6 +381,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The inputs memory of communication 
kernel is one piece memory, need release together. if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) { communication_op_input_ref_count_--; @@ -448,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; @@ -470,19 +411,12 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; } } - -void GPUKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { - auto device_ptr = AllocTensorMemDynamic(size); - MS_EXCEPTION_IF_NULL(device_ptr); - address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; -} } // namespace gpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h index f3fdb5fa98..6f761342d3 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h @@ -33,7 +33,6 @@ class GPUKernelRuntime : public KernelRuntime { ~GPUKernelRuntime() override = default; bool Init() override; void ReleaseDeviceRes() override; - void FreeHostMemory() override; void AssignMemory(session::KernelGraph *graph) override; bool Run(session::KernelGraph *graph) override; @@ -41,18 +40,11 @@ class GPUKernelRuntime : public KernelRuntime { DeviceAddressPtr CreateDeviceAddress(void *device_ptr, 
size_t device_size, const string &format, TypeId type_id) override; bool SyncStream() override; - // Alloc memory use the dynamic memory pool. - void *AllocTensorMemDynamic(size_t size) override; - // Free memory use the dynamic memory pool. - void FreeTensorMemDynamic(void *device_ptr) override; - void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; - uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; private: GPUKernelRuntime(const GPUKernelRuntime &); GPUKernelRuntime &operator=(const GPUKernelRuntime &); bool InitDevice(); - void MallocDeviceMemory(); bool device_init_{false}; // The related functions and members for using dynamic memory pool. @@ -69,6 +61,7 @@ class GPUKernelRuntime : public KernelRuntime { void FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op); size_t communication_op_input_ref_count_{0}; size_t communication_op_output_ref_count_{0}; + MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; }; MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime); } // namespace gpu diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc new file mode 100644 index 0000000000..3944b504e4 --- /dev/null +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -0,0 +1,88 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "device/gpu/gpu_memory_manager.h" +#include "device/gpu/gpu_memory_allocator.h" +#include "utils/context/ms_context.h" +#include "utils/convert_utils.h" +namespace mindspore { +namespace device { +namespace gpu { +void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) { + return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); +} + +void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) { + GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); +} + +void GPUMemoryManager::MallocDeviceMemory() { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + // If use the dynamic memory pool, then alloc the first memory block to init. + if (context_ptr->enable_dynamic_mem_pool()) { + auto device_addr = AllocTensorMemDynamic(1); + if (!device_addr) { + MS_LOG(ERROR) << "Dynamic memory pool init error."; + } + } else { + // Need to reserve 20% space for dynamic memory + const float init_gpu_mem_ratio = 0.8; + size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); + auto alloc_size = + GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast(&device_mem_base_)); + device_mem_size_ = alloc_size; + static_mem_offset_ = device_mem_size_; + } +} + +void GPUMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { + MS_LOG(EXCEPTION) << "Could not free gpu device memory."; + } + } + GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); +} + +uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (context_ptr->enable_dynamic_mem_pool()) { + auto device_ptr = AllocTensorMemDynamic(size); + MS_EXCEPTION_IF_NULL(device_ptr); + return AddressOffset(device_ptr, 0); + } + + auto align_size = GetCommonAlignSize(size); + if (static_mem_offset_ < align_size) { + 
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + auto offset = static_mem_offset_ - align_size; + if (dynamic_mem_offset_ > offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_static_size_ += align_size; + static_mem_offset_ = offset; + return device_mem_base_ + offset; +} +} // namespace gpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h new file mode 100644 index 0000000000..a18226bdf3 --- /dev/null +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h @@ -0,0 +1,40 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ +#include "device/memory_manager.h" +namespace mindspore { +namespace device { +namespace gpu { +class GPUMemoryManager : public MemoryManager { + public: + GPUMemoryManager() = default; + virtual ~GPUMemoryManager() = default; + + void MallocDeviceMemory() override; + void FreeDeviceMemory() override; + + void *AllocTensorMemDynamic(size_t size) override; + void FreeTensorMemDynamic(void *device_ptr) override; + + protected: + uint8_t *MallocStaticMem(size_t size, bool communication_mem); +}; +} // namespace gpu +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 0a9be35fb5..16025ed8a4 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -31,18 +31,13 @@ #include "ir/value.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; -using mindspore::memreuse::BestFitMemReuse; -using mindspore::memreuse::MemReuseUtilPtr; namespace mindspore { namespace device { KernelRuntime::~KernelRuntime() { - device_mem_base_ = nullptr; - device_mem_pool_base_ = nullptr; #ifdef ENABLE_DUMP_E2E dump_conf_ptr_ = nullptr; #endif - mem_reuse_util_ptr_ = nullptr; } bool KernelRuntime::Run(session::KernelGraph *graph) { @@ -88,11 +83,6 @@ bool KernelRuntime::LoadTask(const session::KernelGraph *graph) { return false; } -void KernelRuntime::FreeHostMemory() { - dynamic_mem_offset_ = 0; - static_mem_offset_ = 0; -} - // for D to impl bool KernelRuntime::RunTask(const session::KernelGraph *graph) { if (graph != nullptr) { @@ -126,13 +116,11 @@ size_t KernelRuntime::CountNodeDeviceMemorySize(const mindspore::AnfNodePtr &nod void KernelRuntime::AssignMemory(session::KernelGraph *graph) { auto context_ptr = 
MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->ResetDynamicMemory(); AssignStaticMemory(graph); - bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); - if (is_enable_mem_reuse) { - ReuseAssignDynamicMemory(graph); - } else { - AssignDynamicMemory(graph); - } + AssignDynamicMemory(graph); + UpdateRefNodeOutputMem(graph); } @@ -159,6 +147,7 @@ void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { void KernelRuntime::RunOpAssignInputMemory(const std::vector &input_tensors, const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) { auto item = graph->inputs()[input_index]; MS_EXCEPTION_IF_NULL(item); @@ -180,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, tensor_size, kStaticMem); + mem_manager_->MallocOpMemory(device_address, tensor_size); AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -188,6 +177,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); @@ -208,13 +198,14 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, output_sizes[i], kDynamicMem); + 
mem_manager_->MallocOpMemory(device_address, output_sizes[i]); AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); if (kernel->isa()) { auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); @@ -222,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, workspace_lists[i], kDynamicMem); + mem_manager_->MallocOpMemory(device_address, workspace_lists[i]); AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } @@ -230,6 +221,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (auto &item : graph->inputs()) { MS_EXCEPTION_IF_NULL(item); if (!item->isa()) { @@ -247,7 +239,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { output_type_id = AnfAlgo::GetOutputInferDataType(item, index); } auto tensor_size = CountNodeDeviceMemorySize(item, index); - auto ptr = MallocStaticMem(tensor_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); auto address = CreateDeviceAddress(ptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); AnfAlgo::SetOutputAddr(address, index, item.get()); } @@ -301,6 +293,7 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) { void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(node); 
MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); @@ -314,12 +307,12 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr std::vector align_size_list; for (uint64_t mem_size : output_sizes) { if (context_ptr->enable_hccl()) { - mem_size = GetCommonAlignSize(mem_size); + mem_size = mem_manager_->GetCommonAlignSize(mem_size); } total_size += mem_size; align_size_list.emplace_back(mem_size); } - uint8_t *output_ptr = CalDeviceMem(node, total_size, flag, 0); + uint8_t *output_ptr = mem_manager_->MallocOutputMem(node, 0, flag, total_size); for (size_t j = 0; j < align_size_list.size(); ++j) { std::string output_format = AnfAlgo::GetOutputFormat(node, j); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j); @@ -333,6 +326,7 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); size_t total_size = 0; std::vector> addr_size; for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(node); ++i) { @@ -340,12 +334,12 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(address); auto mem_size = address->size(); if (context_ptr->enable_hccl()) { - mem_size = GetCommonAlignSize(mem_size); + mem_size = mem_manager_->GetCommonAlignSize(mem_size); } total_size += mem_size; addr_size.emplace_back(address.get(), mem_size); } - uint8_t *input_ptr = CalDeviceMem(node, total_size, kDynamicMem, 0); + uint8_t *input_ptr = mem_manager_->MallocOutputMem(node, 0, kDynamicMem, total_size); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); iter.first->set_ptr(input_ptr); @@ -355,7 +349,8 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) { MS_EXCEPTION_IF_NULL(node); - if 
(IsCommunicationOp(node)) { + MS_EXCEPTION_IF_NULL(mem_manager_); + if (AnfAlgo::IsCommunicationOp(node)) { UpdateCommunicationOpInputMem(node); AssignCommunicationNodeOutputMem(flag, node); return; @@ -375,7 +370,7 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in MS_LOG(INFO) << "Already malloc index:" << i; continue; } - auto ptr = CalDeviceMem(node, output_sizes[i], flag, i); + auto ptr = mem_manager_->MallocOutputMem(node, i, flag, output_sizes[i]); if (ptr == nullptr) { // reused ptr, no need alloc, continue; continue; @@ -390,6 +385,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const size_t output_idx) { MS_EXCEPTION_IF_NULL(value_node); MS_EXCEPTION_IF_NULL(node_value); + MS_EXCEPTION_IF_NULL(mem_manager_); auto tensor = node_value->cast(); if (tensor == nullptr) { MS_LOG(WARNING) << "Tensor is null"; @@ -397,7 +393,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const } size_t tensor_size = tensor->data().nbytes(); auto node_size = CountNodeDeviceMemorySize(value_node, output_idx); - auto ptr = MallocStaticMem(node_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, node_size); TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx); if (output_type_id == kTypeUnknown) { output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); @@ -414,6 +410,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (auto &value_node : graph->graph_value_nodes()) { MS_EXCEPTION_IF_NULL(value_node); if (AnfAlgo::OutputAddrExist(value_node, 0)) { @@ -440,7 +437,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { } else if (node_value->isa()) { auto value = GetValue(node_value); size_t tensor_size = value.size(); - auto ptr 
= MallocStaticMem(tensor_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); auto address = CreateDeviceAddress(ptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8); MS_EXCEPTION_IF_NULL(address); AnfAlgo::SetOutputAddr(address, 0, value_node.get()); @@ -452,103 +449,37 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { } } -void KernelRuntime::AssignDynamicMemory(const session::KernelGraph *graph) { +void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - // reset dynamic mem offset - dynamic_mem_offset_ = 0; - auto &kernels = graph->execution_order(); - for (auto &kernel : kernels) { - AssignNodeOutputMem(kDynamicMem, kernel, kGetAllOuts); - AssignWorkSpaceMem(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); + auto mem_flag = kDynamicMem; + if (is_enable_mem_reuse) { + mem_manager_->InitReuseDynamicMemory(graph); + mem_flag = kReuseDynamicMem; } -} - -void KernelRuntime::ReuseAssignDynamicMemory(session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - dynamic_mem_offset_ = 0; - MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); - MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); - // set all infos - mem_reuse_util_ptr->SetAllInfo(graph); - auto bestfit_mem_reuse = std::make_shared(); - MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); - bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); - size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); - MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; - mem_reuse_util_ptr_ = mem_reuse_util_ptr; - auto base_ptr = MallocDynamicMem(total_allocated_size, false); - mem_reuse_util_ptr_->set_mem_base(base_ptr); auto &kernels = graph->execution_order(); for (auto &kernel : kernels) { - AssignNodeOutputMem(kReuseDynamicMem, kernel, kGetAllOuts); - 
AssignReuseWorkSpaceMem(kernel); + AssignNodeOutputMem(mem_flag, kernel, kGetAllOuts); + AssignWorkSpaceMem(mem_flag, kernel); } } -void KernelRuntime::AssignReuseWorkSpaceMem(const AnfNodePtr &node) { +void KernelRuntime::AssignWorkSpaceMem(int flag, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(node); MS_EXCEPTION_IF_NULL(kernel_mod); size_t index = 0; for (auto &size : kernel_mod->GetWorkspaceSizeList()) { - auto wk_ptr = mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); - AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(wk_ptr, size, "", kTypeUnknown), index, node.get()); + auto ptr = mem_manager_->MallocWorkSpaceMem(node, flag, index, size); + AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); index++; } } -void KernelRuntime::AssignWorkSpaceMem(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - if (node->isa()) { - auto kernel_mod = AnfAlgo::GetKernelMod(node); - MS_EXCEPTION_IF_NULL(kernel_mod); - size_t index = 0; - for (auto &size : kernel_mod->GetWorkspaceSizeList()) { - auto ptr = MallocDynamicMem(size, false); - AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); - index++; - } - } -} - -bool KernelRuntime::IsCommunicationOp(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto kernel_name = AnfAlgo::GetCNodeName(node); - auto kernel_type = AnfAlgo::GetKernelType(node); - if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { - return true; - } - return false; -} - -uint8_t *KernelRuntime::CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index) { - MS_EXCEPTION_IF_NULL(node); - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - uint8_t *ptr = nullptr; - if (IsCommunicationOp(node)) { - bool communication_mem = false; - if (context_ptr->enable_hccl()) { - communication_mem = true; - } - if (flag == 
kStaticMem) { - ptr = MallocStaticMem(size, communication_mem); - } else { - ptr = MallocDynamicMem(size, communication_mem); - } - return ptr; - } - - if (flag == kStaticMem) { - ptr = MallocStaticMem(size, false); - } else if (flag == kDynamicMem) { - ptr = MallocDynamicMem(size, false); - } else if (flag == kReuseDynamicMem) { - ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); - } - return ptr; -} - void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs, AddressPtrList *const kernel_workspaces, AddressPtrList *kernel_outputs) { @@ -659,65 +590,6 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) { return true; } -size_t KernelRuntime::GetCommonAlignSize(size_t input_size) const { - return (input_size + mem_align_size_ + 31) / mem_align_size_ * mem_align_size_; -} - -size_t KernelRuntime::GetCommunicationAlignSize(size_t input_size) const { - return (input_size + mem_align_size_ - 1) / mem_align_size_ * mem_align_size_ + 2 * mem_align_size_; -} - -uint8_t *KernelRuntime::MallocStaticMem(size_t size, bool communication_mem) { - size_t align_size = 0; - if (communication_mem) { - align_size = GetCommunicationAlignSize(size); - } else { - align_size = GetCommonAlignSize(size); - } - if (static_mem_offset_ < align_size) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_static_size_ += align_size; - auto offset = static_mem_offset_ - align_size; - if (dynamic_mem_offset_ > offset) { - MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - static_mem_offset_ = offset; - if (communication_mem) { - return device_mem_base_ + offset + mem_align_size_; - } else { - return device_mem_base_ + offset; - } -} - -uint8_t *KernelRuntime::MallocDynamicMem(size_t size, bool communication_mem) { - size_t align_size = 0; - if (communication_mem) { - align_size = GetCommunicationAlignSize(size); - } else { - align_size = GetCommonAlignSize(size); - } - uint64_t offset = dynamic_mem_offset_; - auto new_offset = dynamic_mem_offset_ + align_size; - if (new_offset > static_mem_offset_) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_dynamic_size_ += align_size; - dynamic_mem_offset_ = new_offset; - - if (communication_mem) { - return device_mem_base_ + offset + mem_align_size_; - } else { - return device_mem_base_ + offset; - } -} - bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); if (!LaunchKernelMod(*graph)) { @@ -731,29 +603,6 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { return true; } -void KernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) { - if (flag == kStaticMem) { - address->ptr_ = MallocStaticMem(size, false); - } else if (flag == kDynamicMem) { - address->ptr_ = MallocDynamicMem(size, false); - } else { - MS_LOG(EXCEPTION) << "Unknown memory type!"; - } -} - -void *KernelRuntime::AllocTensorMemDynamic(size_t size) { - if (size == 0) { - MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; - } - return nullptr; -} - -void KernelRuntime::FreeTensorMemDynamic(void *device_ptr) { - if (device_ptr == nullptr) { - MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; - 
} -} - #ifdef ENABLE_DUMP_E2E bool KernelRuntime::SetDumpConf() { dump_conf_ptr_ = std::make_shared(); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index ac9a56ed4d..1224bf14eb 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -20,8 +20,7 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" + #include "device/device_address.h" #include "ir/meta_tensor.h" #include "predict/generator/utils/ir_model_util.h" @@ -32,21 +31,16 @@ #include "session/anf_runtime_algorithm.h" #include "kernel/kernel.h" #include "utils/context/ms_context.h" +#include "device/memory_manager.h" // using mindspore::session::KernelGraph; using mindspore::tensor::Tensor; using TensorPtr = std::shared_ptr; -using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; using mindspore::kernel::AddressPtr; using AddressPtrList = std::vector; namespace mindspore { namespace device { -const int kStaticMem = 0; -const int kDynamicMem = 1; -const int kReuseDynamicMem = 2; -const int kGetAllOuts = -1; - class KernelRuntime { public: KernelRuntime() = default; @@ -65,7 +59,6 @@ class KernelRuntime { DumpConfPtr GetDumpConf(); #endif virtual bool LoadTask(const session::KernelGraph *graph); - virtual void FreeHostMemory(); // for GPU and D to impl virtual void ReleaseDeviceRes() {} void set_device_id(uint32_t device_id) { device_id_ = device_id; } @@ -75,29 +68,17 @@ class KernelRuntime { TypeId type_id) = 0; virtual bool SyncStream() = 0; void AssignStaticMemory(session::KernelGraph *graph); - void AssignDynamicMemory(const session::KernelGraph *graph); + void AssignDynamicMemory(session::KernelGraph *graph); void ReuseAssignDynamicMemory(session::KernelGraph *graph); void AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index); - void AssignWorkSpaceMem(const AnfNodePtr &node); + void AssignWorkSpaceMem(int flag, 
const AnfNodePtr &node); void AssignReuseWorkSpaceMem(const AnfNodePtr &node); void AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node); void UpdateRefNodeOutputMem(const session::KernelGraph *graph); void UpdateCommunicationOpInputMem(const AnfNodePtr &node); - bool IsCommunicationOp(const AnfNodePtr &node); - size_t GetCommonAlignSize(size_t input_size) const; - size_t GetCommunicationAlignSize(size_t input_size) const; - - uint8_t *CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index); - virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); - uint8_t *MallocDynamicMem(size_t size, bool communication_mem); #ifdef ENABLE_DUMP_E2E bool SetDumpConf(); #endif - // Alloc memory use the dynamic memory pool. - virtual void *AllocTensorMemDynamic(size_t size); - // Free memory use the dynamic memory pool. - virtual void FreeTensorMemDynamic(void *device_ptr); - virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag); private: void AssignStaticMemoryOutput(const session::KernelGraph *graph); @@ -114,20 +95,11 @@ class KernelRuntime { protected: uint32_t device_id_{0}; - uint8_t *device_mem_base_{nullptr}; - uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_size_{0}; - uint64_t dynamic_mem_offset_{0}; - uint64_t static_mem_offset_{0}; - const uint64_t mem_align_size_ = 512; #ifdef ENABLE_DUMP_E2E DumpConfPtr dump_conf_ptr_; #endif void *stream_ = nullptr; - size_t total_static_size_ = 0; - size_t total_dynamic_size_ = 0; - MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; + std::shared_ptr mem_manager_{nullptr}; }; using KernelRuntimePtr = std::shared_ptr; } // namespace device diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc new file mode 100644 index 0000000000..3c1ddee6bc --- /dev/null +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -0,0 +1,170 @@ +/** + * Copyright 2019 Huawei Technologies Co., 
Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/memory_manager.h" +#include "session/anf_runtime_algorithm.h" +#include "utils/context/ms_context.h" +using mindspore::memreuse::BestFitMemReuse; +using mindspore::memreuse::MemReuseUtilPtr; +namespace mindspore { +namespace device { +MemoryManager::~MemoryManager() { + device_mem_base_ = nullptr; + device_mem_pool_base_ = nullptr; + mem_reuse_util_ptr_ = nullptr; +} + +size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { + return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; +} + +size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { + return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; +} + +void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); + // set all infos + mem_reuse_util_ptr->SetAllInfo(graph); + auto bestfit_mem_reuse = std::make_shared(); + MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); + bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); + size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); + MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; + mem_reuse_util_ptr_ = mem_reuse_util_ptr; + auto base_ptr = MallocDynamicMem(total_allocated_size, false); + 
mem_reuse_util_ptr_->set_mem_base(base_ptr); +} + +uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { + MS_EXCEPTION_IF_NULL(node); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + uint8_t *ptr = nullptr; + if (AnfAlgo::IsCommunicationOp(node)) { + bool communication_mem = false; + if (context_ptr->enable_hccl()) { + communication_mem = true; + } + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, communication_mem); + } else { + ptr = MallocDynamicMem(size, communication_mem); + } + return ptr; + } + + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, false); + } else if (flag == kDynamicMem) { + ptr = MallocDynamicMem(size, false); + } else if (flag == kReuseDynamicMem) { + ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); + } + return ptr; +} + +uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { + if (flag == kReuseDynamicMem) { + return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); + } + return MallocDynamicMem(size, false); +} + +uint8_t *MemoryManager::MallocMem(int flag, size_t size) { + uint8_t *ptr = nullptr; + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, false); + } else if (flag == kDynamicMem) { + ptr = MallocDynamicMem(size, false); + } + return ptr; +} + +uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (static_mem_offset_ < align_size) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_static_size_ += align_size; + auto offset = static_mem_offset_ - align_size; + if (dynamic_mem_offset_ > offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + static_mem_offset_ = offset; + if (communication_mem) { + return device_mem_base_ + offset + kMemAlignSize; + } else { + return device_mem_base_ + offset; + } +} + +uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + uint64_t offset = dynamic_mem_offset_; + auto new_offset = dynamic_mem_offset_ + align_size; + if (new_offset > static_mem_offset_) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_dynamic_size_ += align_size; + dynamic_mem_offset_ = new_offset; + + if (communication_mem) { + return device_mem_base_ + offset + kMemAlignSize; + } else { + return device_mem_base_ + offset; + } +} + +void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) { + auto device_ptr = AllocTensorMemDynamic(size); + MS_EXCEPTION_IF_NULL(device_ptr); + address->ptr_ = device_ptr; + address->mem_dynamic_alloc_ = true; +} + +void *MemoryManager::AllocTensorMemDynamic(size_t size) { + if (size == 0) { + MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; + } + return nullptr; +} + +void MemoryManager::FreeTensorMemDynamic(void *device_ptr) { + if (device_ptr == nullptr) { + MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; + } +} +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h new file mode 100644 index 0000000000..2e47237def --- /dev/null +++ b/mindspore/ccsrc/device/memory_manager.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ +#include +#include "pre_activate/mem_reuse/mem_reuse.h" +#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +namespace mindspore { +namespace device { +const int kStaticMem = 0; +const int kDynamicMem = 1; +const int kReuseDynamicMem = 2; +const int kGetAllOuts = -1; +const uint64_t kMemAlignSize = 512; +using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; + +class MemoryManager { + public: + MemoryManager() = default; + virtual ~MemoryManager(); + + virtual void MallocDeviceMemory() = 0; + virtual void FreeDeviceMemory() = 0; + void ResetDynamicMemory() { + total_dynamic_size_ = 0; + dynamic_mem_offset_ = 0; + } + + void InitReuseDynamicMemory(session::KernelGraph *graph); + uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); + uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); + virtual uint8_t *MallocMem(int flag, size_t size); + + // Alloc memory use the dynamic memory pool. + virtual void *AllocTensorMemDynamic(size_t size); + // Free memory use the dynamic memory pool. 
+ virtual void FreeTensorMemDynamic(void *device_ptr); + virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size); + size_t GetCommonAlignSize(size_t input_size) const; + size_t GetCommunicationAlignSize(size_t input_size) const; + + protected: + virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); + virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); + uint8_t *device_mem_base_{nullptr}; + uint8_t *device_mem_pool_base_{nullptr}; + uint64_t device_mem_size_{0}; + uint64_t device_mem_pool_size_{0}; + uint64_t dynamic_mem_offset_{0}; + uint64_t static_mem_offset_{0}; + size_t total_static_size_ = 0; + size_t total_dynamic_size_ = 0; + MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; +}; +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index cc23dbbdd2..78922448af 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -857,5 +857,15 @@ void AnfRuntimeAlgorithm::SetNodeInput(const CNodePtr &node, const AnfNodePtr &i MS_EXCEPTION_IF_NULL(input_node); node->set_input(index + 1, input_node); } + +bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto kernel_name = AnfAlgo::GetCNodeName(node); + auto kernel_type = AnfAlgo::GetKernelType(node); + if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { + return true; + } + return false; +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h index 2de68f0098..55650ac31e 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h @@ -166,6 +166,7 @@ class AnfRuntimeAlgorithm { static bool IsFeatureMapInput(const 
AnfNodePtr &node, size_t input_index); // get real input index for some tbe ops which input order is different between me and tbe impl static size_t GetRealInputIndex(const AnfNodePtr &anf_node, const size_t cur_index); + static bool IsCommunicationOp(const AnfNodePtr &node); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 29330fb193..bbcf2228cc 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -102,10 +102,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList graph->set_execution_order(execution_order); // Alloc memory, including static memory and dynamic memory AllocateMemory(graph.get()); - // Reset memory resource - auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); - MS_EXCEPTION_IF_NULL(runtime_instance); - runtime_instance->FreeHostMemory(); return graph_id; } diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 8d3f8a8138..3c1351a857 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -85,6 +85,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/kernel/oplib/*.cc" "../../../mindspore/ccsrc/kernel/tbe/*.cc" "../../../mindspore/ccsrc/device/kernel_runtime.cc" + "../../../mindspore/ccsrc/device/memory_manager.cc" "../../../mindspore/ccsrc/device/kernel_runtime_manager.cc" "../../../mindspore/ccsrc/device/kernel_info.cc" "../../../mindspore/ccsrc/device/ascend/profiling/*.cc" @@ -92,6 +93,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/convert_tensor_utils.cc" "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc" "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" + 
"../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" From ed04b8e165e65645b74e7cb532151356ad14ad45 Mon Sep 17 00:00:00 2001 From: zhoufeng Date: Mon, 6 Apr 2020 11:22:47 +0800 Subject: [PATCH 41/58] default build command "-z" (minddata) and "-M on" (gpu) by default in build.sh --- build.sh | 24 +++++++++++++++++------- cmake/mind_expression.cmake | 8 ++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/build.sh b/build.sh index 9d812d6dcc..8bb5f3d95a 100755 --- a/build.sh +++ b/build.sh @@ -26,7 +26,7 @@ usage() echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-s] [-b ge|cpu] [-m infer|train] \\" echo " [-a on|off] [-g on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z] [-M on|off] [-V 9.2|10.1] [-I] [-K]" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]" echo "" echo "Options:" echo " -d Debug mode" @@ -50,8 +50,8 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -Q Enable dump end to end, default off" echo " -D Enable dumping of function graph ir, default on" - echo " -z Compile dataset & mindrecord, default off" - echo " -M Enable MPI and NCCL for GPU training, default off" + echo " -z Compile dataset & mindrecord, default on" + echo " -M Enable MPI and NCCL for GPU training, default on" echo " -V Specify the minimum required cuda version, default CUDA 9.2" echo " -I Compile predict, default off" echo " -K Compile with AKG, default off" @@ -88,8 +88,8 @@ checkopts() ENABLE_DUMP2PROTO="on" ENABLE_DUMPE2E="off" ENABLE_DUMP_IR="on" - COMPILE_MINDDATA="off" - ENABLE_MPI="off" + COMPILE_MINDDATA="on" + ENABLE_MPI="on" CUDA_VERSION="9.2" COMPILE_PREDICT="off" USE_GLOG="on" @@ -177,7 
+177,7 @@ checkopts() if [[ "X$OPTARG" == "Xgpu" ]]; then ENABLE_GPU="on" ENABLE_CPU="on" - elif [[ "X$OPTARG" == "Xd" ]]; then + elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then ENABLE_D="on" ENABLE_CPU="on" elif [[ "X$OPTARG" == "Xcpu" ]]; then @@ -216,7 +216,17 @@ checkopts() echo "enable dump function graph ir" ;; z) - COMPILE_MINDDATA="on" + eval ARG=\$\{$OPTIND\} + if [[ -n $ARG && $ARG != -* ]]; then + OPTARG=$ARG + check_on_off $OPTARG z + OPTIND=$((OPTIND + 1)) + else + OPTARG="" + fi + if [[ "X$OPTARG" == "Xoff" ]]; then + COMPILE_MINDDATA="off" + fi ;; I) COMPILE_PREDICT="on" diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 345fd4675e..af122d4117 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -29,11 +29,11 @@ if (ENABLE_GPU) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/dmlc_core.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/rang.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tvm_gpu.cmake) -endif() -if (ENABLE_MPI) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) + if (ENABLE_MPI) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) + endif() endif() if (ENABLE_GE) From 849543ac96c76c876195192bb94ff5a6008fbc6a Mon Sep 17 00:00:00 2001 From: zhoufeng Date: Mon, 6 Apr 2020 12:17:43 +0800 Subject: [PATCH 42/58] Distinguish package name according to hardware platform --- build.sh | 6 ++++-- package.sh | 17 ++++++++++++++++- setup_package.py | 2 +- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 8bb5f3d95a..6dc699000a 100755 --- a/build.sh +++ b/build.sh @@ -462,8 +462,10 @@ if [[ "X$INC_BUILD" = "Xoff" ]]; then bash "${PROJECT_PATH}/package.sh" ge elif [[ "X$ENABLE_GPU" = "Xon" ]]; then bash "${PROJECT_PATH}/package.sh" ms gpu - elif [[ "X$ENABLE_D" = "Xon" ]] || [[ "X$ENABLE_CPU" = 
"Xon" ]]; then - bash "${PROJECT_PATH}/package.sh" ms + elif [[ "X$ENABLE_D" = "Xon" ]]; then + bash "${PROJECT_PATH}/package.sh" ms ascend + elif [[ "X$ENABLE_CPU" = "Xon" ]]; then + bash "${PROJECT_PATH}/package.sh" ms cpu else bash "${PROJECT_PATH}/package.sh" debug fi diff --git a/package.sh b/package.sh index 0d4147c9f6..67f4761f37 100755 --- a/package.sh +++ b/package.sh @@ -96,14 +96,29 @@ if [ -n "$1" ];then else export BACKEND_POLICY="ms" fi + +# package name +if [[ "X$1" = "Xge" ]]; then + export MS_PACKAGE_NAME="mindspore" +elif [[ "X$1" = "Xms" && "X$2" = "Xgpu" ]]; then + export MS_PACKAGE_NAME="mindspore-gpu" +elif [[ "X$1" = "Xms" && "X$2" = "Xascend" ]]; then + export MS_PACKAGE_NAME="mindspore-ascend" +elif [[ "X$1" = "Xms" && "X$2" = "Xcpu" ]]; then + export MS_PACKAGE_NAME="mindspore" +else + export MS_PACKAGE_NAME="mindspore" +fi + ${PYTHON} "${BASEPATH}/setup_package.py" bdist_wheel chmod -R 700 ${PACKAGE_PATH}/mindspore/ -chmod -R 700 ${PACKAGE_PATH}/mindspore.egg-info/ +chmod -R 700 ${PACKAGE_PATH}/${MS_PACKAGE_NAME//-/_}.egg-info/ # rename package PACKAGE_FULL_NAME=$(find "${PACKAGE_PATH}" -iname "*.whl") PACKAGE_BASE_NAME=$(echo ${PACKAGE_FULL_NAME} | awk -F / '{print $NF}' | awk -F - '{print $1"-"$2}') +PACKAGE_BASE_NAME=${PACKAGE_BASE_NAME//_*-/-} PACKAGE_NEW_NAME="${PACKAGE_BASE_NAME}-${PY_TAGS}-${PLATFORM_TAG}.whl" cp -rf "${PACKAGE_PATH}/dist"/*.whl "${PACKAGE_PATH}/${PACKAGE_NEW_NAME}" diff --git a/setup_package.py b/setup_package.py index 8b6889cd34..87b5718de2 100644 --- a/setup_package.py +++ b/setup_package.py @@ -21,7 +21,6 @@ from setuptools import setup, find_packages from setuptools.command.egg_info import egg_info from setuptools.command.build_py import build_py -package_name = 'mindspore' version = '0.1.0' author = 'The MindSpore Authors' author_email = 'contact@mindspore.cn' @@ -29,6 +28,7 @@ home_page = 'https://www.mindspore.cn' backend_policy = os.getenv('BACKEND_POLICY') commit_id = os.getenv('COMMIT_ID').replace("\n", 
"") +package_name = os.getenv('MS_PACKAGE_NAME').replace("\n", "") pwd = os.path.dirname(os.path.realpath(__file__)) pkg_dir = os.path.join(pwd, 'build/package') From 513f384c43f5d850fabdfc9ca878ed7cd7f403a3 Mon Sep 17 00:00:00 2001 From: yao_yf Date: Wed, 8 Apr 2020 17:24:22 +0800 Subject: [PATCH 43/58] fix auto parallel prelu --- mindspore/ccsrc/parallel/ops_info/prelu_info.cc | 2 +- tests/ut/cpp/parallel/ops_info/prelu_test.cc | 6 ++---- tests/ut/python/parallel/test_prelu.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc index 9aa8513331..1a44501f42 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc @@ -52,7 +52,7 @@ Status PReLUInfo::CheckStrategy(const StrategyPtr& strategy) { } return FAILED; } - if ((stra[0][PRELU_CHANNEL_INDEX] != PRELU_CHANNEL_STRATEGY) || (stra[1][0] != PRELU_CHANNEL_STRATEGY)) { + if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0]) { if (is_auto_parallel_) { MS_LOG(DEBUG) << name_ << ": Invalid channel strategy."; } else { diff --git a/tests/ut/cpp/parallel/ops_info/prelu_test.cc b/tests/ut/cpp/parallel/ops_info/prelu_test.cc index 5ff261234f..d6db1b8460 100644 --- a/tests/ut/cpp/parallel/ops_info/prelu_test.cc +++ b/tests/ut/cpp/parallel/ops_info/prelu_test.cc @@ -146,11 +146,10 @@ TEST_F(TestPReLUInfo, CheckStrategy1) { } TEST_F(TestPReLUInfo, CheckStrategy2) { - // Success: {{2,1,8,16},{1}} std::vector inputs = {{2, 4, 8, 16}, {4}}; StrategyPtr strategy = NewStrategy(0, inputs); Status ret = prelu->Init(strategy); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, SUCCESS); } TEST_F(TestPReLUInfo, AutoStrategy1) { @@ -252,11 +251,10 @@ TEST_F(TestPReLUInfo, CheckStrategy_2d1) { } TEST_F(TestPReLUInfo, CheckStrategy_2d2) { - // Success: {{2,1,8,16},{1}} std::vector inputs = {{128, 4}, {4}}; StrategyPtr strategy = NewStrategy(0, inputs); Status ret = 
prelu_2d->Init(strategy); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, SUCCESS); } TEST_F(TestPReLUInfo, AutoStrategy_2d1) { diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index c601045491..d3ad1cc710 100755 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -149,3 +149,20 @@ def test_prelu_parallel_success3(): w = Tensor(np.random.rand(16),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) _executor.compile(net, x, y, w) + +def test_prelu_parallel_success4(): + class Net(nn.Cell): + def __init__(self, strategy): + super().__init__() + self.prelu = P.PReLU().set_strategy(strategy) + def construct(self, x, y): + out = self.prelu(x, y) + return out + context.reset_auto_parallel_context() + context.set_auto_parallel_context(device_num=64, global_rank=0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + strategy = ((2, 4, 4, 2), (4, )) + x = Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32) + w = Tensor(np.random.rand(16),dtype=ms.float32) + net = GradWrap(NetWithLoss(Net(strategy))) + _executor.compile(net, x, w) From c0e2a63fdb989ff598869b38e184c5049cea1948 Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Wed, 8 Apr 2020 16:36:06 -0400 Subject: [PATCH 44/58] Correct dataset error checking --- mindspore/dataset/engine/datasets.py | 2 -- mindspore/dataset/engine/validators.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index ab2290c13c..2058bbf826 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -82,8 +82,6 @@ def zip(datasets): if len(datasets) <= 1: raise ValueError( "Can't zip empty or just one dataset!") - if not isinstance(datasets, tuple): - raise TypeError("The zip function %s type error!" 
% (datasets)) return ZipDataset(datasets) diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 26d6241945..4c84cfe354 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -105,13 +105,13 @@ def check(method): "The %s function %s exceeds the boundary!" % ( func_name, param_name)) if isinstance(arg, int) and param_name == "num_parallel_workers" and ( - arg <= 0 or arg > cpu_count()): + arg < 1 or arg > cpu_count()): raise ValueError( "The %s function %s exceeds the boundary(%s)!" % ( func_name, param_name, cpu_count())) if isinstance(arg, int) and param_name != "seed" \ and param_name != "count" and param_name != "prefetch_size" \ - and param_name != "num_parallel_workers" and (arg <= 0 or arg > 2147483647): + and param_name != "num_parallel_workers" and (arg < 1 or arg > 2147483647): raise ValueError( "The %s function %s exceeds the boundary!" % ( func_name, param_name)) @@ -271,8 +271,8 @@ def check_interval_closed(param, param_name, valid_range): def check_num_parallel_workers(value): check_type(value, 'num_parallel_workers', int) - if value <= 0 or value > cpu_count(): - raise ValueError("num_parallel_workers exceeds the boundary between 0 and {}!".format(cpu_count())) + if value < 1 or value > cpu_count(): + raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) def check_num_samples(value): From 0b1ae67418a588a0f58e3f7bdadfb42cf53e97ef Mon Sep 17 00:00:00 2001 From: jinyaohui Date: Thu, 9 Apr 2020 09:23:39 +0800 Subject: [PATCH 45/58] modify comment --- example/yolov3_coco2017/train.py | 2 +- mindspore/ccsrc/transform/convert.cc | 2 +- mindspore/nn/wrap/loss_scale.py | 2 +- tests/ut/python/utils/test_callback.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index 0a32a6d30d..121e2aa810 100644 --- a/example/yolov3_coco2017/train.py 
+++ b/example/yolov3_coco2017/train.py @@ -67,7 +67,7 @@ if __name__ == '__main__': parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") - parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or non-sink mode, default is sink") + parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink") parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 59985c8ae3..bebd000958 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -449,7 +449,7 @@ void DfGraphConvertor::InitLoopVar(std::vector *init_input) { if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { value = ConfigManager::GetInstance().iter_num(); } else { - MS_LOG(INFO) << "Run with non-sink mode, the iterator number will always be 1"; + MS_LOG(INFO) << "Run with normal(non-sink) mode, the iterator number will always be 1"; value = 1; ConfigManager::GetInstance().set_iter_num(value); } diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index fd1c22be1f..c6d61e6983 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -51,7 +51,7 @@ class DynamicLossScaleUpdateCell(Cell): In every training step, the loss scaling value will be updated by loss scaling value/`scale_factor` when there is overflow. 
And it will be increased by loss scaling value * `scale_factor` if there is no overflow for a continuous `scale_window` steps. This cell is used for Graph mode training in which all - logic will be executed on device side(Another training mode is non-sink mode in which some logic will be + logic will be executed on device side(Another training mode is normal(non-sink) mode in which some logic will be executed on host). Args: diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 7e7b893e0c..8c10c8886d 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -112,8 +112,8 @@ def test_save_checkpoint(): os.remove('./test_files/test_ckpt-model.pkl') -def test_loss_monitor_sink_model(): - """Test loss monitor sink model.""" +def test_loss_monitor_sink_mode(): + """Test loss monitor sink mode.""" cb_params = _InternalCallbackParam() cb_params.cur_epoch_num = 4 cb_params.cur_step_num = 2 @@ -131,8 +131,8 @@ def test_loss_monitor_sink_model(): callbacklist.end(run_context) -def test_loss_monitor_feed_model(): - """Test loss monitor non-sink mode.""" +def test_loss_monitor_normal_mode(): + """Test loss monitor normal(non-sink) mode.""" cb_params = _InternalCallbackParam() run_context = RunContext(cb_params) loss_cb = LossMonitor(1) From 25cdf4833679e14d1cda2ae4d523f6856af9c2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=87=E4=B8=87=E6=B2=A1=E6=83=B3=E5=88=B0?= Date: Tue, 7 Apr 2020 14:56:05 +0800 Subject: [PATCH 46/58] fix review opinions in doc/comments --- mindspore/common/initializer.py | 2 +- mindspore/ops/operations/array_ops.py | 86 ++++++++++----------- mindspore/ops/operations/math_ops.py | 105 +++++++++++++------------- mindspore/ops/operations/nn_ops.py | 44 +++++------ mindspore/train/serialization.py | 2 +- 5 files changed, 116 insertions(+), 123 deletions(-) diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index 4261621272..d55e03314d 100644 
--- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -276,7 +276,7 @@ def initializer(init, shape=None, dtype=mstype.float32): shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of output. Default: None. - dtype (:class:`mindspore.dtype`): The type of data in initialized tensor. Default: mstype.float32. + dtype (:class:`mindspore.dtype`): The type of data in initialized tensor. Default: mindspore.float32. Returns: Tensor, initialized tensor. diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 59d3083c5d..dda490566f 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -62,7 +62,7 @@ class ExpandDims(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> expand_dims = ExpandDims() + >>> expand_dims = P.ExpandDims() >>> output = expand_dims(input_tensor, 0) """ @@ -101,7 +101,7 @@ class DType(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> type = DType()(input_tensor) + >>> type = P.DType()(input_tensor) """ @prim_attr_register @@ -134,7 +134,7 @@ class SameTypeShape(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) >>> input_y = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> out = SameTypeShape()(input_x, input_y) + >>> out = P.SameTypeShape()(input_x, input_y) """ @prim_attr_register @@ -175,7 +175,7 @@ class Cast(PrimitiveWithInfer): >>> input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) >>> input_x = Tensor(input_np) >>> type_dst = mindspore.int32 - >>> cast = Cast() + >>> cast = P.Cast() >>> result = cast(input_x, type_dst) >>> expect = input_np.astype(type_dst) """ @@ -227,7 +227,7 @@ class IsSubClass(PrimitiveWithInfer): bool, the check result. 
Examples: - >>> result = IsSubClass()(mindspore.int32, mindspore.intc) + >>> result = P.IsSubClass()(mindspore.int32, mindspore.intc) """ @prim_attr_register @@ -262,7 +262,7 @@ class IsInstance(PrimitiveWithInfer): Examples: >>> a = 1 - >>> result = IsInstance()(a, mindspore.int32) + >>> result = P.IsInstance()(a, mindspore.int32) """ @prim_attr_register @@ -303,7 +303,7 @@ class Reshape(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> reshape = Reshape() + >>> reshape = P.Reshape() >>> output = reshape(input_tensor, (3, 2)) """ @@ -366,7 +366,7 @@ class Shape(Primitive): Examples: >>> input_tensor = Tensor(np.ones(shape=[3, 2, 1]), mindspore.float32) - >>> shape = Shape() + >>> shape = P.Shape() >>> output = shape(input_tensor) """ @@ -398,7 +398,7 @@ class Squeeze(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.ones(shape=[3, 2, 1]), mindspore.float32) - >>> squeeze = Squeeze(2) + >>> squeeze = P.Squeeze(2) >>> output = squeeze(input_tensor) """ @@ -450,7 +450,7 @@ class Transpose(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), mindspore.float32) >>> perm = (0, 2, 1) - >>> transpose = Transpose() + >>> transpose = P.Transpose() >>> output = transpose(input_tensor, perm) """ @@ -504,10 +504,10 @@ class GatherV2(PrimitiveWithInfer): Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. 
Examples: - >>> params = Tensor(np.array([[1, 2, 7, 42], [3, 4, 54, 22], [2, 2, 55, 3]]), mindspore.float32) - >>> indices = Tensor(np.array([1, 2]), mindspore.int32) + >>> input_params = Tensor(np.array([[1, 2, 7, 42], [3, 4, 54, 22], [2, 2, 55, 3]]), mindspore.float32) + >>> input_indices = Tensor(np.array([1, 2]), mindspore.int32) >>> axis = 1 - >>> out = GatherV2()(params, indices, axis) + >>> out = P.GatherV2()(input_params, input_indices, axis) """ @prim_attr_register @@ -556,7 +556,7 @@ class Split(PrimitiveWithInfer): :math:`(y_1, y_2, ..., y_S)`. Examples: - >>> split = Split(1, 2) + >>> split = P.Split(1, 2) >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) >>> output = split(x) """ @@ -606,7 +606,7 @@ class Rank(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> rank = Rank() + >>> rank = P.Rank() >>> rank(input_tensor) """ @@ -640,7 +640,7 @@ class TruncatedNormal(PrimitiveWithInfer): Examples: >>> input_shape = Tensor(np.array([1, 2, 3])) - >>> truncated_normal = TruncatedNormal() + >>> truncated_normal = P.TruncatedNormal() >>> output = truncated_normal(input_shape) """ @@ -681,7 +681,7 @@ class Size(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> size = Size() + >>> size = P.Size() >>> output = size(input_tensor) """ @@ -826,7 +826,7 @@ class TupleToArray(PrimitiveWithInfer): Tensor, if the input tuple contain `N` numbers, then the output tensor shape is (N,). Examples: - >>> type = TupleToArray()((1,2,3)) + >>> type = P.TupleToArray()((1,2,3)) """ @prim_attr_register @@ -861,7 +861,7 @@ class ScalarToArray(PrimitiveWithInfer): Tensor. 0-D Tensor and the content is the input. Examples: - >>> op = ScalarToArray() + >>> op = P.ScalarToArray() >>> data = 1.0 >>> output = op(data) """ @@ -893,7 +893,7 @@ class ScalarToTensor(PrimitiveWithInfer): Tensor. 0-D Tensor and the content is the input. 
Examples: - >>> op = ScalarToTensor() + >>> op = P.ScalarToTensor() >>> data = 1 >>> output = op(data, mindspore.float32) """ @@ -934,7 +934,7 @@ class InvertPermutation(PrimitiveWithInfer): tuple[int]. the lenth is same as input. Examples: - >>> invert = InvertPermutation() + >>> invert = P.InvertPermutation() >>> input_data = (3, 4, 0, 2, 1) >>> output = invert(input_data) >>> output == (2, 4, 3, 0, 1) @@ -982,8 +982,8 @@ class Argmax(PrimitiveWithInfer): Tensor, indices of the max value of input tensor across the axis. Examples: - >>> input = Tensor(np.array([2.0, 3.1, 1.2])) - >>> index = Argmax()(input) + >>> input_x = Tensor(np.array([2.0, 3.1, 1.2])) + >>> index = P.Argmax()(input_x) >>> assert index == Tensor(1, mindspore.int64) """ @@ -1030,8 +1030,8 @@ class Argmin(PrimitiveWithInfer): Tensor, indices of the min value of input tensor across the axis. Examples: - >>> input = Tensor(np.array([2.0, 3.1, 1.2])) - >>> index = Argmin()(input) + >>> input_x = Tensor(np.array([2.0, 3.1, 1.2])) + >>> index = P.Argmin()(input_x) >>> assert index == Tensor(2, mindspore.int64) """ @@ -1082,8 +1082,8 @@ class ArgMaxWithValue(PrimitiveWithInfer): :math:`(x_1, x_2, ..., x_{axis-1}, x_{axis+1}, ..., x_N)`. Examples: - >>> input = Tensor(np.random.rand(5)) - >>> index, output = ArgMaxWithValue()(input) + >>> input_x = Tensor(np.random.rand(5)) + >>> index, output = P.ArgMaxWithValue()(input_x) """ @prim_attr_register @@ -1129,8 +1129,8 @@ class ArgMinWithValue(PrimitiveWithInfer): :math:`(x_1, x_2, ..., x_{axis-1}, x_{axis+1}, ..., x_N)`. 
Examples: - >>> input = Tensor(np.random.rand(5)) - >>> index, output = ArgMinWithValue()(input) + >>> input_x = Tensor(np.random.rand(5)) + >>> index, output = P.ArgMinWithValue()(input_x) """ @prim_attr_register def __init__(self, axis=0, keep_dims=False): @@ -1325,7 +1325,7 @@ class Concat(PrimitiveWithInfer): Examples: >>> data1 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)) >>> data2 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)) - >>> op = Concat() + >>> op = P.Concat() >>> output = op((data1, data2)) """ @@ -1607,7 +1607,7 @@ class Select(PrimitiveWithInfer): Tensor, has the same shape as input_y. The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. Examples: - >>> select = Select() + >>> select = P.Select() >>> input_x = Tensor([True, False]) >>> input_y = Tensor([2,3], mindspore.float32) >>> input_z = Tensor([1,2], mindspore.float32) @@ -1681,7 +1681,7 @@ class StridedSlice(PrimitiveWithInfer): Examples >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], >>> [[5, 5, 5], [6, 6, 6]]], mindspore.float32) - >>> slice = StridedSlice() + >>> slice = P.StridedSlice() >>> output = slice(input_x, (1, 0, 0), (2, 1, 3), (1, 1, 1)) >>> output.shape() (1, 1, 3) @@ -1913,9 +1913,9 @@ class ScatterNd(PrimitiveWithInfer): Tensor, the new tensor, has the same type as `update` and the same shape as `shape`. 
Examples: - >>> op = ScatterNd() - >>> update = Tensor(np.array([3.2, 1.1]), mindspore.float32) + >>> op = P.ScatterNd() >>> indices = Tensor(np.array([[0, 1], [1, 1]]), mindspore.int32) + >>> update = Tensor(np.array([3.2, 1.1]), mindspore.float32) >>> shape = (3, 3) >>> output = op(indices, update, shape) """ @@ -1964,7 +1964,7 @@ class ResizeNearestNeighbor(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> resize = ResizeNearestNeighbor((2, 2)) + >>> resize = P.ResizeNearestNeighbor((2, 2)) >>> output = resize(input_tensor) """ @@ -1997,7 +1997,7 @@ class GatherNd(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) - >>> op = GatherNd() + >>> op = P.GatherNd() >>> output = op(input_x, indices) """ @@ -2039,7 +2039,7 @@ class ScatterNdUpdate(PrimitiveWithInfer): >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) >>> update = Tensor(np.array([1.0, 2.2]), mindspore.float32) - >>> op = ScatterNdUpdate() + >>> op = P.ScatterNdUpdate() >>> output = op(input_x, indices, update) """ @@ -2090,7 +2090,7 @@ class SpaceToDepth(PrimitiveWithInfer): Examples: >>> x = Tensor(np.random.rand(1,3,2,2), mindspore.float32) >>> block_size = 2 - >>> op = SpaceToDepth(block_size) + >>> op = P.SpaceToDepth(block_size) >>> output = op(x) >>> output.asnumpy().shape == (1,12,1,1) """ @@ -2148,7 +2148,7 @@ class DepthToSpace(PrimitiveWithInfer): Examples: >>> x = Tensor(np.random.rand(1,12,1,1), mindspore.float32) >>> block_size = 2 - >>> op = DepthToSpace(block_size) + >>> op = P.DepthToSpace(block_size) >>> output = op(x) >>> output.asnumpy().shape == (1,3,2,2) """ @@ -2212,8 +2212,8 @@ class SpaceToBatch(PrimitiveWithInfer): >>> block_size = 2 >>> paddings = [[0, 0], [0, 0]] 
>>> space_to_batch = P.SpaceToBatch(block_size, paddings) - >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) - >>> space_to_batch(x) + >>> input_x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) + >>> space_to_batch(input_x) [[[[1.]]], [[[2.]]], [[[3.]]], [[[4.]]]] """ @@ -2280,8 +2280,8 @@ class BatchToSpace(PrimitiveWithInfer): >>> block_size = 2 >>> crops = [[0, 0], [0, 0]] >>> op = P.BatchToSpace(block_size, crops) - >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) - >>> output = op(x) + >>> input_x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) + >>> output = op(input_x) [[[[1., 2.], [3., 4.]]]] """ diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index d003f6ee8b..1294a65d02 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -112,9 +112,9 @@ class TensorAdd(_MathBinaryOp): Examples: >>> add = P.TensorAdd() - >>> x = Tensor(np.array([1,2,3]).astype(np.float32)) - >>> y = Tensor(np.array([4,5,6]).astype(np.float32)) - >>> add(x, y) + >>> input_x = Tensor(np.array([1,2,3]).astype(np.float32)) + >>> input_y = Tensor(np.array([4,5,6]).astype(np.float32)) + >>> add(input_x, input_y) [5,7,9] """ @@ -124,23 +124,24 @@ class AssignAdd(PrimitiveWithInfer): Updates a `Parameter` by adding a value to it. Inputs: - - **input_x** (Parameter) - The `Parameter`. - - **input_y** (Union[scalar, Tensor]) - Has the same shape as `input_x`. + - **variable** (Parameter) - The `Parameter`. + - **value** (Union[numbers.Number, Tensor]) - The value to be added to the `variable`. + It should have the same shape as `variable` if it is a Tensor. 
Examples: >>> class Net(Cell): >>> def __init__(self): >>> super(Net, self).__init__() >>> self.AssignAdd = P.AssignAdd() - >>> self.inputdata = Parameter(initializer(1, [1], mindspore.int64), name="global_step") + >>> self.variable = Parameter(initializer(1, [1], mindspore.int64), name="global_step") >>> >>> def construct(self, x): - >>> self.AssignAdd(self.inputdata, x) - >>> return self.inputdata + >>> self.AssignAdd(self.variable, x) + >>> return self.variable >>> >>> net = Net() - >>> x = Tensor(np.ones([1]).astype(np.int64)*100) - >>> net(x) + >>> value = Tensor(np.ones([1]).astype(np.int64)*100) + >>> net(value) """ __mindspore_signature__ = ( ('variable', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD), @@ -166,22 +167,24 @@ class AssignSub(PrimitiveWithInfer): Updates a `Parameter` by subtracting a value from it. Inputs: - - **input_x** (Parameter) - The `Parameter`. - - **input_y** (Union[scalar, Tensor]) - Has the same shape as `input_x`. + - **variable** (Parameter) - The `Parameter`. + - **value** (Union[numbers.Number, Tensor]) - The value to be subtracted from the `variable`. + It should have the same shape as `variable` if it is a Tensor. Examples: >>> class Net(Cell): >>> def __init__(self): + >>> super(Net, self).__init__() >>> self.AssignSub = P.AssignSub() - >>> self.inputdata = Parameter(initializer(1, [1], mindspore.int64), name="global_step") + >>> self.variable = Parameter(initializer(1, [1], mindspore.int64), name="global_step") >>> >>> def construct(self, x): - >>> self.AssignSub(self.inputdata, x) - >>> return self.inputdata + >>> self.AssignSub(self.variable, x) + >>> return self.variable >>> >>> net = Net() - >>> x = Tensor(np.ones([1]).astype(np.int64)*100) - >>> net(x) + >>> value = Tensor(np.ones([1]).astype(np.int64)*100) + >>> net(value) """ __mindspore_signature__ = ( @@ -263,9 +266,9 @@ class ReduceMean(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. 
Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMean(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -295,9 +298,9 @@ class ReduceSum(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceSum(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -328,9 +331,9 @@ class ReduceAll(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.array([[True, False], [True, True]])) + >>> input_x = Tensor(np.array([[True, False], [True, True]])) >>> op = P.ReduceAll(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ def __infer__(self, input_x, axis): @@ -364,9 +367,9 @@ class ReduceMax(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMax(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -397,9 +400,9 @@ class ReduceMin(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMin(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -429,9 +432,9 @@ class ReduceProd(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. 
Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceProd(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -451,15 +454,15 @@ class CumProd(PrimitiveWithInfer): Tensor, has the same shape and dtype as the 'input_x'. Examples: - >>> data = Tensor(np.array([a, b, c]).astype(np.float32)) + >>> input_x = Tensor(np.array([a, b, c]).astype(np.float32)) >>> op0 = P.CumProd() - >>> output = op0(data, 0) # output=[a, a * b, a * b * c] + >>> output = op0(input_x, 0) # output=[a, a * b, a * b * c] >>> op1 = P.CumProd(exclusive=True) - >>> output = op1(data, 0) # output=[1, a, a * b] + >>> output = op1(input_x, 0) # output=[1, a, a * b] >>> op2 = P.CumProd(reverse=True) - >>> output = op2(data, 0) # output=[a * b * c, b * c, c] + >>> output = op2(input_x, 0) # output=[a * b * c, b * c, c] >>> op3 = P.CumProd(exclusive=True, reverse=True) - >>> output = op3(data, 0) # output=[b * c, c, 1] + >>> output = op3(input_x, 0) # output=[b * c, c, 1] """ @prim_attr_register def __init__(self, exclusive=False, reverse=False): @@ -1190,7 +1193,7 @@ class FloorMod(_MathBinaryOp): Examples: >>> input_x = Tensor(np.array([2, 4, -1]), mindspore.int32) >>> input_y = Tensor(np.array([3, 3, 3]), mindspore.int32) - >>> floor_mod = FloorMod() + >>> floor_mod = P.FloorMod() >>> floor_mod(input_x, input_y) [2, 1, 2] """ @@ -1207,9 +1210,9 @@ class Acosh(PrimitiveWithInfer): Tensor, has the same shape as `input_x`. Examples: - >>> acosh = Acosh() - >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) - >>> output = acosh(X) + >>> acosh = P.Acosh() + >>> input_x = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) + >>> output = acosh(input_x) """ @prim_attr_register @@ -1286,7 +1289,7 @@ class EqualCount(PrimitiveWithInfer): - **input_y** (Tensor) - The second input tensor. Outputs: - Tensor, has the same shape as the `input_x`. 
+ Tensor, with the type as `mindspore.int32` and size as (1,). Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1324,7 +1327,7 @@ class NotEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number, bool]) - The first input is a tensor whose data type is number or bool, or a number or a bool object. - - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as `input_x` or a number or a bool object. Outputs: @@ -1359,11 +1362,11 @@ class Greater(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, and the data type is bool. Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1386,11 +1389,11 @@ class GreaterEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool'. + Tensor, the shape is same as the shape after broadcasting, and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1413,7 +1416,7 @@ class Less(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: @@ -1440,7 +1443,7 @@ class LessEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: @@ -1752,8 +1755,8 @@ class Cos(PrimitiveWithInfer): Examples: >>> cos = P.Cos() - >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) - >>> output = cos(X) + >>> input_x = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) + >>> output = cos(input_x) """ @prim_attr_register @@ -1780,8 +1783,8 @@ class ACos(PrimitiveWithInfer): Examples: >>> acos = P.ACos() - >>> X = Tensor(np.array([0.74, 0.04, 0.30, 0.56]), mindspore.float32) - >>> output = acos(X) + >>> input_x = Tensor(np.array([0.74, 0.04, 0.30, 0.56]), mindspore.float32) + >>> output = acos(input_x) """ @prim_attr_register @@ -1993,7 +1996,7 @@ class Atan2(_MathBinaryOp): - **input_y** (Tensor) - The input tensor. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, and the data type is same as `input_x`. 
Examples: >>> input_x = Tensor(np.array([[0, 1]]), mindspore.float32) diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index c7c823aeb6..3cc6718484 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -41,7 +41,7 @@ class Flatten(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32) - >>> flatten = Flatten() + >>> flatten = P.Flatten() >>> output = flatten(input_tensor) >>> assert output.shape() == (1, 24) """ @@ -155,7 +155,7 @@ class ReLU(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> relu = ReLU() + >>> relu = P.ReLU() >>> result = relu(input_x) [[0, 4.0, 0.0], [2.0, 0.0, 9.0]] """ @@ -188,7 +188,7 @@ class ReLU6(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> relu6 = ReLU6() + >>> relu6 = P.ReLU6() >>> result = relu6(input_x) """ @@ -222,10 +222,10 @@ class Elu(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> elu = Elu() + >>> elu = P.Elu() >>> result = elu(input_x) Tensor([[-0.632 4.0 -0.999] - [2.0 -0.993 9.0 ]], shape=(2, 3), dtype=ms.float32) + [2.0 -0.993 9.0 ]], shape=(2, 3), dtype=mindspore.float32) """ @prim_attr_register @@ -1082,7 +1082,7 @@ class TopK(PrimitiveWithInfer): Examples: >>> topk = P.TopK(sorted=True) - >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16)) + >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16) >>> k = 3 >>> values, indices = topk(input_x, k) >>> assert values == Tensor(np.array([5, 4, 3])) @@ -1223,8 +1223,8 @@ class ApplyMomentum(PrimitiveWithInfer): Examples: >>> net = ResNet50() - >>> loss = SoftmaxCrossEntropyWithLogits() - >>> opt = ApplyMomentum(Tensor(np.array([0.001])), Tensor(np.array([0.9])), + >>> loss = nn.SoftmaxCrossEntropyWithLogits() + >>> opt = 
P.ApplyMomentum(Tensor(np.array([0.001])), Tensor(np.array([0.9])), filter(lambda x: x.requires_grad, net.get_parameters())) >>> model = Model(net, loss, opt) """ @@ -1351,6 +1351,7 @@ class SGD(PrimitiveWithInfer): class ApplyRMSProp(PrimitiveWithInfer): """ Optimizer that implements the Root Mean Square prop(RMSProp) algorithm. + Please refer to the usage in source code of `nn.RMSProp`. Note: Update `var` according to the RMSProp algorithm. @@ -1386,12 +1387,6 @@ class ApplyRMSProp(PrimitiveWithInfer): Outputs: Tensor, parameters to be update. - - Examples: - >>> net = Net() - >>> loss = nn.SoftmaxCrossEntropyWithLogits() - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=learning_rate) - >>> model = Model(net, loss, opt) """ @prim_attr_register @@ -1424,6 +1419,7 @@ class ApplyRMSProp(PrimitiveWithInfer): class ApplyCenteredRMSProp(PrimitiveWithInfer): """ Optimizer that implements the centered RMSProp algorithm. + Please refer to the usage in source code of `nn.RMSProp`. Note: Update `var` according to the centered RMSProp algorithm. @@ -1464,12 +1460,6 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer): Outputs: Tensor, parameters to be update. - - Examples: - >>> net = Net() - >>> loss = nn.SoftmaxCrossEntropyWithLogits() - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=learning_rate, centered=True) - >>> model = Model(net, loss, opt) """ @prim_attr_register @@ -1596,7 +1586,7 @@ class DropoutGenMask(Primitive): Tensor, the value of generated mask for input shape. 
Examples: - >>> dropout_gen_mask = DropoutGenMask() + >>> dropout_gen_mask = P.DropoutGenMask() >>> shape = (20, 16, 50) >>> keep_prob = Tensor(0.5, mindspore.float32) >>> mask = dropout_gen_mask(shape, keep_prob) @@ -1631,8 +1621,8 @@ class DropoutDoMask(PrimitiveWithInfer): >>> x = Tensor(np.ones([20, 16, 50]), mindspore.float32) >>> shape = (20, 16, 50) >>> keep_prob = Tensor(0.5, mindspore.float32) - >>> dropout_gen_mask = DropoutGenMask() - >>> dropout_do_mask = DropoutDoMask() + >>> dropout_gen_mask = P.DropoutGenMask() + >>> dropout_do_mask = P.DropoutDoMask() >>> mask = dropout_gen_mask(shape, keep_prob) >>> output = dropout_do_mask(x, mask, keep_prob) >>> assert output.shape() == (20, 16, 50) @@ -1737,7 +1727,7 @@ class OneHot(PrimitiveWithInfer): Examples: >>> indices = Tensor(np.array([0, 1, 2]), mindspore.int32) >>> depth, on_value, off_value = 3, Tensor(1.0, mindspore.float32), Tensor(0.0, mindspore.float32) - >>> onehot = OneHot() + >>> onehot = P.OneHot() >>> result = onehot(indices, depth, on_value, off_value) [[1, 0, 0], [0, 1, 0], [0, 0, 1]] """ @@ -1793,7 +1783,7 @@ class Gelu(PrimitiveWithInfer): Examples: >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) - >>> gelu = Gelu() + >>> gelu = P.Gelu() >>> result = gelu(tensor) """ @@ -1834,7 +1824,7 @@ class GetNext(PrimitiveWithInfer): and the type is described is `types`. Examples: - >>> get_next = GetNext([mindspore.float32, mindspore.int32], [[32, 1, 28, 28], [10]], 'shared_name') + >>> get_next = P.GetNext([mindspore.float32, mindspore.int32], [[32, 1, 28, 28], [10]], 'shared_name') >>> feature, label = get_next() """ @@ -2015,7 +2005,7 @@ class Pad(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> pad_op = Pad(((1, 2), (2, 1))) + >>> pad_op = P.Pad(((1, 2), (2, 1))) >>> output_tensor = pad_op(input_tensor) >>> assert output_tensor == Tensor(np.array([[ 0. , 0. , 0. , 0. , 0. , 0. ], >>> [ 0. , 0. 
, -0.1, 0.3, 3.6, 0. ], diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index b334c3e9d8..90d8816094 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -406,7 +406,7 @@ def export(net, *inputs, file_name, file_format='GEIR'): file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'LITE' format for exported model. - GEIR: Graph Engine Intermidiate Representation. An intermidiate representation format of - Ascend model. + Ascend model. - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. - LITE: Huawei model format for mobile. """ From a6747c522fdc03da9ea9b24b1f9086308120efdf Mon Sep 17 00:00:00 2001 From: kswang Date: Thu, 9 Apr 2020 15:08:47 +0800 Subject: [PATCH 47/58] add ascend mem pool --- .../device/ascend/ascend_device_address.cc | 4 +-- .../device/ascend/ascend_device_address.h | 2 +- .../device/ascend/ascend_kernel_runtime.cc | 2 +- .../device/ascend/ascend_memory_manager.cc | 22 +++++++------ .../device/ascend/ascend_memory_manager.h | 6 +++- ...ory_allocator.cc => ascend_memory_pool.cc} | 31 +++++++------------ ...emory_allocator.h => ascend_memory_pool.h} | 31 ++++++++++--------- mindspore/ccsrc/device/device_address.h | 2 +- .../ccsrc/device/gpu/gpu_device_address.cc | 2 +- .../ccsrc/device/gpu/gpu_kernel_runtime.cc | 16 +++++----- .../ccsrc/device/gpu/gpu_memory_manager.cc | 8 ++--- .../ccsrc/device/gpu/gpu_memory_manager.h | 6 ++-- mindspore/ccsrc/device/kernel_runtime.cc | 8 ++--- mindspore/ccsrc/device/kernel_runtime.h | 1 - mindspore/ccsrc/device/memory_manager.cc | 22 +++++-------- mindspore/ccsrc/device/memory_manager.h | 15 ++++----- tests/ut/cpp/CMakeLists.txt | 2 +- 17 files changed, 84 insertions(+), 96 deletions(-) rename mindspore/ccsrc/device/ascend/{ascend_memory_allocator.cc => ascend_memory_pool.cc} (62%) rename mindspore/ccsrc/device/ascend/{ascend_memory_allocator.h => ascend_memory_pool.h} (67%) diff --git 
a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index b8b7f452e3..93f039af0e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -262,8 +262,8 @@ AscendDeviceAddress::~AscendDeviceAddress() { if (ptr_ == nullptr) { return; } - if (mem_dynamic_alloc_) { - AscendMemoryAllocator::GetInstance().FreeTensorMem(ptr_); + if (from_mem_pool_) { + AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } } diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/device/ascend/ascend_device_address.h index 60cc64cca7..93746082c1 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.h @@ -21,7 +21,7 @@ #include #include #include "device/device_address.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "ir/dtype.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 0c2a97a5a6..0c6861e21f 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -29,7 +29,7 @@ #include "hccl/hcom.h" #include "runtime/context.h" #include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "framework/ge_runtime/model_runner.h" #include "device/ascend/tasksink/task_generator.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc index f033d81d82..42830f54fa 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -15,29 +15,31 @@ */ 
#include "device/ascend/ascend_memory_manager.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "utils/context/ms_context.h" #include "runtime/mem.h" namespace mindspore { namespace device { namespace ascend { -static const uint64_t ASCEND_MEM_SIZE = 20; -static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); +const uint64_t kAscendDeviceMemGB = 20; +const uint64_t kAscendMemPoolGB = 5; +const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); +const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); void AscendMemoryManager::MallocDeviceMemory() { - device_mem_size_ = ASCEND_MEM_SIZE_BYTE; - static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); + device_mem_size_ = kAscendDeviceMemSize; + static_mem_offset_ = device_mem_size_; auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; } - device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); + device_mem_pool_size_ = kAscendMemPoolSize; ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; } - AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); } void AscendMemoryManager::FreeDeviceMemory() { @@ -57,8 +59,8 @@ void AscendMemoryManager::FreeDeviceMemory() { } } -void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) { - 
return AscendMemoryAllocator::GetInstance().AllocTensorMem(size); +void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { + return AscendMemoryPool::GetInstance().AllocTensorMem(size); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h index 8639fb5c72..dea88ac10a 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -27,7 +27,11 @@ class AscendMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; + void *MallocMemFromMemPool(size_t size) override; + + private: + uint8_t *device_mem_pool_base_{nullptr}; + uint64_t device_mem_pool_size_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc similarity index 62% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.cc index 08a30a28b7..2c38e4290d 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc @@ -14,24 +14,15 @@ * limitations under the License. 
*/ -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "device/ascend/ascend_kernel_runtime.h" #include "utils/log_adapter.h" namespace mindspore { namespace device { namespace ascend { -const uint64_t MEM_SIZE = 20; -const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30); - -AscendMemoryAllocator::AscendMemoryAllocator() { - hasMalloc_ = false; - free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); - total_mem_size_ = free_mem_size_; -} - -size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { - if (hasMalloc_) { +size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { + if (has_malloc_) { MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; } if (size == 0 || size > free_mem_size_) { @@ -41,35 +32,35 @@ size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { if (*addr == nullptr) { MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; } - hasMalloc_ = true; + has_malloc_ = true; free_mem_size_ -= size; return size; } -bool AscendMemoryAllocator::FreeDeviceMem(const DeviceMemPtr& addr) { +bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr& addr) { MS_EXCEPTION_IF_NULL(addr); - hasMalloc_ = false; + has_malloc_ = false; free_mem_size_ = total_mem_size_; return true; } -size_t AscendMemoryAllocator::AlignMemorySize(size_t size) const { +size_t AscendMemoryPool::AlignMemorySize(size_t size) const { if (size == 0) { return DYNAMIC_MEM_ALIGN_SIZE; } return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; } -size_t AscendMemoryAllocator::mem_alloc_unit_size() const { return free_mem_size_ - 512; } +size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } -void AscendMemoryAllocator::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { +void AscendMemoryPool::set_device_mem_pool_base(uint8_t* 
device_mem_pool_base) { MS_EXCEPTION_IF_NULL(device_mem_pool_base); device_mem_pool_base_ = device_mem_pool_base; } -size_t AscendMemoryAllocator::free_mem_size() { return free_mem_size_; } +size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } -size_t AscendMemoryAllocator::total_mem_size() { return total_mem_size_; } +size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h similarity index 67% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.h rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.h index 8b0f89a9b8..c2a29725f4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ -#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ +#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ #include #include "pre_activate/mem_reuse/mem_dynamic_allocator.h" @@ -23,22 +23,23 @@ namespace mindspore { namespace device { namespace ascend { -// The fraction of total ascend memory used to compute the graph. 
-static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8; - -class AscendMemoryAllocator : public DynamicMemPoolBestFit { +class AscendMemoryPool : public DynamicMemPoolBestFit { public: - ~AscendMemoryAllocator() override = default; + ~AscendMemoryPool() override = default; size_t AllocDeviceMem(size_t size, DeviceMemPtr* addr) override; bool FreeDeviceMem(const DeviceMemPtr& addr) override; void set_device_mem_pool_base(uint8_t* device_mem_pool_base); - void set_device_mem_pool_size(uint64_t device_mem_pool_size) { device_mem_pool_size_ = device_mem_pool_size; } + void set_device_mem_pool_size(uint64_t device_mem_pool_size) { + device_mem_pool_size_ = device_mem_pool_size; + free_mem_size_ = device_mem_pool_size_; + total_mem_size_ = free_mem_size_; + } size_t free_mem_size() override; size_t total_mem_size() override; - static AscendMemoryAllocator& GetInstance() { - static AscendMemoryAllocator instance; + static AscendMemoryPool& GetInstance() { + static AscendMemoryPool instance; return instance; } @@ -49,10 +50,10 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { size_t mem_alloc_unit_size() const override; private: - AscendMemoryAllocator(); - AscendMemoryAllocator(const AscendMemoryAllocator&) = delete; - AscendMemoryAllocator& operator=(const AscendMemoryAllocator&) = delete; - bool hasMalloc_; + AscendMemoryPool() = default; + AscendMemoryPool(const AscendMemoryPool&) = delete; + AscendMemoryPool& operator=(const AscendMemoryPool&) = delete; + bool has_malloc_{false}; uint8_t* device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; size_t free_mem_size_; @@ -62,4 +63,4 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { } // namespace device } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index cb022427e3..2d43963934 
100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -70,7 +70,7 @@ class DeviceAddress { size_t ref_count_{0}; string format_{"DefaultFormat"}; TypeId type_id_{kNumberTypeFloat16}; - bool mem_dynamic_alloc_{false}; + bool from_mem_pool_{false}; friend class KernelRuntime; friend class MemoryManager; friend class mindspore::device::ascend::tasksink::TaskGenerator; diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/device/gpu/gpu_device_address.cc index 36391d27db..c27a1aa65b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/device/gpu/gpu_device_address.cc @@ -46,7 +46,7 @@ GPUDeviceAddress::~GPUDeviceAddress() { } auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (mem_dynamic_alloc_) { + if (from_mem_pool_) { GPUMemoryAllocator::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 597e188e9d..2ec1a5df29 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -227,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(device_address); auto device_ptr = device_address->ptr_; if (device_ptr == nullptr) { - device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]); + device_ptr = mem_manager_->MallocMemFromMemPool(output_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); device_address->ptr_ = device_ptr; } @@ -244,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod kernel_workspaces->emplace_back(nullptr); continue; } - auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]); + auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); kernel::AddressPtr workspace = 
std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); @@ -292,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN addr_size.emplace_back(device_address.get(), output_size); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -328,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf addr_size.emplace_back(device_address.get(), output_sizes[i]); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -361,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } } @@ -372,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto workspace = kernel_workspaces[i]; if (workspace != nullptr) { MS_EXCEPTION_IF_NULL(workspace->addr); - mem_manager_->FreeTensorMemDynamic(workspace->addr); + mem_manager_->FreeMemFromMemPool(workspace->addr); workspace->addr = nullptr; } } @@ -389,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); 
device_address->ptr_ = nullptr; } *is_communication_op = true; @@ -411,7 +411,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc index 3944b504e4..7d042264b6 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -21,11 +21,11 @@ namespace mindspore { namespace device { namespace gpu { -void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) { +void *GPUMemoryManager::MallocMemFromMemPool(size_t size) { return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); } -void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void GPUMemoryManager::FreeMemFromMemPool(void *device_ptr) { GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); } @@ -34,7 +34,7 @@ void GPUMemoryManager::MallocDeviceMemory() { MS_EXCEPTION_IF_NULL(context_ptr); // If use the dynamic memory pool, then alloc the first memory block to init. 
if (context_ptr->enable_dynamic_mem_pool()) { - auto device_addr = AllocTensorMemDynamic(1); + auto device_addr = MallocMemFromMemPool(1); if (!device_addr) { MS_LOG(ERROR) << "Dynamic memory pool init error."; } @@ -62,7 +62,7 @@ uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); if (context_ptr->enable_dynamic_mem_pool()) { - auto device_ptr = AllocTensorMemDynamic(size); + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); return AddressOffset(device_ptr, 0); } diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h index a18226bdf3..cc5dac2a5e 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h @@ -28,11 +28,11 @@ class GPUMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; - void FreeTensorMemDynamic(void *device_ptr) override; + void *MallocMemFromMemPool(size_t size) override; + void FreeMemFromMemPool(void *device_ptr) override; protected: - uint8_t *MallocStaticMem(size_t size, bool communication_mem); + uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 16025ed8a4..eebc650347 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -169,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, tensor_size); + mem_manager_->MallocMemFromMemPool(device_address, tensor_size); 
AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -198,7 +198,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, output_sizes[i]); + mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } @@ -213,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, workspace_lists[i]); + mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } @@ -457,7 +457,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); auto mem_flag = kDynamicMem; if (is_enable_mem_reuse) { - mem_manager_->InitReuseDynamicMemory(graph); + mem_manager_->MallocReusedDynamicMem(graph); mem_flag = kReuseDynamicMem; } auto &kernels = graph->execution_order(); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index 1224bf14eb..61b43fd5c0 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -33,7 +33,6 @@ #include "utils/context/ms_context.h" #include "device/memory_manager.h" -// using mindspore::session::KernelGraph; using mindspore::tensor::Tensor; using TensorPtr = std::shared_ptr; using mindspore::kernel::AddressPtr; diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index 3c1ddee6bc..6977628eb1 
100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -21,12 +21,6 @@ using mindspore::memreuse::BestFitMemReuse; using mindspore::memreuse::MemReuseUtilPtr; namespace mindspore { namespace device { -MemoryManager::~MemoryManager() { - device_mem_base_ = nullptr; - device_mem_pool_base_ = nullptr; - mem_reuse_util_ptr_ = nullptr; -} - size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; } @@ -35,7 +29,7 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; } -void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) { +void MemoryManager::MallocReusedDynamicMem(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); @@ -147,23 +141,23 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } } -void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) { - auto device_ptr = AllocTensorMemDynamic(size); +void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; + address->from_mem_pool_ = true; } -void *MemoryManager::AllocTensorMemDynamic(size_t size) { +void *MemoryManager::MallocMemFromMemPool(size_t size) { if (size == 0) { - MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; + MS_LOG(ERROR) << "MallocMemFromMemPool size is 0."; } return nullptr; } -void MemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void MemoryManager::FreeMemFromMemPool(void *device_ptr) { if (device_ptr == nullptr) { - MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; + 
MS_LOG(ERROR) << "FreeMemFromMemPool device_ptr is null."; } } } // namespace device diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h index 2e47237def..82c22f4548 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/device/memory_manager.h @@ -31,7 +31,7 @@ using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; class MemoryManager { public: MemoryManager() = default; - virtual ~MemoryManager(); + virtual ~MemoryManager() = default; virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; @@ -40,16 +40,15 @@ class MemoryManager { dynamic_mem_offset_ = 0; } - void InitReuseDynamicMemory(session::KernelGraph *graph); + void MallocReusedDynamicMem(session::KernelGraph *graph); uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); virtual uint8_t *MallocMem(int flag, size_t size); - // Alloc memory use the dynamic memory pool. - virtual void *AllocTensorMemDynamic(size_t size); - // Free memory use the dynamic memory pool. 
- virtual void FreeTensorMemDynamic(void *device_ptr); - virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size); + virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); + virtual void *MallocMemFromMemPool(size_t size); + virtual void FreeMemFromMemPool(void *device_ptr); + size_t GetCommonAlignSize(size_t input_size) const; size_t GetCommunicationAlignSize(size_t input_size) const; @@ -57,9 +56,7 @@ class MemoryManager { virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); uint8_t *device_mem_base_{nullptr}; - uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_size_{0}; uint64_t dynamic_mem_offset_{0}; uint64_t static_mem_offset_{0}; size_t total_static_size_ = 0; diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 3c1351a857..f5bc07ff69 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -95,7 +95,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc" + "../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" "../../../mindspore/ccsrc/predict/predict.cc" "../../../mindspore/ccsrc/predict/converter/*.cc" From 735923c0ce8765d885ece6530fa35ae03cf8cb48 Mon Sep 17 00:00:00 2001 From: zjun Date: Tue, 7 Apr 2020 21:52:28 +0800 Subject: [PATCH 48/58] add aicpu opinfo register --- .../ccsrc/kernel/aicpu/aicpu_kernel_build.cc | 116 +++++---- .../kernel/aicpu/aicpu_kernel_metadata.cc | 78 ++---- mindspore/ccsrc/kernel/aicpu/aicpu_util.h | 3 +- mindspore/ccsrc/kernel/common_utils.cc | 9 + 
mindspore/ccsrc/kernel/oplib/opinfo.h | 2 +- mindspore/ccsrc/kernel/oplib/oplib.cc | 21 +- mindspore/ops/__init__.py | 4 +- mindspore/ops/_op_impl/__init__.py | 1 + mindspore/ops/_op_impl/aicpu/__init__.py | 19 ++ .../ops/_op_impl/aicpu/dropout_genmask.py | 32 +++ mindspore/ops/_op_impl/aicpu/get_next.py | 39 +++ .../ops/_op_impl/aicpu/init_data_set_queue.py | 27 +++ mindspore/ops/_op_impl/aicpu/print_tensor.py | 39 +++ mindspore/ops/op_info_register.py | 225 +++++++++++------- 14 files changed, 409 insertions(+), 206 deletions(-) create mode 100644 mindspore/ops/_op_impl/aicpu/__init__.py create mode 100644 mindspore/ops/_op_impl/aicpu/dropout_genmask.py create mode 100644 mindspore/ops/_op_impl/aicpu/get_next.py create mode 100644 mindspore/ops/_op_impl/aicpu/init_data_set_queue.py create mode 100644 mindspore/ops/_op_impl/aicpu/print_tensor.py diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc index c89e27c8ce..cf23779415 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc @@ -39,45 +39,7 @@ namespace mindspore { namespace kernel { using FNodeAttrHandle = std::function &anf_node, mindspore::NodeDef *proto)>; -const std::vector local_framework_op_vec = {kInitDataSetQueue, kGetNext, kDropoutGenMask, kPrint}; - -void InitDataSetQueueAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - std::string channel_name = AnfAlgo::GetNodeAttr(anf_node, kQueueName); - (*node_attr)[kChannelName].set_s(channel_name); -} - -void GetNextAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> 
*node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - std::string shared_name = AnfAlgo::GetNodeAttr(anf_node, kSharedName); - (*node_attr)[kChannelName].set_s(shared_name); -} - -void DropoutGenMaskAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - int seed = AnfAlgo::GetNodeAttr(anf_node, kSeed); - int seed2 = AnfAlgo::GetNodeAttr(anf_node, kSeed2); - (*node_attr)["seed"].set_i(seed); - (*node_attr)["seed2"].set_i(seed2); -} - -void CreateAttrFuncMap(std::map *mOpAttrFuncMap) { - (void)mOpAttrFuncMap->emplace(std::pair(kInitDataSetQueue, InitDataSetQueueAttr)); - (void)mOpAttrFuncMap->emplace(std::pair(kGetNext, GetNextAttr)); - (void)mOpAttrFuncMap->emplace(std::pair(kDropoutGenMask, DropoutGenMaskAttr)); -} +const std::vector local_framework_op_vec = {kInitData, kGetNext, kDropoutGenMask, kPrint}; bool SetIOIputSize(const std::shared_ptr &anf_node, const size_t &input_num, std::vector *input_size_list) { @@ -147,24 +109,74 @@ bool SetIOSize(const std::shared_ptr &anf_node, const std::shared_ptr *node_attr) { + MS_EXCEPTION_IF_NULL(node_attr); + if (type == "int") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_i(attr_value); + } else if (type == "str") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_s(attr_value); + } else if (type == "bool") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_b(attr_value); + } else if (type == "float") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_f(attr_value); + } else if (type == "listInt") { + std::vector attr_value; + auto value_type = value->type(); + MS_EXCEPTION_IF_NULL(value_type); + auto value_type_str = value_type->ToString(); + if (value_type_str == "Int32") { + int data = GetValue(value); + 
attr_value.push_back(data); + } else { + attr_value = GetValue>(value); + } + mindspore::AttrValue input_shape_attr; + mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array(); + MS_EXCEPTION_IF_NULL(input_shape_attr_list); + for (const auto shape : attr_value) { + input_shape_attr_list->add_i(shape); + } + (*node_attr)[attr_name] = input_shape_attr; + } else { + MS_LOG(EXCEPTION) << "type: " << type << "not support"; + } +} + void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { std::string op_name = AnfAlgo::GetCNodeName(anf_node); - if (op_name == "InitDataSetQueue") { - op_name = "InitData"; + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } - if (op_name == "Print") { + if (op_name == kPrint) { return; } - std::map mOpAttrFuncMap; - CreateAttrFuncMap(&mOpAttrFuncMap); - FNodeAttrHandle func_ptr = nullptr; - auto iter = mOpAttrFuncMap.find(op_name); - if (iter != mOpAttrFuncMap.end()) { - func_ptr = iter->second; - MS_EXCEPTION_IF_NULL(func_ptr); - func_ptr(anf_node, proto); - } else { - MS_LOG(ERROR) << "Don't support node [" << op_name << "] to set nodedef of attr"; + + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); + MS_EXCEPTION_IF_NULL(op_info_ptr); + auto attrs_ptr = op_info_ptr->attrs_ptr(); + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); + for (const auto &attr_ptr : attrs_ptr) { + std::string attr_name = attr_ptr->name(); + std::string real_name; + auto value = primitive->GetAttr(attr_name); + if (value != nullptr) { + if (attr_name == kQueueName || attr_name == kSharedName) { + real_name = kChannelName; + } else if (attr_name == kSeed) { + real_name = "seed"; + } else if (attr_name == kSeed2) { + real_name = "seed2"; + } + std::string type = attr_ptr->type(); + ParseAttrValue(type, real_name, value, 
node_attr); + } } MS_LOG(INFO) << "Set node attr end!"; } diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc index ac0b0d9f7a..6675051069 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc @@ -17,68 +17,27 @@ #include "kernel/aicpu/aicpu_kernel_metadata.h" #include #include +#include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" +#include "kernel/aicpu/aicpu_util.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { -constexpr auto kInitDataSetQueueOpName = "InitDataSetQueue"; -constexpr auto kGetNext = "GetNext"; -constexpr auto kDropoutGenMask = "DropoutGenMask"; -constexpr auto kPrint = "Print"; -const std::vector AICPU_OPS = {kInitDataSetQueueOpName, kGetNext, kDropoutGenMask, kPrint}; - -std::shared_ptr CreateKernelInfo(const std::vector &inputs_format, - const std::vector &inputs_device_type, - const std::vector &outputs_format, - const std::vector &outputs_device_type) { - auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); - builder.SetInputsFormat(inputs_format); - builder.SetInputsDeviceType(inputs_device_type); - builder.SetOutputsFormat(outputs_format); - builder.SetOutputsDeviceType(outputs_device_type); - builder.SetProcessor(AICPU); - builder.SetKernelType(AICPU_KERNEL); - builder.SetFusionType(OPAQUE); - return builder.Build(); -} - -bool CheckIfExistAicpuMeta(const std::string &op_name) { - if (std::find(AICPU_OPS.begin(), AICPU_OPS.end(), op_name) != AICPU_OPS.end()) { - return false; - } - return true; -} - void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { MS_LOG(INFO) << "AicpuMetadataInfo."; MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(kernel_info_list); std::string op_name = AnfAlgo::GetCNodeName(kernel_node); - if (CheckIfExistAicpuMeta(op_name)) { - MS_LOG(DEBUG) << "Aicpu doesn't have metadata of op 
[" << op_name << "]."; - return; - } - - if (op_name == kInitDataSetQueueOpName) { - kernel_info_list->push_back(CreateKernelInfo({}, {}, {}, {})); + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } - - if (op_name == kGetNext) { - std::vector outputs_format; - std::vector outputs_type; - for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { - outputs_format.emplace_back(kOpFormat_DEFAULT); - outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); - } - kernel_info_list->push_back(CreateKernelInfo({}, {}, outputs_format, outputs_type)); - } - - if (op_name == kDropoutGenMask) { - kernel_info_list->push_back(CreateKernelInfo({kOpFormat_NCHW, kOpFormat_NCHW}, - {kInt32->type_id(), kFloat16->type_id()}, {kOpFormat_NCHW}, - {kUInt8->type_id()})); + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); + if (op_info_ptr == nullptr) { + MS_LOG(WARNING) << "Aicpu doestn't have metadata of op [" << op_name << "]"; + return; } - + // For compatibility with the current framework if (op_name == kPrint) { std::vector inputs_format; std::vector inputs_type; @@ -92,11 +51,20 @@ void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vectorpush_back(CreateKernelInfo(inputs_format, inputs_type, outputs_format, outputs_type)); + auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); + builder.SetInputsFormat(inputs_format); + builder.SetInputsDeviceType(inputs_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_type); + builder.SetProcessor(AICPU); + builder.SetKernelType(AICPU_KERNEL); + builder.SetFusionType(OPAQUE); + kernel_info_list->push_back(builder.Build()); + return; } - - if (kernel_info_list->empty()) { - MS_LOG(INFO) << "Aicpu dose not has metadata of op[ " << op_name << "]."; + if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) { + MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name 
<< "] failed"; + return; } } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h index f521418f6b..08fca16a3b 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h @@ -24,7 +24,8 @@ namespace mindspore { namespace kernel { -constexpr auto kInitDataSetQueue = "InitData"; +constexpr auto kInitDataSetQueue = "InitDataSetQueue"; +constexpr auto kInitData = "InitData"; constexpr auto kGetNext = "GetNext"; constexpr auto kDropoutGenMask = "DropoutGenMask"; constexpr auto kPrint = "Print"; diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index c2f2638753..137ae65414 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -417,6 +417,8 @@ void SetKernelBuildInfo(const std::shared_ptrSetKernelType(AUTO_DIFF_KERNEL); + } else if (imply_type == kAICPU) { + builder->SetKernelType(AICPU_KERNEL); } else { builder->SetKernelType(TBE_KERNEL); } @@ -471,6 +473,13 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptrpush_back(builder->Build()); + } + } else { + if (processor == AICPU) { + auto builder = std::make_shared(); + MS_EXCEPTION_IF_NULL(builder); + SetKernelBuildInfo(builder, processor, op_info_ptr); kernel_info_list->push_back(builder->Build()); } } diff --git a/mindspore/ccsrc/kernel/oplib/opinfo.h b/mindspore/ccsrc/kernel/oplib/opinfo.h index 56abea9269..215df21776 100644 --- a/mindspore/ccsrc/kernel/oplib/opinfo.h +++ b/mindspore/ccsrc/kernel/oplib/opinfo.h @@ -24,7 +24,7 @@ namespace mindspore { namespace kernel { -enum OpImplyType { kAKG = 0, kTBE }; +enum OpImplyType { kAKG = 0, kTBE = 1, kAICPU }; enum OpIOType { kInput = 0, kOutput }; class OpAttr { diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc index 4059b8e246..d2464bce47 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ 
b/mindspore/ccsrc/kernel/oplib/oplib.cc @@ -39,6 +39,7 @@ constexpr auto kDtypeFormat = "dtype_format"; constexpr auto kAttr = "attr"; constexpr auto kIputs = "inputs"; constexpr auto kOutputs = "outputs"; +constexpr auto kAiCPU = "AiCPU"; constexpr auto kTbe = "TBE"; constexpr auto kAkg = "akg"; constexpr auto kAutodiff = "AutoDiff"; @@ -60,6 +61,8 @@ std::string ImplTypeToStr(OpImplyType impl_type) { return kTbe; case kAKG: return kAkg; + case kAICPU: + return kAiCPU; default: return "unknow"; } @@ -76,6 +79,9 @@ bool OpLib::RegOp(const std::string& json_string, const std::string& impl_path) } else if (imply_type_string == kAutodiff) { OpImplyType imply_type = kAKG; ret = DecodeOpInfo(op_json, imply_type, impl_path); + } else if (imply_type_string == kAiCPU) { + OpImplyType imply_type = kAICPU; + ret = DecodeOpInfo(op_json, imply_type, impl_path); } else { MS_LOG(DEBUG) << "Not support imply_type"; } @@ -154,7 +160,9 @@ bool OpLib::DecodeAttr(const nlohmann::json& obj, const OpImplyType imply_type, std::shared_ptr op_attr = std::make_shared(); MS_EXCEPTION_IF_NULL(op_attr); op_attr->set_name(obj.at(kName)); - op_attr->set_param_type(obj.at(kParamType)); + if (imply_type != kAICPU) { + op_attr->set_param_type(obj.at(kParamType)); + } op_attr->set_type(obj.at(kType)); if (imply_type == kTBE) { op_attr->set_value(obj.at(kValue)); @@ -242,9 +250,10 @@ std::shared_ptr OpLib::FindOp(const std::string& op_name, OpImplyType im auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); - if ((is_gpu && imply_type == kTBE) || (!is_gpu && imply_type != kTBE)) { - MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) - << "current op num:" << op_info_.size(); + if ((is_gpu && (imply_type == kTBE || imply_type == kAICPU)) || + (!is_gpu && (imply_type != kTBE && imply_type != kAICPU))) { + MS_LOG(ERROR) << "FindOp failed: opname:" << op_name << ", imply_type:" << 
ImplTypeToStr(imply_type) + << ", current op num:" << op_info_.size(); return nullptr; } for (const auto& op_info : op_info_) { @@ -253,8 +262,8 @@ std::shared_ptr OpLib::FindOp(const std::string& op_name, OpImplyType im return op_info; } } - MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) - << "current op num:" << op_info_.size(); + MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << ", imply_type:" << ImplTypeToStr(imply_type) + << ", current op num:" << op_info_.size(); return nullptr; } diff --git a/mindspore/ops/__init__.py b/mindspore/ops/__init__.py index 6f4f680672..0e6c114566 100644 --- a/mindspore/ops/__init__.py +++ b/mindspore/ops/__init__.py @@ -30,7 +30,7 @@ Note: from .primitive import Primitive, PrimitiveWithInfer, prim_attr_register from .vm_impl_registry import get_vm_impl_fn, vm_impl_registry -from .op_info_register import op_info_register, TBERegOp, DataType +from .op_info_register import op_info_register, AiCPURegOp, TBERegOp, DataType from .primitive import constexpr from .._c_expression import signature_rw, signature_kind @@ -40,6 +40,6 @@ __primitive__ = [ ] __all__ = ["get_vm_impl_fn", "vm_impl_registry", - "op_info_register", "TBERegOp", "DataType", + "op_info_register", "AiCPURegOp", "TBERegOp", "DataType", "constexpr"] __all__.extend(__primitive__) diff --git a/mindspore/ops/_op_impl/__init__.py b/mindspore/ops/_op_impl/__init__.py index b8370cc64e..76444881cc 100644 --- a/mindspore/ops/_op_impl/__init__.py +++ b/mindspore/ops/_op_impl/__init__.py @@ -16,5 +16,6 @@ from .akg.gpu import * from .tbe import * +from .aicpu import * __all__ = [] diff --git a/mindspore/ops/_op_impl/aicpu/__init__.py b/mindspore/ops/_op_impl/aicpu/__init__.py new file mode 100644 index 0000000000..b0f90a629b --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""aicpu ops""" +from .init_data_set_queue import _init_data_set_queue_aicpu +from .dropout_genmask import _dropout_genmask_aicpu +from .get_next import _get_next_aicpu +from .print_tensor import _print_aicpu diff --git a/mindspore/ops/_op_impl/aicpu/dropout_genmask.py b/mindspore/ops/_op_impl/aicpu/dropout_genmask.py new file mode 100644 index 0000000000..96707a5010 --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/dropout_genmask.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +dropout_genmask_op_info = AiCPURegOp("DropoutGenMask") \ + .fusion_type("OPAQUE") \ + .input(0, "x1", "required") \ + .input(1, "x2", "required") \ + .output(0, "y", "required") \ + .attr("Seed0", "int") \ + .attr("Seed1", "int") \ + .dtype_format(DataType.I32_NCHW, DataType.F16_NCHW, DataType.U8_NCHW) \ + .get_op_info() + +@op_info_register(dropout_genmask_op_info) +def _dropout_genmask_aicpu(): + """Dropout AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/get_next.py b/mindspore/ops/_op_impl/aicpu/get_next.py new file mode 100644 index 0000000000..ce32014211 --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/get_next.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +get_next_op_info = AiCPURegOp("GetNext") \ + .fusion_type("OPAQUE") \ + .output(0, "y", "dynamic") \ + .attr("shared_name", "str") \ + .dtype_format(DataType.BOOL_Default) \ + .dtype_format(DataType.I8_Default) \ + .dtype_format(DataType.I16_Default) \ + .dtype_format(DataType.I32_Default) \ + .dtype_format(DataType.I64_Default) \ + .dtype_format(DataType.F16_Default) \ + .dtype_format(DataType.U8_Default) \ + .dtype_format(DataType.U16_Default) \ + .dtype_format(DataType.U32_Default) \ + .dtype_format(DataType.U64_Default) \ + .dtype_format(DataType.F32_Default) \ + .get_op_info() + +@op_info_register(get_next_op_info) +def _get_next_aicpu(): + """GetNext AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py b/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py new file mode 100644 index 0000000000..a48e01eced --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py @@ -0,0 +1,27 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp + +init_data_set_queue_op_info = AiCPURegOp("InitData") \ + .fusion_type("OPAQUE") \ + .attr("queue_name", "str") \ + .get_op_info() + +@op_info_register(init_data_set_queue_op_info) +def _init_data_set_queue_aicpu(): + """InitDataSetQueue AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/print_tensor.py b/mindspore/ops/_op_impl/aicpu/print_tensor.py new file mode 100644 index 0000000000..011f4a3d9d --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/print_tensor.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +print_op_info = AiCPURegOp("Print") \ + .fusion_type("OPAQUE") \ + .input(0, "x", "dynamic") \ + .output(0, "y", "required") \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .dtype_format(DataType.U32_Default, DataType.U32_Default) \ + .dtype_format(DataType.U64_Default, DataType.U64_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + +@op_info_register(print_op_info) +def _print_aicpu(): + """Print AiCPU register""" + return diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py index 6a42099c89..0750094e18 100644 --- a/mindspore/ops/op_info_register.py +++ b/mindspore/ops/op_info_register.py @@ -78,14 +78,15 @@ class RegOp(): self.inputs = [] self.outputs = [] self.attr_ = [] + self.fusion_type_ = '' self.dtype_format_ = [] - def is_string(self, value): + def _is_string(self, value): """ Check if the value is a str type. Args: - value: Parameter to to check. + value: Parameter to be checked. Raises: TypeError: If the type of value is not a str. @@ -93,12 +94,12 @@ class RegOp(): if not isinstance(value, str): raise TypeError("%s value must be str" % str(value)) - def is_int(self, value): + def _is_int(self, value): """ Check if the value is a int. Args: - value: Parameter to to check. + value: Parameter to be checked. 
Raises: TypeError: If the type of value is not a int. @@ -106,12 +107,12 @@ class RegOp(): if not isinstance(value, int): raise TypeError("%s value must be int" % str(value)) - def is_bool(self, value): + def _is_bool(self, value): """ Check if the value is a bool. Args: - value: Parameter to to check. + value: Parameter to be checked. Raises: TypeError: If the type of value is not a bool. @@ -119,6 +120,51 @@ class RegOp(): if not isinstance(value, bool): raise TypeError("%s value must be bool" % str(value)) + def _check_param(self, param_list, key_list, fn_list, kwargs): + """ + Check if the parameter type is correct. + + Args: + param_list (list): Parameter list to be checked. + key_list (list): The keys of output dict. + fn_list (list): Function used for parameter checking. If the function list has only one element, + all parameters will use the same function. + kwargs (dict): Other parameter information. + + Raises: + TypeError: If the type of value is not list. + ValueError: If the size of param list is not equal to the size of key list, or + the size of param list is not equal to the size of funtion list. + """ + for i in [param_list, key_list, fn_list]: + if not isinstance(i, list): + raise TypeError("%s value must be list type" % str(i)) + if len(param_list) != len(key_list) or (len(fn_list) != 1 and len(param_list) != len(fn_list)): + raise ValueError("param_list size {}, key_list size {}, must be equal.And fn_list size {}.". + format(len(param_list), len(key_list), len(fn_list))) + out_dict = {} + for idx, element in enumerate(param_list): + if element is not None: + if len(fn_list) == 1: + fn_list[0](element) + else: + fn_list[idx](element) + out_dict[key_list[idx]] = element + if kwargs: + out_dict = dict(out_dict, kwargs) + return out_dict + + def fusion_type(self, fusion_type): + """ + Register fusion type. + + Args: + fusion_type (str): Value of fusion type. 
+ """ + self._is_string(fusion_type) + self.fusion_type_ = fusion_type + return self + def dtype_format(self, *args): """ Register dtype and format. @@ -136,8 +182,8 @@ class RegOp(): for arg in args: if not isinstance(arg, tuple) or len(arg) != 2: raise ValueError("dtype and format value must be tuple of two elements") - self.is_string(arg[0]) - self.is_string(arg[1]) + self._is_string(arg[0]) + self._is_string(arg[1]) dtype_format.append(arg) self.dtype_format_.append(tuple(dtype_format)) return self @@ -159,13 +205,71 @@ class RegOp(): return op_info +class AiCPURegOp(RegOp): + """Class for AiCPU op info register""" + + def __init__(self, op_name): + super(AiCPURegOp, self).__init__(op_name) + self.imply_type = "AiCPU" + + def input(self, index=None, name=None, param_type=None, **kwargs): + """ + Register AiCPU op input information. + + Args: + index (int): Order of the input. Default: None. + name (str): Name of the input. Default: None. + param_type (str): Param type of the input. Default: None. + kwargs (dict): Other information for the input. + """ + param_list = [index, name, param_type] + key_list = ["index", "name", "param_type"] + fn_list = [self._is_int, self._is_string, self._is_string] + input_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.inputs.append(input_dict) + return self + + def output(self, index=None, name=None, param_type=None, **kwargs): + """ + Register AiCPU op output information. + + Args: + index (int): Order of the output. Default: None. + name (str): Name of the output. Default: None. + param_type (str): Param type of the output. Default: None. + kwargs (dict): Other information for the output. 
+ """ + param_list = [index, name, param_type] + key_list = ["index", "name", "param_type"] + fn_list = [self._is_int, self._is_string, self._is_string] + output_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.outputs.append(output_dict) + return self + + def attr(self, name=None, value_type=None, value=None, **kwargs): + """ + Register AiCPU op attribute information. + + Args: + name (str): Name of the attribute. Default: None. + value_type (str): Value type of the attribute. Default: None. + value (str): Value type of the attribute. Default: None. + kwargs (dict): Other information for the attribute. + """ + param_list = [name, value_type, value] + key_list = ["name", "type", "value"] + fn_list = [self._is_string] + attr_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.attr_.append(attr_dict) + return self + + class TBERegOp(RegOp): """Class for TBE op info register.""" def __init__(self, op_name=""): super(TBERegOp, self).__init__(op_name) self.imply_type = "TBE" - self.fusion_type_ = '' self.async_flag_ = False self.binfile_name_ = '' self.compute_cost_ = 10 @@ -175,17 +279,6 @@ class TBERegOp(RegOp): self.dynamic_format_ = False self.op_pattern_ = "" - def fusion_type(self, fusion_type): - """ - Register fusion type. - - Args: - fusion_type (str): Value of fusion type. - """ - self.is_string(fusion_type) - self.fusion_type_ = fusion_type - return self - def async_flag(self, async_flag): """ Register async flag. @@ -193,7 +286,7 @@ class TBERegOp(RegOp): Args: async_flag (bool): Value of async flag. """ - self.is_bool(async_flag) + self._is_bool(async_flag) self.async_flag_ = async_flag return self @@ -204,7 +297,7 @@ class TBERegOp(RegOp): Args: binfile_name (str): Name of op binfile. """ - self.is_string(binfile_name) + self._is_string(binfile_name) self.binfile_name_ = binfile_name return self @@ -215,7 +308,7 @@ class TBERegOp(RegOp): Args: compute_cost (int): Value of compute cost. 
""" - self.is_int(compute_cost) + self._is_int(compute_cost) self.compute_cost_ = compute_cost return self @@ -226,7 +319,7 @@ class TBERegOp(RegOp): Args: kernel_name (str): Name of op kernel. """ - self.is_string(kernel_name) + self._is_string(kernel_name) self.kernel_name_ = kernel_name return self @@ -237,7 +330,7 @@ class TBERegOp(RegOp): Args: partial_flag (bool): Value of partial flag. """ - self.is_bool(partial_flag) + self._is_bool(partial_flag) self.partial_flag_ = partial_flag return self @@ -248,7 +341,7 @@ class TBERegOp(RegOp): Args: reshape_type (str): Value of reshape type. """ - self.is_string(reshape_type) + self._is_string(reshape_type) self.reshape_type_ = reshape_type return self @@ -259,56 +352,43 @@ class TBERegOp(RegOp): Args: reshape_type (bool): Value of dynamic format. """ - self.is_bool(dynamic_format) + self._is_bool(dynamic_format) self.dynamic_format_ = dynamic_format return self def op_pattern(self, pattern=None): """ - Register op pattern information. + Register TBE op pattern information. Args: pattern (str): Value of op pattern. """ - if pattern is not None and self.istring(pattern): + if pattern is not None and self._is_string(pattern): self.op_pattern_ = pattern return self def attr(self, name=None, param_type=None, value_type=None, value=None, default_value=None, **kwargs): """ - Register op attribute information. + Register TBE op attribute information. Args: name (str): Name of the attribute. Default: None. param_type (str): Param type of the attribute. Default: None. - type (str): Type of the attribute. Default: None. + value_type (str): Type of the attribute. Default: None. value (str): Value of the attribute. Default: None. default_value (str): Default value of attribute. Default: None. kwargs (dict): Other information for the attribute. 
""" param_list = [name, param_type, value_type, value, default_value] - attr_dict = {} - for index, element in enumerate(param_list): - if element is not None: - self.is_string(element) - if index == 0: - attr_dict["name"] = element - elif index == 1: - attr_dict["param_type"] = element - elif index == 2: - attr_dict["type"] = element - elif index == 3: - attr_dict["value"] = element - elif index == 4: - attr_dict["default_value"] = element - if kwargs: - attr_dict = dict(attr_dict, **kwargs) + key_list = ["name", "param_type", "type", "value", "default_value"] + fn_list = [self._is_string] + attr_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.attr_.append(attr_dict) return self def input(self, index=None, name=None, need_compile=None, param_type=None, shape=None, **kwargs): """ - Register op input information. + Register TBE op input information. Args: index (int): Order of the input. Default: None. @@ -319,32 +399,15 @@ class TBERegOp(RegOp): kwargs (dict): Other information for the input. 
""" param_list = [index, name, need_compile, param_type, shape] - input_dict = {} - for idx, element in enumerate(param_list): - if element is not None: - if idx == 0: - self.is_int(element) - input_dict["index"] = element - elif idx == 1: - self.is_string(element) - input_dict["name"] = element - elif idx == 2: - self.is_bool(element) - input_dict["need_compile"] = element - elif idx == 3: - self.is_string(element) - input_dict["param_type"] = element - elif idx == 4: - self.is_string(element) - input_dict["shape"] = element - if kwargs: - input_dict = dict(input_dict, **kwargs) + key_list = ["index", "name", "need_compile", "param_type", "shape"] + fn_list = [self._is_int, self._is_string, self._is_bool, self._is_string, self._is_string] + input_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.inputs.append(input_dict) return self def output(self, index=None, name=None, need_compile=None, param_type=None, shape=None, **kwargs): """ - Register op output information. + Register TBE op output information. Args: index (int): Order of the output. Default: None. @@ -355,29 +418,13 @@ class TBERegOp(RegOp): kwargs (dict): Other information for the output. 
""" param_list = [index, name, need_compile, param_type, shape] - output_dict = {} - for idx, element in enumerate(param_list): - if element is not None: - if idx == 0: - self.is_int(element) - output_dict["index"] = element - elif idx == 1: - self.is_string(element) - output_dict["name"] = element - elif idx == 2: - self.is_bool(element) - output_dict["need_compile"] = element - elif idx == 3: - self.is_string(element) - output_dict["param_type"] = element - elif idx == 4: - self.is_string(element) - output_dict["shape"] = element - if kwargs: - output_dict = dict(output_dict, **kwargs) + key_list = ["index", "name", "need_compile", "param_type", "shape"] + fn_list = [self._is_int, self._is_string, self._is_bool, self._is_string, self._is_string] + output_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.outputs.append(output_dict) return self + class DataType(): """ Various combinations of dtype and formatself. From 99c353e6966cf14a23d421e3ac490fffa7cce324 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 9 Apr 2020 19:07:01 +0800 Subject: [PATCH 49/58] add custom environment variable ASCEND_CUSTOM_PATH for customized Ascend software installation --- cmake/dependency_graphengine.cmake | 6 +++++- mindspore/ccsrc/CMakeLists.txt | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmake/dependency_graphengine.cmake b/cmake/dependency_graphengine.cmake index 2420f47736..2a90cc1458 100644 --- a/cmake/dependency_graphengine.cmake +++ b/cmake/dependency_graphengine.cmake @@ -39,7 +39,11 @@ elseif (DEFINED ENV{D_LINK_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) else() # Ascend mode - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) find_library(c_sec libc_sec.so 
${ASCEND_DRIVER_PATH}) diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 1d104148c3..c49c962bdd 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -296,7 +296,11 @@ if(ENABLE_D) endif() else() MESSAGE("use system default lib") - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) endif() @@ -500,7 +504,11 @@ add_dependencies(add_ms_lib _c_expression) if (NOT ENABLE_GE) if (ENABLE_D) - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) add_custom_target(add_ge_lib ALL COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/common/graph/libgraph.so ${MS_LIB_PATH} From fc4ad192dc11006f25fee75095072a6d5d5f2019 Mon Sep 17 00:00:00 2001 From: liuxiao Date: Thu, 9 Apr 2020 14:37:00 +0800 Subject: [PATCH 50/58] modified api name Stack -> Pack, Unstack -> Unpack --- mindspore/ccsrc/transform/convert.cc | 4 +- mindspore/ops/_grad/grad_array_ops.py | 20 ++++---- mindspore/ops/operations/__init__.py | 6 +-- mindspore/ops/operations/array_ops.py | 70 ++++++++++++--------------- tests/ut/python/ops/test_ops.py | 32 ++++++------ 5 files changed, 62 insertions(+), 70 deletions(-) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index bebd000958..20adec5b97 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -148,8 +148,8 @@ const char kNameSlice[] = "Slice"; const char kNameAddN[] = "AddN"; const char kNameLess[] = "Less"; const char kNameGreater[] = "Greater"; -const char kNameStack[] = "Stack"; -const char kNameUnstack[] = 
"Unstack"; +const char kNamePack[] = "Pack"; +const char kNameUnpack[] = "Unpack"; const char kNameMerge[] = "Merge"; const char kNameGeSwitch[] = "GeSwitch"; diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 0a0caf471e..abad030ae9 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -266,26 +266,26 @@ def get_bprop_gather_v2(self): return bprop -@bprop_getters.register(P.Stack) -def get_bprop_stack(self): - """Generate bprop for Stack""" +@bprop_getters.register(P.Pack) +def get_bprop_pack(self): + """Generate bprop for Pack""" axis = self.axis def bprop(x, out, dout): - stack_grad = P.Unstack(axis) - out = stack_grad(dout) + pack_grad = P.Unpack(axis) + out = pack_grad(dout) return (out,) return bprop -@bprop_getters.register(P.Unstack) -def get_bprop_unstack(self): - """Generate bprop for Unstack""" +@bprop_getters.register(P.Unpack) +def get_bprop_unpack(self): + """Generate bprop for Unpack""" axis = self.axis def bprop(x, out, dout): - unstack_grad = P.Stack(axis) - out = unstack_grad(dout) + unpack_grad = P.Pack(axis) + out = unpack_grad(dout) return (out,) return bprop diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 5fd3f07876..c10aef1ac0 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -19,7 +19,7 @@ Primitive operator classes. A collection of operators to build nerual networks or computing functions. 
""" -from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Stack, Unstack, +from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Pack, Unpack, Diag, DiagPart, DType, ExpandDims, Eye, Fill, GatherNd, GatherV2, InvertPermutation, IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, @@ -112,8 +112,8 @@ __all__ = [ 'OneHot', 'GatherV2', 'Concat', - 'Stack', - 'Unstack', + 'Pack', + 'Unpack', 'Tile', 'BiasAdd', 'Gelu', diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index dda490566f..ac7f8ed699 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1350,8 +1350,8 @@ class Concat(PrimitiveWithInfer): return out -def _get_stack_shape(x_shape, x_type, axis): - """for satck output shape""" +def _get_pack_shape(x_shape, x_type, axis): + """for pack output shape""" validator.check_type("shape", x_shape, [tuple]) validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT) validator.check_subclass("shape0", x_type[0], mstype.tensor) @@ -1368,43 +1368,40 @@ def _get_stack_shape(x_shape, x_type, axis): validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0]) for j in range(rank_base): if v[j] != x_shape[0][j]: - raise ValueError("Stack evaluator element %d shape in input can not stack with first element" % i) + raise ValueError("Pack evaluator element %d shape in input can not pack with first element" % i) out_shape.insert(axis, N) return out_shape -class Stack(PrimitiveWithInfer): +class Pack(PrimitiveWithInfer): r""" - Stacks a list of rank-`R` tensors into one rank-`(R+1)` tensor. + Packs a list of tensors in specified axis. - Packs the list of tensors in `input_x` into a tensor with rank one higher than - each tensor in `input_x`, by packing them along the `axis` dimension. - Given a list of length `N` of tensors of shape `(A, B, C)`; + Packs the list of input tensors with the same rank `R`, output is a tensor of rank `(R+1)`. 
- If `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. - - If `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. Etc. + Given input tensors of shape :math:`(x_1, x_2, ..., x_R)`. Set the number of input tensors as `N`. + If :math:`0 \le axis`, the output tensor shape is :math:`(x_1, x_2, ..., x_{axis}, N, x_{axis+1}, ..., x_R)`. Args: - axis (int): The axis to stack along. Negative values wrap around, - so the valid range is [-(R+1), R+1). Default: 0. + axis (int): Dimension along which to pack. Default: 0. + Negative values wrap around. The range is [-(R+1), R+1). Inputs: - **input_x** (Union[tuple, list]) - A Tuple or list of Tensor objects with the same shape and type. Outputs: - Tensor. A stacked Tensor with the same type as values. + Tensor. A packed Tensor with the same type as `input_x`. Examples: >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) - >>> op = P.Stack() - >>> output = op([data1, data2]) + >>> pack = P.Pack() + >>> output = pack([data1, data2]) [[0, 1], [2, 3]] """ @prim_attr_register def __init__(self, axis=0): - """init Stack""" + """init Pack""" self.__setattr_flag__ = True validator.check_type("axis", axis, [int]) self.axis = axis @@ -1413,38 +1410,33 @@ class Stack(PrimitiveWithInfer): x_shape = value['shape'] x_type = value['dtype'] self.add_prim_attr('num', len(x_shape)) - all_shape = _get_stack_shape(x_shape, x_type, self.axis) + all_shape = _get_pack_shape(x_shape, x_type, self.axis) out = {'shape': all_shape, 'dtype': x_type[0], 'value': None} return out -class Unstack(PrimitiveWithInfer): +class Unpack(PrimitiveWithInfer): r""" - Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. - - Unpacks num tensors from value by chipping it along the axis dimension. - If num is not specified (the default), it is inferred from value's shape. - If value.shape[axis] is not known, ValueError is raised. 
+ Unpacks tensor in specified axis. - For example, given a tensor of shape (A, B, C, D); + Unpacks a tensor of rank `R` along axis dimension, output tensors will have rank `(R-1)`. - If axis == 0 then the i'th tensor in output is the slice value[i, :, :, :] and - each tensor in output will have shape (B, C, D). (Note that the dimension unpacked along is gone, unlike split). + Given a tensor of shape :math:`(x_1, x_2, ..., x_R)`. If :math:`0 \le axis`, + the shape of tensor in output is :math:`(x_1, x_2, ..., x_{axis}, x_{axis+2}, ..., x_R)`. - If axis == 1 then the i'th tensor in output is the slice value[:, i, :, :] and - each tensor in output will have shape (A, C, D). Etc. - - This is the opposite of stack. + This is the opposite of pack. Args: - axis (int): The axis to unstack along. Defaults to the first dimension. - Negative values wrap around, so the valid range is [-R, R). + axis (int): Dimension along which to pack. Default: 0. + Negative values wrap around. The range is [-R, R). + num (int): The number of tensors to be unpacked to. Default : "None". + If `num` is not specified, it is inferred from the shape of `input_x`. Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. - A rank R > 0 Tensor to be unstacked. + A rank R > 0 Tensor to be unpacked. Outputs: A tuple of Tensors, the shape of each objects is same. @@ -1454,15 +1446,15 @@ class Unstack(PrimitiveWithInfer): or if len(input_x.shape[axis]) not equal to num. 
Examples: - >>> unstack = P.Unstack() - >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) - >>> output = unstack(x) + >>> unpack = P.Unpack() + >>> input_x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) + >>> output = unpack(input_x) ([1, 1, 1, 1], [2, 2, 2, 2]) """ @prim_attr_register def __init__(self, axis=0): - """init Unstack""" + """init Unpack""" self.__setattr_flag__ = True validator.check_type("axis", axis, [int]) self.axis = axis @@ -1479,7 +1471,7 @@ class Unstack(PrimitiveWithInfer): validator.check_integer("output_num", output_num, 0, Rel.GT) self.add_prim_attr('num', output_num) output_valid_check = x_shape[self.axis] - output_num - validator.check_integer("the dimension which to unstack divides output_num", output_valid_check, 0, Rel.EQ) + validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ) out_shapes = [] out_dtypes = [] out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 97481e69a2..a3d771d7ec 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -80,9 +80,9 @@ class NetForConcat1(nn.Cell): return self.concat((x1, x2)) -class NetForStackInput(nn.Cell): +class NetForPackInput(nn.Cell): def __init__(self, op): - super(NetForStackInput, self).__init__() + super(NetForPackInput, self).__init__() self.op = op self.mul = P.Mul() @@ -93,9 +93,9 @@ class NetForStackInput(nn.Cell): return self.op(t) -class NetForUnstackInput(nn.Cell): +class NetForUnpackInput(nn.Cell): def __init__(self, op): - super(NetForUnstackInput, self).__init__() + super(NetForUnpackInput, self).__init__() self.op = op self.mul = P.Mul() @@ -996,33 +996,33 @@ test_case_array_ops = [ Tensor(np.array([1], np.float32)), Tensor(np.array([1], np.float32)))], 'desc_bprop': [[3,]]}), - ('StackV2_0', { - 'block': NetForStackInput(P.Stack()), + ('Pack_0', { + 'block': NetForPackInput(P.Pack()), 
'desc_inputs':[[2, 2], [2, 2], [2, 2]], 'desc_bprop':[[3, 2, 2]], }), - ('StackV2_1', { - 'block': NetForStackInput(P.Stack(axis=-2)), + ('Pack_1', { + 'block': NetForPackInput(P.Pack(axis=-2)), 'desc_inputs':[[3, 2, 3], [3, 2, 3], [3, 2, 3]], 'desc_bprop':[[3, 2, 3, 3]], }), - ('StackV2_2', { - 'block': NetForStackInput(P.Stack()), + ('Pack_2', { + 'block': NetForPackInput(P.Pack()), 'desc_inputs':[[2, 2]], 'desc_bprop':[[2, 2, 2]], }), - ('StackV2_3', { - 'block': NetForStackInput(P.Stack()), + ('Pack_3', { + 'block': NetForPackInput(P.Pack()), 'desc_inputs':[[128, 128], [128, 128]], 'desc_bprop':[[2, 128, 128]], }), - ('UnstackV2_0', { - 'block': NetForUnstackInput(P.Unstack(axis=0)), + ('Unpack_0', { + 'block': NetForUnpackInput(P.Unpack(axis=0)), 'desc_inputs':[[2, 4]], 'desc_bprop':[[4], [4]], }), - ('UnstackV2_1', { - 'block': NetForUnstackInput(P.Unstack(axis=-1)), + ('Unpack_1', { + 'block': NetForUnpackInput(P.Unpack(axis=-1)), 'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))], 'desc_bprop':[[1], [1], [1]], }), From f57bd919e08432e7fe4a6177371936cd4680c34c Mon Sep 17 00:00:00 2001 From: yao_yf Date: Thu, 9 Apr 2020 15:02:33 +0800 Subject: [PATCH 51/58] Integrate two allreduce fusion set interfaces into one --- mindspore/parallel/__init__.py | 4 +--- mindspore/parallel/_auto_parallel_context.py | 14 ++++++++++---- ...allreduce_fusion.py => _dp_allreduce_fusion.py} | 4 ++-- 3 files changed, 13 insertions(+), 9 deletions(-) rename mindspore/parallel/{dp_allreduce_fusion.py => _dp_allreduce_fusion.py} (94%) diff --git a/mindspore/parallel/__init__.py b/mindspore/parallel/__init__.py index c79704f110..79d8e67a8d 100644 --- a/mindspore/parallel/__init__.py +++ b/mindspore/parallel/__init__.py @@ -15,9 +15,7 @@ """ This interface is ONLY used in Auto-parallel procedure. 
""" -from .dp_allreduce_fusion import set_fusion_strategy_by_idx, set_fusion_strategy_by_size from .algo_parameter_config import get_algo_parameters, reset_algo_parameters, \ set_algo_parameters -__all__ = ["set_fusion_strategy_by_idx", "set_fusion_strategy_by_size", "get_algo_parameters", - "reset_algo_parameters", "set_algo_parameters"] +__all__ = ["get_algo_parameters", "reset_algo_parameters", "set_algo_parameters"] diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py index 3564ad4395..c99ac4a3c7 100644 --- a/mindspore/parallel/_auto_parallel_context.py +++ b/mindspore/parallel/_auto_parallel_context.py @@ -14,6 +14,8 @@ # ============================================================================ """Context of auto parallel""" import threading +import mindspore.context as context +from mindspore.parallel._dp_allreduce_fusion import _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size from mindspore._c_expression import AutoParallelContext from mindspore._extends.pynative_helper import args_type_check @@ -219,13 +221,15 @@ class _AutoParallelContext: indices (list): Indices list. Raises: - ValueError: If type of indices item is not int. + TypeError: If type of indices item is not int. """ self.check_context_handle() for index in indices: if not isinstance(index, int): raise TypeError('indices has invalid value') - return self._context_handle.set_all_reduce_fusion_split_indices(indices) + self._context_handle.set_all_reduce_fusion_split_indices(indices) + if context.get_context("device_target") == "Ascend": + _set_fusion_strategy_by_idx(indices) def get_all_reduce_fusion_split_indices(self): """Get allreduce fusion split indices.""" @@ -240,13 +244,15 @@ class _AutoParallelContext: sizes (list): Sizes list. Raises: - ValueError: If type of sizes item is not int. + TypeError: If type of sizes item is not int. 
""" self.check_context_handle() for size in sizes: if not isinstance(size, int): raise TypeError('sizes has invalid value') - return self._context_handle.set_all_reduce_fusion_split_sizes(sizes) + self._context_handle.set_all_reduce_fusion_split_sizes(sizes) + if context.get_context("device_target") == "Ascend": + _set_fusion_strategy_by_size(sizes) def get_all_reduce_fusion_split_sizes(self): """Get allreduce fusion split sizes.""" diff --git a/mindspore/parallel/dp_allreduce_fusion.py b/mindspore/parallel/_dp_allreduce_fusion.py similarity index 94% rename from mindspore/parallel/dp_allreduce_fusion.py rename to mindspore/parallel/_dp_allreduce_fusion.py index 979823bd80..3c7039dbd6 100644 --- a/mindspore/parallel/dp_allreduce_fusion.py +++ b/mindspore/parallel/_dp_allreduce_fusion.py @@ -43,7 +43,7 @@ def _c_array(ctype, values): return (ctype * len(values))(*values) -def set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): +def _set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): """ A function set gradient segment strategy according to the index list. @@ -100,7 +100,7 @@ def set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): raise RuntimeError('Allreduce split error') -def set_fusion_strategy_by_size(dataSizeList, group="hccl_world_group"): +def _set_fusion_strategy_by_size(dataSizeList, group="hccl_world_group"): """ A function set gradient segment strategy according to the data size percentage list. 
From dd112c98fc4efe734617dab0c7729c9e51390837 Mon Sep 17 00:00:00 2001 From: jojobugfree Date: Thu, 9 Apr 2020 14:47:05 +0800 Subject: [PATCH 52/58] change logging to mindspore.log --- mindspore/context.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mindspore/context.py b/mindspore/context.py index 89365f3d1c..2938b87119 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -17,16 +17,14 @@ The context of mindspore, used to configure the current execution environment, including execution mode, execution backend and other feature switchs. """ import threading -import logging from collections import namedtuple from types import FunctionType +from mindspore import log as logger from mindspore._c_expression import MSContext from mindspore._extends.pynative_helper import args_type_check from mindspore.parallel._auto_parallel_context import _set_auto_parallel_context, _get_auto_parallel_context, \ _reset_auto_parallel_context -logger = logging.getLogger('Context') - __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'set_context', 'get_context', 'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context'] From a73347db6a70429d45dbcf6c2889760dc95f8033 Mon Sep 17 00:00:00 2001 From: Yanjun Peng Date: Thu, 9 Apr 2020 11:04:13 +0800 Subject: [PATCH 53/58] fix dataset para validator check --- mindspore/dataset/engine/samplers.py | 1 - mindspore/dataset/engine/validators.py | 5 +++++ mindspore/dataset/transforms/vision/validators.py | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index ed36e72b65..62a3dbed18 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -127,7 +127,6 @@ class RandomSampler(): Raises: ValueError: If replacement is not boolean. - ValueError: If num_samples is not None and replacement is false. ValueError: If num_samples is not positive. 
""" diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 4c84cfe354..63d7c58270 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -556,6 +556,11 @@ def check_generatordataset(method): if column_names is None: raise ValueError("column_names is not provided.") + # check prefetch_size range + prefetch_size = param_dict.get('prefetch_size') + if prefetch_size is not None and (prefetch_size <= 0 or prefetch_size > 1024): + raise ValueError("prefetch_size exceeds the boundary.") + check_param_type(nreq_param_int, param_dict, int) check_param_type(nreq_param_list, param_dict, list) diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index caab120af4..ef4b879f8c 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -104,6 +104,10 @@ def check_padding(padding): raise ValueError("The size of the padding list or tuple should be 2 or 4.") else: raise TypeError("Padding can be any of: a number, a tuple or list of size 2 or 4.") + if not (isinstance(left, int) and isinstance(top, int) and isinstance(right, int) and isinstance(bottom, int)): + raise TypeError("Padding value should be integer.") + if left < 0 or top < 0 or right < 0 or bottom < 0: + raise ValueError("Padding value could not be negative.") return left, top, right, bottom From d87fc50e3542ba1ad039bc209ea0f760e68af37a Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Thu, 9 Apr 2020 15:22:33 -0400 Subject: [PATCH 54/58] Correct shuffle UT buffer_size > #dataset-row as valid --- .../data/dataset/golden/shuffle_05_result.npz | Bin 0 -> 1507 bytes tests/ut/python/dataset/test_shuffle.py | 39 +++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) create mode 100644 tests/ut/data/dataset/golden/shuffle_05_result.npz diff --git a/tests/ut/data/dataset/golden/shuffle_05_result.npz 
b/tests/ut/data/dataset/golden/shuffle_05_result.npz new file mode 100644 index 0000000000000000000000000000000000000000..27eb0a470d370fab9d4938e5fb9e3eb50d603bd1 GIT binary patch literal 1507 zcmbW1OH&hB6vw+00!c(c4LUmWtAJtD)+OVZ!lkD_Ooy{uH(5UT@6!HZoH}d``mL*e)pVH>6?PTC89M~ul9TG z?dO$%rm1g8i)dNL$qdDd<(jT_vQs6hpjvN}i*u=EU3;$m94*+Dyc3-;qA%v7F(bNK za;i?Ym?=30Te)Ym8x@=Um9=cyro9-C8N(w3G2<8GyH~jG&lk5g%e8pEZTT|wXz+S zZA(6CbAC()p+SEwLklu2;9t_o`$FzYCXf0b zj85?;sAfj14xJo(1@>V-8T$kd;2;_M1@@qWjEF!B0%UXvv|~3J2Lyr$k#SHUf-W)+ z3ACb(jKcz9>>{IEz(6nhaDp~R7~!TI{mSkI>AIpj#sBTIS#Pf0IggLD*Yq+`gFj^hdG1Z>hvct&~|1=1^6CB15y z{nj4_{h??xX-$6dHm3F0=SJi2X5^Y>M*scfZmwI_c4W#j`#${nxS2L>nE?+lW0^hw zyfm7TS(izJLF*8M$GZ9bVc^%k1_d2?VJqKqtMG(CUo_8FL7cCg&Qz(qkTN zq$&7FZ=;oT0WG9=5GK6~Kj|U@r1zM0pIJ)@I=N2HWoA84Z=Ry9Fzq4J(oB2Av<%Z8 zGcC)s9MkelD{Q}##IPB;`X;DeXyDERm#Z%CI5&yUDWg3FJ>gp=mzNJ$Lv?_;ApHX_g H`G6I{m literal 0 HcmV?d00001 diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index 2b7a251d2c..4a823c5fb7 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -98,6 +98,25 @@ def test_shuffle_04(): save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) +def test_shuffle_05(): + """ + Test shuffle: buffer_size > number-of-rows-in-dataset + """ + logger.info("test_shuffle_05") + # define parameters + buffer_size = 13 + seed = 1 + parameters = {"params": {'buffer_size': buffer_size, "seed": seed}} + + # apply dataset operations + data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES) + ds.config.set_seed(seed) + data1 = data1.shuffle(buffer_size=buffer_size) + + filename = "shuffle_05_result.npz" + save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + + def test_shuffle_exception_01(): """ Test shuffle exception: buffer_size<0 @@ -152,24 +171,6 @@ def test_shuffle_exception_03(): assert "buffer_size" in str(e) -def test_shuffle_exception_04(): - """ - Test shuffle 
exception: buffer_size > number-of-rows-in-dataset - """ - logger.info("test_shuffle_exception_04") - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR) - ds.config.set_seed(1) - try: - data1 = data1.shuffle(buffer_size=13) - sum([1 for _ in data1]) - - except BaseException as e: - logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) - - def test_shuffle_exception_05(): """ Test shuffle exception: Missing mandatory buffer_size input parameter @@ -229,10 +230,10 @@ if __name__ == '__main__': test_shuffle_02() test_shuffle_03() test_shuffle_04() + test_shuffle_05() test_shuffle_exception_01() test_shuffle_exception_02() test_shuffle_exception_03() - test_shuffle_exception_04() test_shuffle_exception_05() test_shuffle_exception_06() test_shuffle_exception_07() From cc1416bfc2e5d1a06e66da466c7bee14aa0d1e42 Mon Sep 17 00:00:00 2001 From: biffex Date: Thu, 9 Apr 2020 15:04:24 +0800 Subject: [PATCH 55/58] constant duplicate mul for momentum --- mindspore/ccsrc/optimizer/irpass.cc | 6 +-- .../optimizer/irpass/arithmetic_simplify.h | 54 ++++++++++++++++++- mindspore/ccsrc/utils/graph_utils.cc | 2 + mindspore/ops/operations/math_ops.py | 8 +++ tests/ut/cpp/optimizer/lib_test.cc | 13 +++++ .../gtest_input/optimizer/opt_test.py | 33 ++++++++++++ 6 files changed, 112 insertions(+), 4 deletions(-) diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc index cdc960792f..0991c31b00 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/optimizer/irpass.cc @@ -45,9 +45,9 @@ namespace mindspore { namespace opt { namespace irpass { OptimizeIRPassLib::OptimizeIRPassLib() { - arithmetic_simplify_ = MakeSubstitution( - ArithmeticSimplify(), "arithmetic_simplify", - {prim::kPrimScalarAdd, prim::kPrimScalarMul, prim::kPrimTensorAdd, prim::kPrimIdentity, prim::kPrimMomentum}); + arithmetic_simplify_ = MakeSubstitution(ArithmeticSimplify(), "arithmetic_simplify", + {prim::kPrimScalarAdd, 
prim::kPrimScalarMul, prim::kPrimTensorAdd, + prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul}); special_op_eliminate_ = MakeSubstitution(SpecialOpEliminater(), "special_op_eliminate", {prim::kPrimInsertGradientOf, prim::kPrimPrintShapeType, prim::kPrimGetRefKey, prim::kPrimMirror, prim::kPrimVirtualDiv}); diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h index 8c5610ed1b..ab191aab20 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h @@ -179,6 +179,55 @@ class OptUpdateZeroTensor : public AnfVisitor { } }; +// {prim::kPrimMul, Tensor1, {orim::kPrimMul, Tensor2, {...}}} -> +// {prim::kPrimMul, {...}, {prim::kPrimMul, Tensor1, Tensor2}} +class ConstantDuplicateMul : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + Reset(); + // {prim::kPrimMul, Tensor1, {...}} + AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(node); + if (vnode_ == nullptr || cnode_ == nullptr) { + return nullptr; + } + auto tensor1 = vnode_; + auto mul = cnode_; + + Reset(); + // {prim::kPrimMul, Tensor2, {...}} + AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(mul); + if (vnode_ == nullptr || cnode_ == nullptr) { + return nullptr; + } + auto tensor2 = vnode_; + auto cnode = cnode_; + + auto PrimMul = GetValueNode(mul->input(0)); + auto fg = node->func_graph(); + auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); + return NewCNode({NewValueNode(PrimMul), cnode, ttmul}, fg); + } + + void Visit(const AnfNodePtr &node) override { + if (IsValueNode(node)) { + vnode_ = node; + } + + if (IsCNode(node)) { + cnode_ = node->cast(); + } + } + + void Reset() { + vnode_ = nullptr; + cnode_ = nullptr; + } + + private: + AnfNodePtr vnode_; + CNodePtr cnode_; +}; + class ArithmeticSimplify { public: ArithmeticSimplify() @@ -186,12 +235,14 @@ class ArithmeticSimplify 
{ add_by_zero_(), tensor_add_by_zero_(), identity_(prim::kPrimIdentity), - opt_update_zero_tensor_() { + opt_update_zero_tensor_(), + constant_duplicate_mul_() { eliminaters_.emplace_back(multiply_by_zero_or_one_); eliminaters_.emplace_back(add_by_zero_); eliminaters_.emplace_back(tensor_add_by_zero_); eliminaters_.emplace_back(identity_); eliminaters_.emplace_back(opt_update_zero_tensor_); + eliminaters_.emplace_back(constant_duplicate_mul_); } ~ArithmeticSimplify() = default; @@ -212,6 +263,7 @@ class ArithmeticSimplify { TensorAddByZero tensor_add_by_zero_; PrimEliminater identity_; OptUpdateZeroTensor opt_update_zero_tensor_; + ConstantDuplicateMul constant_duplicate_mul_; std::vector eliminaters_{}; }; } // namespace irpass diff --git a/mindspore/ccsrc/utils/graph_utils.cc b/mindspore/ccsrc/utils/graph_utils.cc index 938df2c291..55ef8dc3d5 100644 --- a/mindspore/ccsrc/utils/graph_utils.cc +++ b/mindspore/ccsrc/utils/graph_utils.cc @@ -400,6 +400,8 @@ static bool SameNodeShallow(const AnfNodePtr& node1, const AnfNodePtr& node2, Fu auto a2 = GetValueNode(node2); if (a1->isa() && a2->isa()) { return a1->cast()->name() == a2->cast()->name(); + } else if (a1->isa() && a2->isa()) { + return a1->cast()->ValueEqual(*(a2->cast())); } else { return *a1 == *a2; } diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 1294a65d02..106886c45c 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -774,6 +774,14 @@ class Mul(_MathBinaryOp): >>> mul(input_x, input_y) [4, 10, 18] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x * y + out = np.array(out, x.dtype) + return Tensor(out) + return None class Square(PrimitiveWithInfer): diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index ff3c00d37a..2d4cf0e78e 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc 
@@ -543,5 +543,18 @@ TEST_F(TestOptLib, test_print_tuple_wrapper) { ASSERT_TRUE(CheckOpt(before2, after2, patterns)); ASSERT_TRUE(CheckOpt(before3, before3, patterns)); } + +TEST_F(TestOptLib, test_constant_duplicate_mul) { + FuncGraphPtr beforell = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforell"); + FuncGraphPtr beforelr = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforelr"); + FuncGraphPtr beforerl = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforerl"); + FuncGraphPtr beforerr = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforerr"); + FuncGraphPtr after = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "after"); + auto patterns = std::vector({irpass.arithmetic_simplify_}); + ASSERT_TRUE(CheckOpt(beforell, after, patterns)); + ASSERT_TRUE(CheckOpt(beforelr, after, patterns)); + ASSERT_TRUE(CheckOpt(beforerl, after, patterns)); + ASSERT_TRUE(CheckOpt(beforerr, after, patterns)); +} } // namespace opt } // namespace mindspore diff --git a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py index 53eb2130f0..d494ad27d3 100644 --- a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py +++ b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py @@ -16,6 +16,8 @@ from mindspore.ops import Primitive, PrimitiveWithInfer from mindspore.ops import operations as P from mindspore.ops.operations import _grad_ops as G +from mindspore import Tensor +import numpy as np # pylint: disable=unused-variable @@ -903,3 +905,34 @@ def test_print_tuple_wrapper(tag): return print_(make_tuple(x, y, z)) return fns[tag] + +def test_constant_duplicate_mul(tag): + fns = FnDict() + Mul = Primitive('Mul'); + Sqrt = Primitive('Sqrt'); + + x = Tensor(np.array([[2, 2], [2, 3]]).astype('float32')) + tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) + tensor2 = Tensor(np.array([[2.2, 3.1], [3.2, 4.2]]).astype('float32')) + + @fns 
+ def beforell(): + return Mul(tensor1, Mul(tensor2, Sqrt(x))) + + @fns + def beforelr(): + return Mul(tensor1, Mul(Sqrt(x), tensor2)) + + @fns + def beforerl(): + return Mul(Mul(Sqrt(x), tensor2), tensor1) + + @fns + def beforerr(): + return Mul(Mul(Sqrt(x), tensor2), tensor1) + + @fns + def after(): + return Mul(Sqrt(x), Mul(tensor1, tensor2)) + + return fns[tag] From 1c7d0c0b39c34d851d93132facddaa34e4b37fab Mon Sep 17 00:00:00 2001 From: leonwanghui Date: Thu, 9 Apr 2020 15:58:36 +0800 Subject: [PATCH 56/58] Update setuptool info Signed-off-by: leonwanghui --- CONTRIBUTING.md | 4 +- README.md | 2 +- RELEASE.md | 2 +- package.sh | 2 +- setup_package.py => setup.py | 129 +++++++++++++++++++++++------------ 5 files changed, 90 insertions(+), 49 deletions(-) rename setup_package.py => setup.py (53%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 85fee704c2..105c620942 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -105,11 +105,11 @@ When reporting issues, refer to this format: * If it is a new feature that needs lots of design details, a design proposal should also be submitted. * After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR. * None of PRs is not permitted until it receives **2+ LGTM** from approvers. Please NOTICE that approver is NOT allowed to add *LGTM* on his own PR. -* After PR is sufficiently discussed, it will get merged, abondoned or rejected depending on the outcome of the discussion. +* After PR is sufficiently discussed, it will get merged, abandoned or rejected depending on the outcome of the discussion. **PRs advisory:** - Any irrelevant changes should be avoided. - Make sure your commit history being ordered. - Always keep your branch up with the master branch. -- For bug-fix PRs, make sure all related issues being linked. +- For bug-fix PRs, make sure all related issues being linked. 
diff --git a/README.md b/README.md index 925c22591d..be8ca5189a 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/comm - [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers. - IRC channel at `#mindspore` (only for meeting minutes logging purpose) - Video Conferencing: meet.jit.si -- Mailing-list: https://mailweb.mindspore.cn/postorius/lists +- Mailing-list: https://mailweb.mindspore.cn/postorius/lists ## Contributing diff --git a/RELEASE.md b/RELEASE.md index 8920095bb5..ce9064e4b1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -70,4 +70,4 @@ * [MindSpore Official Website] (https://www.mindspore.cn/) * [MindInsight Visualization Debugging and Optimization] (https://gitee.com/mindspore/mindinsight) * [MindArmour Model Security Hardening Package] (https://gitee.com/mindspore/mindarmour) -* [GraphEngine Computational Graph Engine] (https://gitee.com/mindspore/graphengine) \ No newline at end of file +* [GraphEngine Computational Graph Engine] (https://gitee.com/mindspore/graphengine) diff --git a/package.sh b/package.sh index 67f4761f37..0c75a1bbfd 100755 --- a/package.sh +++ b/package.sh @@ -110,7 +110,7 @@ else export MS_PACKAGE_NAME="mindspore" fi -${PYTHON} "${BASEPATH}/setup_package.py" bdist_wheel +${PYTHON} "${BASEPATH}/setup.py" bdist_wheel chmod -R 700 ${PACKAGE_PATH}/mindspore/ chmod -R 700 ${PACKAGE_PATH}/${MS_PACKAGE_NAME//-/_}.egg-info/ diff --git a/setup_package.py b/setup.py similarity index 53% rename from setup_package.py rename to setup.py index 87b5718de2..e009a9b312 100644 --- a/setup_package.py +++ b/setup.py @@ -14,17 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -"""setup_package.""" +"""setup package.""" import os import stat + from setuptools import setup, find_packages from setuptools.command.egg_info import egg_info from setuptools.command.build_py import build_py version = '0.1.0' -author = 'The MindSpore Authors' -author_email = 'contact@mindspore.cn' -home_page = 'https://www.mindspore.cn' backend_policy = os.getenv('BACKEND_POLICY') commit_id = os.getenv('COMMIT_ID').replace("\n", "") @@ -33,56 +31,70 @@ package_name = os.getenv('MS_PACKAGE_NAME').replace("\n", "") pwd = os.path.dirname(os.path.realpath(__file__)) pkg_dir = os.path.join(pwd, 'build/package') -def write_version(file): + +def _read_file(filename): + with open(os.path.join(pwd, filename)) as f: + return f.read() + + +readme = _read_file('README.md') +release = _read_file('RELEASE.md') + + +def _write_version(file): file.write("__version__ = '{}'\n".format(version)) -def write_config(file): + +def _write_config(file): file.write("__backend__ = '{}'\n".format(backend_policy)) -def write_commit_file(file): + +def _write_commit_file(file): file.write("__commit_id__ = '{}'\n".format(commit_id)) -def build_depends(): + +def build_dependencies(): """generate python file""" - version_file = os.path.join(pwd, 'build/package/mindspore', 'version.py') + version_file = os.path.join(pkg_dir, 'mindspore', 'version.py') with open(version_file, 'w') as f: - write_version(f) + _write_version(f) - version_file = os.path.join(pwd, 'mindspore/', 'version.py') + version_file = os.path.join(pwd, 'mindspore', 'version.py') with open(version_file, 'w') as f: - write_version(f) + _write_version(f) - config_file = os.path.join(pwd, 'build/package/mindspore', 'default_config.py') + config_file = os.path.join(pkg_dir, 'mindspore', 'default_config.py') with open(config_file, 'w') as f: - write_config(f) + _write_config(f) - config_file = os.path.join(pwd, 'mindspore/', 'default_config.py') + 
config_file = os.path.join(pwd, 'mindspore', 'default_config.py') with open(config_file, 'w') as f: - write_config(f) + _write_config(f) - commit_file = os.path.join(pwd, 'build/package/mindspore', '.commit_id') + commit_file = os.path.join(pkg_dir, 'mindspore', '.commit_id') with open(commit_file, 'w') as f: - write_commit_file(f) + _write_commit_file(f) - commit_file = os.path.join(pwd, 'mindspore/', '.commit_id') + commit_file = os.path.join(pwd, 'mindspore', '.commit_id') with open(commit_file, 'w') as f: - write_commit_file(f) - -descriptions = 'An AI computing framework that supports development for AI applications in all scenarios.' - -requires = [ - 'numpy >= 1.17.0', - 'protobuf >= 3.8.0', - 'asttokens >= 1.1.13', - 'pillow >= 6.2.0', - 'scipy == 1.3.3', - 'easydict >= 1.9', - 'sympy >= 1.4', - 'cffi >= 1.13.2', - 'decorator >= 4.4.0' - ], + _write_commit_file(f) + -package_datas = { +build_dependencies() + +required_package = [ + 'numpy >= 1.17.0', + 'protobuf >= 3.8.0', + 'asttokens >= 1.1.13', + 'pillow >= 6.2.0', + 'scipy == 1.3.3', + 'easydict >= 1.9', + 'sympy >= 1.4', + 'cffi >= 1.13.2', + 'decorator >= 4.4.0' +] + +package_data = { '': [ '*.so*', 'lib/*.so*', @@ -91,7 +103,6 @@ package_datas = { ] } -build_depends() def update_permissions(path): """ @@ -103,20 +114,25 @@ def update_permissions(path): for dirpath, dirnames, filenames in os.walk(path): for dirname in dirnames: dir_fullpath = os.path.join(dirpath, dirname) - os.chmod(dir_fullpath, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC | stat.S_IRGRP | stat.S_IXGRP) + os.chmod(dir_fullpath, stat.S_IREAD | stat.S_IWRITE | + stat.S_IEXEC | stat.S_IRGRP | stat.S_IXGRP) for filename in filenames: file_fullpath = os.path.join(dirpath, filename) os.chmod(file_fullpath, stat.S_IREAD) + class EggInfo(egg_info): """Egg info.""" + def run(self): super().run() egg_info_dir = os.path.join(pkg_dir, 'mindspore.egg-info') update_permissions(egg_info_dir) + class BuildPy(build_py): """BuildPy.""" + def 
run(self): super().run() mindspore_dir = os.path.join(pkg_dir, 'build', 'lib', 'mindspore') @@ -124,21 +140,46 @@ class BuildPy(build_py): mindspore_dir = os.path.join(pkg_dir, 'build', 'lib', 'akg') update_permissions(mindspore_dir) + setup( - python_requires='>=3.7', name=package_name, version=version, - author=author, - author_email=author_email, - url=home_page, + author='The MindSpore Authors', + author_email='contact@mindspore.cn', + url='https://www.mindspore.cn', + download_url='https://gitee.com/mindspore/mindspore/tags', + project_urls={ + 'Sources': 'https://gitee.com/mindspore/mindspore', + 'Issue Tracker': 'https://gitee.com/mindspore/mindspore/issues', + }, + description='MindSpore is a new open source deep learning training/inference ' + 'framework that could be used for mobile, edge and cloud scenarios.', + long_description="\n\n".join([readme, release]), packages=find_packages(), - package_data=package_datas, + package_data=package_data, include_package_data=True, cmdclass={ 'egg_info': EggInfo, 'build_py': BuildPy, }, - install_requires=requires, - description=descriptions, + python_requires='>=3.7', + install_requires=required_package, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: C++', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], license='Apache 2.0', + keywords='mindspore machine learning', ) From 406475160f0d604e1021e42da2c22a5330128d5b Mon Sep 17 00:00:00 2001 From: c00425699 Date: Thu, 9 Apr 2020 14:40:43 
+0800 Subject: [PATCH 57/58] refactor OperatorCostPtr in OperatorInfo --- .../auto_parallel/operator_costmodel.cc | 54 ------------------- .../auto_parallel/operator_costmodel.h | 30 ++--------- .../ccsrc/parallel/ops_info/activation_info.h | 18 ++----- .../ccsrc/parallel/ops_info/arithmetic_info.h | 6 +-- .../parallel/ops_info/batch_parallel_info.h | 6 +-- .../ccsrc/parallel/ops_info/bias_add_info.h | 6 +-- .../parallel/ops_info/dropout_do_mask_info.h | 8 +-- .../ccsrc/parallel/ops_info/generator_info.h | 6 +-- .../ccsrc/parallel/ops_info/get_next_info.h | 6 +-- .../parallel/ops_info/l2_normalize_info.h | 6 +-- mindspore/ccsrc/parallel/ops_info/loss_info.h | 6 +-- .../ccsrc/parallel/ops_info/matmul_info.cc | 6 +-- .../ccsrc/parallel/ops_info/matmul_info.h | 7 +-- .../ccsrc/parallel/ops_info/onehot_info.h | 6 +-- .../ccsrc/parallel/ops_info/operator_info.cc | 13 +++-- .../ccsrc/parallel/ops_info/operator_info.h | 13 +++-- .../ccsrc/parallel/ops_info/prelu_info.h | 6 +-- .../parallel/ops_info/reduce_method_info.cc | 8 ++- .../parallel/ops_info/reduce_method_info.h | 8 +-- .../ccsrc/parallel/ops_info/reshape_info.h | 8 +-- .../parallel/ops_info/tmp_identity_info.h | 8 +-- .../ccsrc/parallel/ops_info/transpose_info.h | 6 +-- .../parallel/ops_info/virtual_dataset_info.h | 8 +-- .../cpp/parallel/ops_info/activation_test.cc | 8 +-- .../cpp/parallel/ops_info/matmul_info_test.cc | 4 +- .../parallel/ops_info/tensor_add_info_test.cc | 8 +-- .../cpp/parallel/ops_info/tmpidentity_test.cc | 4 +- 27 files changed, 62 insertions(+), 211 deletions(-) diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc index 7c17b499b1..93d7dc56c5 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc @@ -514,60 +514,6 @@ double ArithmeticCost::GetBackwardCommCost(const std::vector& inputs return result; } -double 
L2NormalizeCost::GetBackwardCommCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { - double result = 0.0; - if (is_parameter_[0]) { - TensorInfo input_tensor_info = inputs[0]; - CheckGlobalDeviceManager(); - MS_EXCEPTION_IF_NULL(g_device_manager); - auto total_device_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); - - Shape input_shape = input_tensor_info.shape(); - Shape input_slice_shape = input_tensor_info.slice_shape(); - int32_t used_device_num = 1; - for (size_t i = 0; i < input_shape.size(); ++i) { - used_device_num *= input_shape[i] / input_slice_shape[i]; - } - - if (total_device_num != IntToSize(used_device_num)) - result += ListProduct(input_slice_shape) * static_cast(inputs_type_lengths_[0]); - } - - return result; -} - -double L2NormalizeCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { - TensorInfo input0_info = inputs[0]; - Shape input0_slice_shape = input0_info.slice_shape(); - return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); -} - -double L2NormalizeCost::GetBackwardComputationCost(const std::vector& inputs, - const std::vector&, const int32_t& stage_id) const { - double result = 0.0; - - if (is_parameter_[0]) { - TensorInfo input_tensor_info = inputs[0]; - CheckGlobalDeviceManager(); - MS_EXCEPTION_IF_NULL(g_device_manager); - auto total_device_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); - - Shape input_shape = input_tensor_info.shape(); - Shape input_slice_shape = input_tensor_info.slice_shape(); - int32_t used_device_num = 1; - for (size_t i = 0; i < input_shape.size(); ++i) { - used_device_num *= input_shape[i] / input_slice_shape[i]; - } - - if (total_device_num != IntToSize(used_device_num)) - result += ListProduct(input_slice_shape) * static_cast(inputs_type_lengths_[0]); - } - - return result; -} - bool IsDataParallel(const Shape& shape, const Shape& slice_shape, const int32_t& 
stage_id) { CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h index 8f0099bba3..73f3ff139f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h @@ -132,6 +132,8 @@ class ActivationCost : public OperatorCost { }; using ActivationCostPtr = std::shared_ptr; +using TransposeCost = ActivationCost; +using TransposeCostPtr = std::shared_ptr; class SoftmaxCost : public OperatorCost { public: @@ -415,32 +417,8 @@ class ArithmeticCost : public OperatorCost { const int32_t& stage_id) const override; }; using ArithmeticCostPtr = std::shared_ptr; - -class L2NormalizeCost : public OperatorCost { - public: - L2NormalizeCost() = default; - ~L2NormalizeCost() override = default; - - double GetCommCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); - } - double GetForwardCommCost(const std::vector&, const std::vector&, - const int32_t&) const override { - return 0.0; - } - double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); - } - double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; -}; -using L2NormalizeCostPtr = std::shared_ptr; +using BiasAddCost = ArithmeticCost; +using 
BiasAddCostPtr = std::shared_ptr; class ReduceMethodCost : public OperatorCost { public: diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.h b/mindspore/ccsrc/parallel/ops_info/activation_info.h index 183b593e23..21774c43ee 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.h +++ b/mindspore/ccsrc/parallel/ops_info/activation_info.h @@ -32,8 +32,8 @@ namespace parallel { class ActivationBase : public OperatorInfo { public: ActivationBase(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, - const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) {} + const PrimitiveAttrs& attrs, OperatorCostPtr cost) + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, cost) {} ~ActivationBase() override = default; Status Init(const StrategyPtr& strategy) override; @@ -51,19 +51,13 @@ class Activation : public ActivationBase { public: Activation(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : ActivationBase(name, inputs_shape, outputs_shape, attrs) { - ac_cost_ptr_ = std::make_shared(); - } + : ActivationBase(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~Activation() override = default; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return ac_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; - - private: - ActivationCostPtr ac_cost_ptr_; }; class ActivationInfo : public Activation { @@ -108,13 +102,10 @@ class Softmax : public ActivationBase { public: explicit Softmax(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : ActivationBase(name, inputs_shape, outputs_shape, attrs) { - sm_cost_ptr_ = std::make_shared(); - } + : ActivationBase(name, inputs_shape, 
outputs_shape, attrs, std::make_shared()) {} ~Softmax() override = default; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return sm_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -122,7 +113,6 @@ class Softmax : public ActivationBase { private: std::vector axis_; - SoftmaxCostPtr sm_cost_ptr_; }; class SoftmaxInfo : public Softmax { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h b/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h index 7cd0d66b1b..daa2ad595c 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h +++ b/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h @@ -33,15 +33,12 @@ class ArithmeticBase : public OperatorInfo { public: ArithmeticBase(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - arithmeticcost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~ArithmeticBase() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t) override; Status SetCostUnderStrategy(const StrategyPtr&) override; - OperatorCostPtr GetOperatorCost() const override { return arithmeticcost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -54,7 +51,6 @@ class ArithmeticBase : public OperatorInfo { Status InferTensorMap() override; Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout, const Shape& dev_matrix_array); Shapes InferExpendShape(); - ArithmeticCostPtr arithmeticcost_ptr_; }; class SubInfo : public ArithmeticBase { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h 
b/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h index 57711b5298..fae96dcab5 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h +++ b/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h @@ -31,16 +31,13 @@ class BatchParallelInfo : public OperatorInfo { public: BatchParallelInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs), dev_num_(1) { - bp_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), dev_num_(1) {} ~BatchParallelInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return bp_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -55,7 +52,6 @@ class BatchParallelInfo : public OperatorInfo { private: int32_t dev_num_; - BatchParallelCostPtr bp_cost_ptr_; }; class SparseSoftmaxCrossEntropyWithLogitsInfo : public BatchParallelInfo { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h b/mindspore/ccsrc/parallel/ops_info/bias_add_info.h index 07f0bc00ff..dea5c90c88 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h +++ b/mindspore/ccsrc/parallel/ops_info/bias_add_info.h @@ -34,16 +34,13 @@ class BiasAddInfo : public OperatorInfo { public: BiasAddInfo(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - biasaddcost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~BiasAddInfo() override = default; Status Init(const 
StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t) override; Status SetCostUnderStrategy(const StrategyPtr&) override; - OperatorCostPtr GetOperatorCost() const override { return biasaddcost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -55,7 +52,6 @@ class BiasAddInfo : public OperatorInfo { Status InferDevMatrixShape() override; Status InferTensorMap() override; Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout, const Shape& dev_matrix_array); - ArithmeticCostPtr biasaddcost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h index e43601355a..859b3e06a4 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h +++ b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h @@ -33,15 +33,12 @@ class DropoutDoMaskInfo : public OperatorInfo { public: DropoutDoMaskInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - bpcost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~DropoutDoMaskInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return bpcost_ptr_; } Status InitForCostModel(const StrategyPtr& strategy) override; std::shared_ptr>> GenerateBatchStrategies() override; @@ -53,9 +50,6 @@ class DropoutDoMaskInfo : public OperatorInfo { Status GetAttrs() override { return SUCCESS; } Status InferTensorInfo() override; Status InferDevMatrixShape() override; - - private: - BatchParallelCostPtr bpcost_ptr_; }; 
} // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/generator_info.h b/mindspore/ccsrc/parallel/ops_info/generator_info.h index a280fac28e..68024593f3 100644 --- a/mindspore/ccsrc/parallel/ops_info/generator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/generator_info.h @@ -32,15 +32,12 @@ class GeneratorBase : public OperatorInfo { public: GeneratorBase(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - generatorbasecost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~GeneratorBase() override = default; Status Init(const StrategyPtr &strategy) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return generatorbasecost_ptr_; } Status InitForCostModel(const StrategyPtr &strategy) override; protected: @@ -52,7 +49,6 @@ class GeneratorBase : public OperatorInfo { Status InferMirrorOps() override { return SUCCESS; } Status InferForwardCommunication() override { return SUCCESS; } virtual Status InferReplaceOps(const StrategyPtr &strategy) = 0; - GeneratorBaseCostPtr generatorbasecost_ptr_; }; class DropoutGenMaskInfo : public GeneratorBase { diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.h b/mindspore/ccsrc/parallel/ops_info/get_next_info.h index 32adce1165..9a65eff035 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.h +++ b/mindspore/ccsrc/parallel/ops_info/get_next_info.h @@ -32,14 +32,11 @@ class GetNextInfo : public OperatorInfo { public: GetNextInfo(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - getnextcost_ptr_ = std::make_shared(); - } + : 
OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~GetNextInfo() override = default; Status Init(const StrategyPtr &strategy) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return getnextcost_ptr_; } Status InitForCostModel(const StrategyPtr &strategy) override; Status GenerateStrategies(int32_t stage_id) override; @@ -65,7 +62,6 @@ class GetNextInfo : public OperatorInfo { Shapes shapes_; int32_t output_num_ = 0; std::string shared_name_; - GetNextCostPtr getnextcost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h b/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h index c0af9dbcb9..22ed5a965b 100644 --- a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h +++ b/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h @@ -33,12 +33,9 @@ class L2NormalizeInfo : public Activation { public: L2NormalizeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : Activation(name, inputs_shape, outputs_shape, attrs) { - l2normalizecost_ptr_ = std::make_shared(); - } + : Activation(name, inputs_shape, outputs_shape, attrs) {} ~L2NormalizeInfo() override = default; Status GenerateStrategies(int32_t stage_id) override; - OperatorCostPtr GetOperatorCost() const override { return l2normalizecost_ptr_; } protected: Status GetAttrs() override; @@ -47,7 +44,6 @@ class L2NormalizeInfo : public Activation { private: int32_t axis_ = 0; // Default value = 0 - L2NormalizeCostPtr l2normalizecost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.h b/mindspore/ccsrc/parallel/ops_info/loss_info.h index 6a9697a447..f1c2537a39 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.h +++ b/mindspore/ccsrc/parallel/ops_info/loss_info.h @@ -36,16 +36,13 @@ class 
SoftmaxCrossEntropyWithLogitsInfo : public OperatorInfo { public: SoftmaxCrossEntropyWithLogitsInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - softmax_loss_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~SoftmaxCrossEntropyWithLogitsInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return softmax_loss_cost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -59,7 +56,6 @@ class SoftmaxCrossEntropyWithLogitsInfo : public OperatorInfo { // There are two outputs for SoftmaxCrossEntropyWithLogits, and outputs[1] is used for grad and overload // the InferAsLossDivisor. Status InferAsLossDivisor() override; - SoftmaxCrossEntropyWithLogitsCostPtr softmax_loss_cost_ptr_; private: int32_t axis_ = -1; // default -1 diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc index 2b02dc100d..848116d68a 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc @@ -593,11 +593,11 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& // Here, we use the origin outputs_, because we only use the slice size of the output tensor. // It does not matter whether the output tensor is transposed or not. 
double computation_cost = - matmulcost_ptr->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); - double communication_cost = matmulcost_ptr->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + cost()->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + double communication_cost = cost()->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = - matmulcost_ptr->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + cost()->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = result->communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost - result->communication_without_parameter_); diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.h b/mindspore/ccsrc/parallel/ops_info/matmul_info.h index 7ced12b14a..2d3312774d 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.h +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.h @@ -34,9 +34,7 @@ class MatMulBase : public OperatorInfo { public: MatMulBase(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - matmulcost_ptr = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~MatMulBase() override = default; Status Init(const StrategyPtr& strategy) override; @@ -48,7 +46,6 @@ class MatMulBase : public OperatorInfo { Status PrepareStrategy(int32_t stage_id, size_t dev_num, Dimensions combined_partitions, size_t input0_shape_size, size_t input1_shape_size, StrategyPtr* sp); - OperatorCostPtr GetOperatorCost() const override { return matmulcost_ptr; } Status SwapLastTwoElements(Shape* shape); 
protected: @@ -66,8 +63,6 @@ class MatMulBase : public OperatorInfo { bool transpose_b_ = false; size_t mat_a_dimension_ = 0; size_t mat_b_dimension_ = 0; - - MatMulCostPtr matmulcost_ptr; }; class MatMul : public MatMulBase { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.h b/mindspore/ccsrc/parallel/ops_info/onehot_info.h index 4697e201a4..a54d8479b3 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.h +++ b/mindspore/ccsrc/parallel/ops_info/onehot_info.h @@ -33,16 +33,13 @@ class OneHotInfo : public OperatorInfo { public: OneHotInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - onehot_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~OneHotInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return onehot_cost_ptr_; } ReplaceGraphPtr replace_graph(const CNodePtr& cnode) override; std::shared_ptr>> GenerateBatchStrategies() override; @@ -60,7 +57,6 @@ class OneHotInfo : public OperatorInfo { Status ComputeReplaceGraph(const CNodePtr& cnode); int axis_ = -1; - OneHotCostPtr onehot_cost_ptr_; int32_t rank_ = 0; int32_t total_class_number_ = 1; int32_t classes_each_device_ = 1; diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/parallel/ops_info/operator_info.cc index 11c518d844..a24f3e616b 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.cc @@ -1034,12 +1034,11 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return FAILED; } int32_t stage_id = 
strategy->GetInputStage(); - double computation_cost = - GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); - double communication_cost = GetOperatorCost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double computation_cost = cost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double communication_cost = cost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = - GetOperatorCost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + cost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = result->communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost - result->communication_without_parameter_); @@ -1096,7 +1095,7 @@ Status OperatorInfo::set_is_parameter(const std::vector& is_parameter) { return FAILED; } is_parameter_ = is_parameter; - GetOperatorCost()->set_is_parameter(is_parameter); + cost()->set_is_parameter(is_parameter); return SUCCESS; } @@ -1193,7 +1192,7 @@ Status OperatorInfo::SetInputAndOutputTypeLength(const std::vector& inpu } inputs_type_lengths_ = input_lengths; outputs_type_lengths_ = output_lengths; - GetOperatorCost()->SetInputAndOutputTypeLength(input_lengths, output_lengths); + cost()->SetInputAndOutputTypeLength(input_lengths, output_lengths); return SUCCESS; } @@ -1211,7 +1210,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra } double OperatorInfo::GetForwardMemoryCostFromCNode() { - return GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); + return cost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); } } // namespace parallel diff --git 
a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h index e7b8af0a7e..8fcae8ad33 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h @@ -53,12 +53,13 @@ class Edge; class OperatorInfo { public: - OperatorInfo(std::string name, Shapes inputs_shape, Shapes outputs_shape, PrimitiveAttrs attrs) + OperatorInfo(std::string name, Shapes inputs_shape, Shapes outputs_shape, PrimitiveAttrs attrs, OperatorCostPtr cost) : name_(std::move(name)), inputs_shape_(std::move(inputs_shape)), outputs_shape_(std::move(outputs_shape)), attrs_(std::move(attrs)), - is_alive_(true) { + is_alive_(true), + cost_(cost) { std::vector not_parameteter(inputs_shape_.size(), false); is_parameter_ = not_parameteter; refkey_parameter_name_ = ""; @@ -75,7 +76,8 @@ class OperatorInfo { // Given the stage_id (which indicates the number of devices), // generate all strategies for this operator virtual Status GenerateStrategies(int32_t stage_id) = 0; - virtual OperatorCostPtr GetOperatorCost() const = 0; + const OperatorCostPtr& cost() const { return cost_; } + void set_cost(const OperatorCostPtr& cost) { cost_ = cost; } virtual Status SetCostUnderStrategy(const StrategyPtr& strategy) = 0; virtual std::shared_ptr>> GenerateBatchStrategies(); @@ -115,7 +117,7 @@ class OperatorInfo { void ReplaceSuccEdge(const std::shared_ptr& op, const std::shared_ptr& new_edge); void ReplacePreEdges(const std::shared_ptr& op, const std::shared_ptr& new_edge); void ReplaceSuccEdges(const std::shared_ptr& op, const std::shared_ptr& new_edge); - std::vector GetOutputTypeLengths() const { return GetOperatorCost()->outputs_type_lengths(); } + std::vector GetOutputTypeLengths() const { return cost()->outputs_type_lengths(); } void SetSelectedStrategyAndCost(const StrategyPtr& s_strategy, const CostPtr& cost) { selected_strategy_ = s_strategy; selected_cost_ = cost; @@ -221,6 +223,9 @@ class OperatorInfo { 
std::string refkey_parameter_name_; CNodePtr cnode_; int32_t used_devices_ = -1; + + private: + OperatorCostPtr cost_; }; Shape GetSliceShape(const Shape& tensor_shape, const Dimensions& strategy); diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.h b/mindspore/ccsrc/parallel/ops_info/prelu_info.h index d491ecb331..bdfb11550b 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.h +++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.h @@ -35,15 +35,12 @@ class PReLUInfo : public OperatorInfo { public: PReLUInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - prelucost_ptr = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~PReLUInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; - OperatorCostPtr GetOperatorCost() const override { return prelucost_ptr; } Status SetCostUnderStrategy(const StrategyPtr& strategy) override; protected: @@ -59,7 +56,6 @@ class PReLUInfo : public OperatorInfo { private: Dimensions input_strategy_; - PReLUCostPtr prelucost_ptr; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc index 5b07f8d0a9..aa64e72d05 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc @@ -109,8 +109,12 @@ Status ReduceMethod::GetAttrs() { } cross_batch_ = cross_batch_iter->second->cast()->value(); } - reducemethodcost_ptr_->set_cross_batch(cross_batch_); - + auto reducemethodcost = std::dynamic_pointer_cast(cost()); + if (reducemethodcost == nullptr) { + MS_LOG(ERROR) << "Cost cast to ReduceMethodCostPtr failed!"; + return FAILED; + } 
+ reducemethodcost->set_cross_batch(cross_batch_); return SUCCESS; } diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h index 8e2e17af99..c2ddbc87ce 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h +++ b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h @@ -34,9 +34,7 @@ class ReduceMethod : public OperatorInfo { public: ReduceMethod(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - reducemethodcost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~ReduceMethod() override = default; Status Init(const StrategyPtr &strategy) override; @@ -44,13 +42,11 @@ class ReduceMethod : public OperatorInfo { Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return reducemethodcost_ptr_; } protected: std::string reduce_method_; bool keepdims_ = false; bool cross_batch_ = false; - ReduceMethodCostPtr reducemethodcost_ptr_; Status CheckStrategy(const StrategyPtr &strategy) override; Status GetAttrs() override; Dimensions InferOutputStrategy(); @@ -110,7 +106,7 @@ class ReduceMeanInfo : public ReduceMethod { ReduceMeanInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) : ReduceMethod(name, inputs_shape, outputs_shape, attrs) { - reducemethodcost_ptr_ = std::make_shared(); + set_cost(std::make_shared()); } ~ReduceMeanInfo() override = default; diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.h b/mindspore/ccsrc/parallel/ops_info/reshape_info.h index 1d6a14b1f6..38192a5d01 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.h +++ b/mindspore/ccsrc/parallel/ops_info/reshape_info.h @@ -36,12 +36,10 @@ 
class ReshapeInfo : public OperatorInfo { public: ReshapeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs), + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), dev_num_(0), input_layout_set_flag_(false), - output_layout_set_flag_(false) { - reshape_cost_ptr_ = std::make_shared(); - } + output_layout_set_flag_(false) {} ~ReshapeInfo() override = default; Status Init(const StrategyPtr& strategy) override; void SetInputLayout(const TensorLayout& input_layout) { @@ -55,7 +53,6 @@ class ReshapeInfo : public OperatorInfo { Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return reshape_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -67,7 +64,6 @@ class ReshapeInfo : public OperatorInfo { Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout); Status GetAttrs() override; Strategys GetOutputsStrategy(); - ReshapeCostPtr reshape_cost_ptr_; private: Status GetParameterInput(); diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h b/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h index 6df5856e0c..cf850683a6 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h +++ b/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h @@ -34,9 +34,7 @@ class TmpIdentityInfo : public OperatorInfo { public: TmpIdentityInfo(const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs, const std::string& name = IDENTITY_INFO) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - id_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~TmpIdentityInfo() override = 
default; Status Init(const StrategyPtr& strategy) override; @@ -44,7 +42,6 @@ class TmpIdentityInfo : public OperatorInfo { Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return id_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -54,9 +51,6 @@ class TmpIdentityInfo : public OperatorInfo { Status InferTensorInfo() override; Status InferDevMatrixShape() override; Status InferTensorMap() override; - - private: - TmpIdentityCostPtr id_cost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.h b/mindspore/ccsrc/parallel/ops_info/transpose_info.h index 4f6f6bb695..2714b352b6 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.h +++ b/mindspore/ccsrc/parallel/ops_info/transpose_info.h @@ -35,15 +35,12 @@ class TransposeInfo : public OperatorInfo { public: TransposeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - transpose_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~TransposeInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return transpose_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -60,7 +57,6 @@ class TransposeInfo : public OperatorInfo { Status ComputeAxis(); std::vector axis_v_; Dimensions input_strategy_; - ActivationCostPtr transpose_cost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git 
a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h index d0278f27d9..b958adeabe 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h +++ b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h @@ -32,16 +32,13 @@ class VirtualDatasetInfo : public OperatorInfo { public: VirtualDatasetInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - vd_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~VirtualDatasetInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return vd_cost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -53,9 +50,6 @@ class VirtualDatasetInfo : public OperatorInfo { Status InferTensorMap() override; Status GetAttrs() override; Status InferAsLossDivisor() override; - - private: - VirtualDatasetCostPtr vd_cost_ptr_; }; } // namespace parallel diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 5d18c5372f..a8f8425ae9 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -84,9 +84,9 @@ TEST_F(TestActivation, test_activation_strategies) { act_ptr_->InitForCostModel(sp); std::vector inputs_info = act_ptr_->inputs_tensor_info(); std::vector outputs_info = act_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(act_ptr_->cost()->GetComputationCost(inputs_info, 
outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(act_ptr_->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } @@ -109,9 +109,9 @@ TEST_F(TestActivation, test_softmax_strategies) { soft_ptr_->InitForCostModel(sp); std::vector inputs_info = soft_ptr_->inputs_tensor_info(); std::vector outputs_info = soft_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(soft_ptr_->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(soft_ptr_->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index 99ca9f8e0e..2fece098e8 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -569,7 +569,7 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies1) { matmul1->InitForCostModel(sp); std::vector inputs_info = matmul1->inputs_tensor_info(); std::vector outputs_info = matmul1->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(matmul1->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); break; } @@ -599,7 +599,7 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies2) { TensorInfo replica_input1_info(tly, input1_shape, input1_slice_shape); replica_inputs_info.push_back(replica_input1_info); - 
ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(matmul3->cost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); break; } diff --git a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index 6cb9739b1c..8c956328a7 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -188,11 +188,11 @@ TEST_F(TestTensorAddInfo, GenerateStrategies) { tensor_add->InitForCostModel(sp); std::vector inputs_info = tensor_add->inputs_tensor_info(); std::vector outputs_info = tensor_add->outputs_tensor_info(); - double memory_cost0 = tensor_add->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost0 = tensor_add->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; - double comm_cost0 = tensor_add->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); + double comm_cost0 = tensor_add->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); double comm_cost1 = cost.communication_cost_; bool comm = comm_cost0 - comm_cost1 <= 1.0; @@ -210,11 +210,11 @@ TEST_F(TestTensorAddInfo, GenerateStrategies1) { tensor_add1->InitForCostModel(sp); std::vector inputs_info = tensor_add1->inputs_tensor_info(); std::vector outputs_info = tensor_add1->outputs_tensor_info(); - double memory_cost0 = tensor_add1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost0 = tensor_add1->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; - double comm_cost0 = 
tensor_add1->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); + double comm_cost0 = tensor_add1->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); double comm_cost1 = cost.communication_cost_; bool comm = comm_cost0 - comm_cost1 <= 1.0; diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index 043746498f..3971a2b471 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -145,9 +145,9 @@ TEST_F(TestTmpIdentityInfo, test_generate_strategies) { identity_ptr->Init(sp); std::vector inputs_info = identity_ptr->inputs_tensor_info(); std::vector outputs_info = identity_ptr->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(identity_ptr->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(identity_ptr->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } From 5e9cfaf6effad2cf6121e63ca434a31bd00f7d4d Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Fri, 10 Apr 2020 19:20:37 +0800 Subject: [PATCH 58/58] syn-code1 --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 2 +- mindspore/ccsrc/utils/callbacks.h | 2 +- mindspore/nn/optim/rmsprop.py | 2 +- tests/st/networks/test_network_main.py | 55 +------------------------ 4 files changed, 5 insertions(+), 56 deletions(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index ee67d46cf7..6ce0ea5316 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -533,4 +533,4 @@ void ExportDFGraph(const std::string& 
file_name, const std::string& phase) { MS_LOG(DEBUG) << "ExportGraph End"; } } // namespace pipeline -} // namespace mindspore \ No newline at end of file +} // namespace mindspore diff --git a/mindspore/ccsrc/utils/callbacks.h b/mindspore/ccsrc/utils/callbacks.h index 6f099ef4ca..a1e4e75d5b 100644 --- a/mindspore/ccsrc/utils/callbacks.h +++ b/mindspore/ccsrc/utils/callbacks.h @@ -40,7 +40,7 @@ const int kCallbackOk = 0; const int kCallbackFalied = 1; bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name, - const std::shared_ptr>& shape) + const std::shared_ptr>& shape); uint32_t SummarySaveCallback(uint32_t, const std::map&); } // namespace callbacks diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index e252f89f2f..b17a101708 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -194,4 +194,4 @@ class RMSProp(Optimizer): else: success = self.hyper_map(F.partial(rmsprop_opt, self.opt, lr, self.decay, self.epsilon, self.momentum), params, self.ms, self.moment, gradients) - return success \ No newline at end of file + return success diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py index 730602c0ae..4689adee54 100644 --- a/tests/st/networks/test_network_main.py +++ b/tests/st/networks/test_network_main.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -<<<<<<< HEAD:tests/st/networks/test_network_main.py """ -Function: +Function: test network -Usage: +Usage: python test_network_main.py --net lenet --target Ascend """ import os @@ -32,47 +31,6 @@ from models.lenet import LeNet from models.resnetv1_5 import resnet50 from models.alexnet import AlexNet context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") -======= -import pytest -from mindspore.nn import TrainOneStepCell, WithLossCell -import mindspore.context as context -from mindspore.nn.optim import Momentum -import numpy as np -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore import Tensor - -class LeNet(nn.Cell): - def __init__(self): - super(LeNet, self).__init__() - self.relu = P.ReLU() - self.batch_size = 32 - - self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') - self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') - self.pool = nn.MaxPool2d(kernel_size=2, stride=2) - self.reshape = P.Reshape() - self.fc1 = nn.Dense(400, 120) - self.fc2 = nn.Dense(120, 84) - self.fc3 = nn.Dense(84, 10) - - def construct(self, input_x): - output = self.conv1(input_x) - output = self.relu(output) - output = self.pool(output) - output = self.conv2(output) - output = self.relu(output) - output = self.pool(output) - output = self.reshape(output, (self.batch_size, -1)) - output = self.fc1(output) - output = self.relu(output) - output = self.fc2(output) - output = self.relu(output) - output = self.fc3(output) - return output - -context.set_context(mode=context.GRAPH_MODE, device_target="CPU") ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def train(net, data, label): learning_rate = 0.01 @@ -89,24 +47,17 @@ def train(net, data, label): print("+++++++++++++++++++++++++++") assert res -<<<<<<< HEAD:tests/st/networks/test_network_main.py def 
test_resnet50(): data = Tensor(np.ones([32, 3 ,224, 224]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = resnet50(32, 10) train(net, data, label) -======= -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def test_lenet(): data = Tensor(np.ones([32, 1 ,32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() train(net, data, label) -<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_alexnet(): data = Tensor(np.ones([32, 3 ,227, 227]).astype(np.float32) * 0.01) @@ -128,5 +79,3 @@ if __name__ == "__main__": test_alexnet() else: print("Please add net name like --net lenet") -======= ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py