From f0f55ad7e826199d96498b3efbae7bd305751870 Mon Sep 17 00:00:00 2001 From: lichenever Date: Mon, 30 Mar 2020 11:31:45 +0800 Subject: [PATCH 01/58] fix_cast_bug --- mindspore/ccsrc/parallel/step_parallel.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index 78bec00bcf..31dc77b595 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -653,6 +653,13 @@ LossNodeInfo GetLossNodeInfo(const AnfNodePtr& loss_node) { MS_EXCEPTION_IF_NULL(pre_node); LossNodeInfo node_info; + // return -> cast + auto pre_cnode = pre_node->cast(); + MS_EXCEPTION_IF_NULL(pre_cnode); + auto pre_prim = GetValueNode(pre_cnode->input(0)); + if (pre_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { + pre_node = pre_cnode->input(1); + } // return -> cast auto pre_cnode = pre_node->cast(); @@ -1970,7 +1977,10 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { MS_EXCEPTION_IF_NULL(current_value); PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); +<<<<<<< HEAD +======= +>>>>>>> fix_cast_bug // return -> cast if (current_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { pre_cnode = pre_cnode->input(1)->cast(); From 4c2aa41f1d859ba3a57d12593736bb7cc6b52ed6 Mon Sep 17 00:00:00 2001 From: leonwanghui Date: Tue, 31 Mar 2020 10:27:40 +0800 Subject: [PATCH 02/58] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?7=20:=20[AutoParallel]Fix=20bug=20in=20the=20case=20of=20two=20?= =?UTF-8?q?cast'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ccsrc/parallel/step_auto_parallel.cc | 2 -- mindspore/ccsrc/parallel/step_parallel.cc | 11 +++---- .../parallel/test_element_wise_function.py | 29 ------------------- 3 files changed, 4 insertions(+), 38 deletions(-) diff --git 
a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 50e6a1e84e..7a895a9458 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -350,8 +350,6 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { } OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) { - MS_EXCEPTION_IF_NULL(prim); - MS_EXCEPTION_IF_NULL(cnode); auto attrs = prim->attrs(); std::vector shape_list = ExtractShape(cnode); if (shape_list.empty()) { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index 31dc77b595..af5eb0159f 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -374,6 +374,7 @@ bool IsParallelCareNode(const CNodePtr& cnode) { if (prim == nullptr) { return false; } + auto attrs = prim->attrs(); if (IsInBlackList(prim)) { MS_LOG(INFO) << "Parallel don't care node: " << prim->name(); return false; @@ -653,13 +654,6 @@ LossNodeInfo GetLossNodeInfo(const AnfNodePtr& loss_node) { MS_EXCEPTION_IF_NULL(pre_node); LossNodeInfo node_info; - // return -> cast - auto pre_cnode = pre_node->cast(); - MS_EXCEPTION_IF_NULL(pre_cnode); - auto pre_prim = GetValueNode(pre_cnode->input(0)); - if (pre_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { - pre_node = pre_cnode->input(1); - } // return -> cast auto pre_cnode = pre_node->cast(); @@ -1978,6 +1972,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> fix_cast_bug @@ -1988,6 +1983,8 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { current_prim = GetValueNode(pre_cnode->input(0)); } +======= +>>>>>>> 回退 'Pull Request !17 : [AutoParallel]Fix bug in the case of two cast' // notice: the GetNext op has not input if 
(INVALID_LOSS_OPS.find(current_prim->name()) != INVALID_LOSS_OPS.end()) { MS_LOG(INFO) << "The loss is: " << current_prim->name(); diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 2eb3a22ed2..0c65593d6a 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -268,32 +268,3 @@ def test_cast_before_mirror3(): y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) _executor.compile(net, x, y, b) - - -def test_mul_two_cast(): - class Net(nn.Cell): - def __init__(self, strategy1, strategy2, strategy3): - super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.mul2 = P.Mul().set_strategy(strategy2) - self.cast = P.Cast().set_strategy(strategy3) - self.cast2 = P.Cast().set_strategy(strategy3) - - def construct(self, x, y, b): - out = self.mul(x, y) - out = self.mul2(out, b) - out = self.cast(out, ms.int32) - out = self.cast2(out, ms.bool_) - return out - - context.set_auto_parallel_context(device_num=8, global_rank=0) - strategy1 = ((2, 2), (2, 2)) - strategy2 = ((8, 1), (8, 1)) - strategy3 = ((8, 1), ) - net = GradWrap(Net(strategy1, strategy2, strategy3)) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - - x = Tensor(np.ones([128, 32]), dtype=ms.float32) - y = Tensor(np.ones([128, 32]), dtype=ms.float32) - b = Tensor(np.ones([128, 32]), dtype=ms.float32) - _executor.compile(net, x, y, b) From 1984e4a1ffd938a9d24e6f3db215c7a70cafb114 Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Tue, 31 Mar 2020 09:34:09 +0800 Subject: [PATCH 03/58] add operator diag and diag_part --- mindspore/ccsrc/transform/convert.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index d2a11948ef..8eed207f59 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ 
b/mindspore/ccsrc/transform/convert.cc @@ -190,6 +190,7 @@ const char kNameAtan2[] = "Atan2"; const char kNameApplyRMSProp[] = "ApplyRMSProp"; const char kNameApplyCenteredRMSProp[] = "ApplyCenteredRMSProp"; + // -----------------OpAdapter initialization-------------- std::unordered_map &DfGraphConvertor::get_adpt_map() { static std::unordered_map adpt_map = { From b27129c9da5c4f54f2ec8d5170fd0628cbbe87bb Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Tue, 31 Mar 2020 16:18:04 +0800 Subject: [PATCH 04/58] modify longtime python ut --- .../train/summary/test_summary_performance.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/ut/python/train/summary/test_summary_performance.py diff --git a/tests/ut/python/train/summary/test_summary_performance.py b/tests/ut/python/train/summary/test_summary_performance.py new file mode 100644 index 0000000000..9ee9725d13 --- /dev/null +++ b/tests/ut/python/train/summary/test_summary_performance.py @@ -0,0 +1,97 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +@File : test_summary.py +@Author: +@Date : 2019-07-4 +@Desc : test summary function +""" +import os +import logging +import time +import numpy as np +from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data +from mindspore.common.tensor import Tensor + +CUR_DIR = os.getcwd() +SUMMARY_DIR = CUR_DIR + "/test_temp_summary_event_file/" + +log = logging.getLogger("test") +log.setLevel(level=logging.ERROR) + +def get_now_time_ns(): + """get the time of second""" + time_second = int(time.time_ns()) + return time_second + +def get_test_data(step): + """ get_test_data """ + # pylint: disable=unused-argument + test_data_list = [] + tag1 = "xt1[:Tensor]" + tag2 = "xt2[:Tensor]" + tag3 = "xt3[:Tensor]" + np1 = np.random.random((5, 4, 3, 5)) + np2 = np.random.random((5, 5, 3, 5)) + np3 = np.random.random((4, 5, 3, 5)) + + dict1 = {} + dict1["name"] = tag1 + dict1["data"] = Tensor(np1) + + dict2 = {} + dict2["name"] = tag2 + dict2["data"] = Tensor(np2) + + dict3 = {} + dict3["name"] = tag3 + dict3["data"] = Tensor(np3) + + test_data_list.append(dict1) + test_data_list.append(dict2) + + return test_data_list + + +# Test 1: summary sample of scalar +def test_summary_performance(): + """ test_summary_performance """ + log.debug("begin test_scalar_summary_sample") + current_time = time.time() + print("time = ", current_time) + # step 0: create the thread + test_writer = SummaryRecord(SUMMARY_DIR, flush_time=120) + + # step 1: create the test data for summary + old_time = get_now_time_ns() + # step 2: create the Event + for i in range(1, 10): + test_data = get_test_data(i) + _cache_summary_tensor_data(test_data) + test_writer.record(i) + now_time = get_now_time_ns() + consume_time = (now_time - old_time)/1000/1000 + old_time = now_time + print("step test_summary_performance conusmer time is:", consume_time) + + + # step 3: send the event to mq + + # step 4: accept 
the event and write the file + test_writer.flush() + test_writer.close() + current_time = time.time() - current_time + print("consume time = ", current_time) + log.debug("finished test_scalar_summary_sample") From 07449cd1cc6e44ba4d51f2891ada4410df02a725 Mon Sep 17 00:00:00 2001 From: lichenever Date: Tue, 31 Mar 2020 18:43:42 +0800 Subject: [PATCH 05/58] fix two cast bug in auto parallel --- .../ccsrc/parallel/step_auto_parallel.cc | 2 ++ mindspore/ccsrc/parallel/step_parallel.cc | 8 +---- .../parallel/test_element_wise_function.py | 29 +++++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 7a895a9458..50e6a1e84e 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -350,6 +350,8 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { } OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(prim); + MS_EXCEPTION_IF_NULL(cnode); auto attrs = prim->attrs(); std::vector shape_list = ExtractShape(cnode); if (shape_list.empty()) { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index af5eb0159f..9a08ead584 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -374,7 +374,6 @@ bool IsParallelCareNode(const CNodePtr& cnode) { if (prim == nullptr) { return false; } - auto attrs = prim->attrs(); if (IsInBlackList(prim)) { MS_LOG(INFO) << "Parallel don't care node: " << prim->name(); return false; @@ -1971,11 +1970,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { MS_EXCEPTION_IF_NULL(current_value); PrimitivePtr current_prim = current_value->value()->cast(); MS_EXCEPTION_IF_NULL(current_prim); -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> fix_cast_bug // return -> cast if (current_prim->name() == CAST && 
pre_cnode->operator_info() == nullptr) { pre_cnode = pre_cnode->input(1)->cast(); @@ -1983,8 +1978,7 @@ CNodePtr FindLossCNode(const FuncGraphPtr& func_graph) { current_prim = GetValueNode(pre_cnode->input(0)); } -======= ->>>>>>> 回退 'Pull Request !17 : [AutoParallel]Fix bug in the case of two cast' + // notice: the GetNext op has not input if (INVALID_LOSS_OPS.find(current_prim->name()) != INVALID_LOSS_OPS.end()) { MS_LOG(INFO) << "The loss is: " << current_prim->name(); diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 0c65593d6a..2eb3a22ed2 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -268,3 +268,32 @@ def test_cast_before_mirror3(): y = Tensor(np.ones([32, 64]), dtype=ms.float16) b = Tensor(np.ones([64, 64]), dtype=ms.float32) _executor.compile(net, x, y, b) + + +def test_mul_two_cast(): + class Net(nn.Cell): + def __init__(self, strategy1, strategy2, strategy3): + super().__init__() + self.mul = P.Mul().set_strategy(strategy1) + self.mul2 = P.Mul().set_strategy(strategy2) + self.cast = P.Cast().set_strategy(strategy3) + self.cast2 = P.Cast().set_strategy(strategy3) + + def construct(self, x, y, b): + out = self.mul(x, y) + out = self.mul2(out, b) + out = self.cast(out, ms.int32) + out = self.cast2(out, ms.bool_) + return out + + context.set_auto_parallel_context(device_num=8, global_rank=0) + strategy1 = ((2, 2), (2, 2)) + strategy2 = ((8, 1), (8, 1)) + strategy3 = ((8, 1), ) + net = GradWrap(Net(strategy1, strategy2, strategy3)) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + + x = Tensor(np.ones([128, 32]), dtype=ms.float32) + y = Tensor(np.ones([128, 32]), dtype=ms.float32) + b = Tensor(np.ones([128, 32]), dtype=ms.float32) + _executor.compile(net, x, y, b) From 079df4c909dcfb3a5c665e3cceca64fe3038e097 Mon Sep 17 00:00:00 2001 From: kswang Date: Tue, 31 Mar 2020 
21:25:48 +0800 Subject: [PATCH 06/58] add cpu st lenet --- tests/st/networks/test_cpu_lenet.py | 2 +- tests/st/networks/test_network_main.py | 51 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py index 9fd50f5d9b..bdcbc32382 100644 --- a/tests/st/networks/test_cpu_lenet.py +++ b/tests/st/networks/test_cpu_lenet.py @@ -78,4 +78,4 @@ def test_lenet(): data = Tensor(np.ones([32, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() - train(net, data, label) + train(net, data, label) \ No newline at end of file diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py index 7601739f8c..730602c0ae 100644 --- a/tests/st/networks/test_network_main.py +++ b/tests/st/networks/test_network_main.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ +<<<<<<< HEAD:tests/st/networks/test_network_main.py """ Function: test network @@ -31,6 +32,47 @@ from models.lenet import LeNet from models.resnetv1_5 import resnet50 from models.alexnet import AlexNet context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") +======= +import pytest +from mindspore.nn import TrainOneStepCell, WithLossCell +import mindspore.context as context +from mindspore.nn.optim import Momentum +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore import Tensor + +class LeNet(nn.Cell): + def __init__(self): + super(LeNet, self).__init__() + self.relu = P.ReLU() + self.batch_size = 32 + + self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') + self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + self.reshape = P.Reshape() + self.fc1 = nn.Dense(400, 120) + self.fc2 = nn.Dense(120, 84) + self.fc3 = nn.Dense(84, 10) + + def construct(self, input_x): + output = self.conv1(input_x) + output = self.relu(output) + output = self.pool(output) + output = self.conv2(output) + output = self.relu(output) + output = self.pool(output) + output = self.reshape(output, (self.batch_size, -1)) + output = self.fc1(output) + output = self.relu(output) + output = self.fc2(output) + output = self.relu(output) + output = self.fc3(output) + return output + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def train(net, data, label): learning_rate = 0.01 @@ -47,17 +89,24 @@ def train(net, data, label): print("+++++++++++++++++++++++++++") assert res +<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_resnet50(): data = Tensor(np.ones([32, 3 ,224, 224]).astype(np.float32) * 0.01) label = 
Tensor(np.ones([32]).astype(np.int32)) net = resnet50(32, 10) train(net, data, label) +======= +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def test_lenet(): data = Tensor(np.ones([32, 1 ,32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() train(net, data, label) +<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_alexnet(): data = Tensor(np.ones([32, 3 ,227, 227]).astype(np.float32) * 0.01) @@ -79,3 +128,5 @@ if __name__ == "__main__": test_alexnet() else: print("Please add net name like --net lenet") +======= +>>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py From 8adbcdbc4cd572d0bc57d9167841baee87b01501 Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Wed, 1 Apr 2020 11:53:35 +0800 Subject: [PATCH 07/58] delete longtime python ut --- .../train/summary/test_summary_performance.py | 97 ------------------- 1 file changed, 97 deletions(-) delete mode 100644 tests/ut/python/train/summary/test_summary_performance.py diff --git a/tests/ut/python/train/summary/test_summary_performance.py b/tests/ut/python/train/summary/test_summary_performance.py deleted file mode 100644 index 9ee9725d13..0000000000 --- a/tests/ut/python/train/summary/test_summary_performance.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -@File : test_summary.py -@Author: -@Date : 2019-07-4 -@Desc : test summary function -""" -import os -import logging -import time -import numpy as np -from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data -from mindspore.common.tensor import Tensor - -CUR_DIR = os.getcwd() -SUMMARY_DIR = CUR_DIR + "/test_temp_summary_event_file/" - -log = logging.getLogger("test") -log.setLevel(level=logging.ERROR) - -def get_now_time_ns(): - """get the time of second""" - time_second = int(time.time_ns()) - return time_second - -def get_test_data(step): - """ get_test_data """ - # pylint: disable=unused-argument - test_data_list = [] - tag1 = "xt1[:Tensor]" - tag2 = "xt2[:Tensor]" - tag3 = "xt3[:Tensor]" - np1 = np.random.random((5, 4, 3, 5)) - np2 = np.random.random((5, 5, 3, 5)) - np3 = np.random.random((4, 5, 3, 5)) - - dict1 = {} - dict1["name"] = tag1 - dict1["data"] = Tensor(np1) - - dict2 = {} - dict2["name"] = tag2 - dict2["data"] = Tensor(np2) - - dict3 = {} - dict3["name"] = tag3 - dict3["data"] = Tensor(np3) - - test_data_list.append(dict1) - test_data_list.append(dict2) - - return test_data_list - - -# Test 1: summary sample of scalar -def test_summary_performance(): - """ test_summary_performance """ - log.debug("begin test_scalar_summary_sample") - current_time = time.time() - print("time = ", current_time) - # step 0: create the thread - test_writer = SummaryRecord(SUMMARY_DIR, flush_time=120) - - # step 1: create the test data for summary - old_time = get_now_time_ns() - # step 2: create the Event - for i in range(1, 10): - test_data = get_test_data(i) - _cache_summary_tensor_data(test_data) - test_writer.record(i) - now_time = get_now_time_ns() - consume_time = (now_time - old_time)/1000/1000 - old_time = now_time - print("step test_summary_performance conusmer time is:", consume_time) - - - # step 3: send the event to mq - - # step 4: accept 
the event and write the file - test_writer.flush() - test_writer.close() - current_time = time.time() - current_time - print("consume time = ", current_time) - log.debug("finished test_scalar_summary_sample") From 9b5d4eff89cd47856e1ee181207d4b62d05d542b Mon Sep 17 00:00:00 2001 From: zhaoting Date: Tue, 31 Mar 2020 09:14:08 +0800 Subject: [PATCH 08/58] add RMSProp optimizer --- mindspore/ccsrc/transform/op_declare.h | 1 + mindspore/nn/optim/rmsprop.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index 9fbc97f3c9..339a5027c5 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -458,6 +458,7 @@ DECLARE_OP_USE_INPUT_ATTR(ApplyRMSPropD) DECLARE_OP_USE_OUTPUT(ApplyRMSPropD) DECLARE_OP_ADAPTER(ApplyCenteredRMSProp) DECLARE_OP_USE_OUTPUT(ApplyCenteredRMSProp) + #ifdef ENABLE_GE DECLARE_OP_ADAPTER(Print) DECLARE_OP_USE_DYN_INPUT(Print) diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index b17a101708..a34de449d1 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -18,8 +18,12 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore._checkparam import ParamValidator as validator import mindspore.common.dtype as mstype +<<<<<<< HEAD from mindspore.common import Tensor from .optimizer import Optimizer, grad_scale, apply_decay +======= +from .optimizer import Optimizer, grad_scale +>>>>>>> add RMSProp optimizer rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") @@ -119,9 +123,12 @@ class RMSProp(Optimizer): use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. centered (bool): If True, gradients are normalized by the estimated variance of the gradient. 
Default: False loss_scale (float): A floating point value for the loss scale. Default: 1.0. +<<<<<<< HEAD weight_decay (float): Weight decay (L2 penalty). Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'beta' not in x.name and 'gamma' not in x.name. +======= +>>>>>>> add RMSProp optimizer Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -132,12 +139,20 @@ class RMSProp(Optimizer): Examples: >>> net = Net() >>> loss = nn.SoftmaxCrossEntropyWithLogits() +<<<<<<< HEAD >>> opt = nn.RMSProp(params=net.trainable_params(), learning_rate=lr) >>> model = Model(net, loss, opt) """ def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): +======= + >>> opt = RMSProp(params=net.trainable_params(), learning_rate=lr) + >>> model = Model(net, loss, opt) + """ + def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, + use_locking=False, centered=False, loss_scale=1.0): +>>>>>>> add RMSProp optimizer super(RMSProp, self).__init__(learning_rate, params) if isinstance(momentum, float) and momentum < 0.0: From 2ba026dbf2e8791281cbd4e98cf74a6464b6a991 Mon Sep 17 00:00:00 2001 From: Wei Luning Date: Mon, 23 Mar 2020 17:33:56 +0800 Subject: [PATCH 09/58] remove ge depend in cpu --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 55 +++++++++++++++++++++++++ mindspore/ccsrc/utils/callbacks.h | 2 +- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 2f68935591..0b37e8f930 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -391,7 +391,12 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve const 
std::string& phase) { std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); if (ge_tensors.size() != inputs.size()) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Convert me args to ge tensor error."; +======= + MS_LOG(ERROR) << "args convert to ge tensor error"; + return nullptr; +>>>>>>> remove ge depend in cpu } std::vector ge_outputs; @@ -402,7 +407,12 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); if (graph_runner == nullptr) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Can not found GraphRunner."; +======= + MS_LOG(ERROR) << "Can not found GraphRunner"; + return nullptr; +>>>>>>> remove ge depend in cpu } { @@ -419,7 +429,11 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); if (me_outputs.size() != ge_outputs.size()) { +<<<<<<< HEAD MS_LOG(WARNING) << "Convert output Ge tensor to Me tensor failed"; +======= + MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; +>>>>>>> remove ge depend in cpu } py::tuple outputs(me_outputs.size()); @@ -429,11 +443,28 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::shared_ptr ret = nullptr; +<<<<<<< HEAD AnfNodePtr output_node = graph->get_return()->input(1); MS_EXCEPTION_IF_NULL(output_node); size_t count = 0; py::object oj = StructureOutput(output_node, outputs, &count); ret = std::make_shared(oj); +======= +#ifdef ENABLE_GE + AnfNodePtr root = graph->get_return(); + MS_EXCEPTION_IF_NULL(root); + AbstractBasePtr output = root->abstract(); + size_t count = 0; + py::object oj = StructureOutput(output, outputs, &count); + ret = std::make_shared(oj); +#else + if (outputs.size() == 1) { + ret = std::make_shared(outputs[0]); + } else { + ret = std::make_shared(outputs); + } +#endif +>>>>>>> remove ge depend in cpu return ret; } @@ -444,7 +475,11 @@ void ProcessGeArg(const std::map& info, const 
py:: std::size_t size = args.size(); if (info.count(phase) == 0) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "No phase in executor:" << GetPhasePrefix(phase); +======= + MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); +>>>>>>> remove ge depend in cpu } auto arg_size = info.at(phase)->arg_list_size; @@ -459,12 +494,20 @@ void ProcessGeArg(const std::map& info, const py:: ValuePtr converted = nullptr; bool succ = parse::ConvertData(args[i], &converted); if (!succ) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args convert error"; +======= + MS_LOG(EXCEPTION) << "args convert error"; +>>>>>>> remove ge depend in cpu } if (converted->isa()) { (*inputs).push_back(converted->cast()); } else { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor"; +======= + MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; +>>>>>>> remove ge depend in cpu } } } @@ -481,12 +524,20 @@ py::object ExecDFGraph(const std::map& info, const } if (info.count(phase) == 0) { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "There is no phase:" << phase; +======= + MS_LOG(EXCEPTION) << "has no phase:" << phase; +>>>>>>> remove ge depend in cpu } FuncGraphPtr anf_graph = info.at(phase)->func_graph; +<<<<<<< HEAD #ifdef ENABLE_INFER +======= +#if (!defined ENABLE_GE) || (defined ENABLE_INFER) +>>>>>>> remove ge depend in cpu // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { @@ -511,7 +562,11 @@ py::object ExecDFGraph(const std::map& info, const if (ret != nullptr) { return *ret; } else { +<<<<<<< HEAD MS_LOG(EXCEPTION) << "Exec graph failed"; +======= + MS_LOG(EXCEPTION) << "exec graph failed"; +>>>>>>> remove ge depend in cpu } } void ExportDFGraph(const std::string& file_name, const std::string& phase) { diff --git a/mindspore/ccsrc/utils/callbacks.h b/mindspore/ccsrc/utils/callbacks.h index 
a1e4e75d5b..6f099ef4ca 100644 --- a/mindspore/ccsrc/utils/callbacks.h +++ b/mindspore/ccsrc/utils/callbacks.h @@ -40,7 +40,7 @@ const int kCallbackOk = 0; const int kCallbackFalied = 1; bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name, - const std::shared_ptr>& shape); + const std::shared_ptr>& shape) uint32_t SummarySaveCallback(uint32_t, const std::map&); } // namespace callbacks From 23c21e191ff2ebdcdbc7340e3748ce2d4c9c25f0 Mon Sep 17 00:00:00 2001 From: zhangz0911gm Date: Tue, 31 Mar 2020 23:14:21 -0400 Subject: [PATCH 10/58] Add FloorMod, Acosh in ME --- mindspore/ccsrc/transform/op_declare.h | 1 - 1 file changed, 1 deletion(-) diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index 339a5027c5..9fbc97f3c9 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -458,7 +458,6 @@ DECLARE_OP_USE_INPUT_ATTR(ApplyRMSPropD) DECLARE_OP_USE_OUTPUT(ApplyRMSPropD) DECLARE_OP_ADAPTER(ApplyCenteredRMSProp) DECLARE_OP_USE_OUTPUT(ApplyCenteredRMSProp) - #ifdef ENABLE_GE DECLARE_OP_ADAPTER(Print) DECLARE_OP_USE_DYN_INPUT(Print) From 2a82eb450efa5b26b8722807c1ff33db192594a9 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Fri, 3 Apr 2020 11:45:49 +0800 Subject: [PATCH 11/58] add weight decay in RMSProp optimizer --- mindspore/nn/optim/rmsprop.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index a34de449d1..e252f89f2f 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -18,12 +18,8 @@ from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore._checkparam import ParamValidator as validator import mindspore.common.dtype as mstype -<<<<<<< HEAD from mindspore.common import Tensor from .optimizer import Optimizer, grad_scale, apply_decay -======= -from .optimizer import 
Optimizer, grad_scale ->>>>>>> add RMSProp optimizer rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") @@ -123,12 +119,9 @@ class RMSProp(Optimizer): use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False loss_scale (float): A floating point value for the loss scale. Default: 1.0. -<<<<<<< HEAD weight_decay (float): Weight decay (L2 penalty). Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'beta' not in x.name and 'gamma' not in x.name. -======= ->>>>>>> add RMSProp optimizer Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -139,20 +132,12 @@ class RMSProp(Optimizer): Examples: >>> net = Net() >>> loss = nn.SoftmaxCrossEntropyWithLogits() -<<<<<<< HEAD >>> opt = nn.RMSProp(params=net.trainable_params(), learning_rate=lr) >>> model = Model(net, loss, opt) """ def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): -======= - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=lr) - >>> model = Model(net, loss, opt) - """ - def __init__(self, params, learning_rate=0.1, decay=0.9, momentum=0.0, epsilon=1e-10, - use_locking=False, centered=False, loss_scale=1.0): ->>>>>>> add RMSProp optimizer super(RMSProp, self).__init__(learning_rate, params) if isinstance(momentum, float) and momentum < 0.0: @@ -209,4 +194,4 @@ class RMSProp(Optimizer): else: success = self.hyper_map(F.partial(rmsprop_opt, self.opt, lr, self.decay, self.epsilon, self.momentum), params, self.ms, self.moment, gradients) - return success + return success \ No newline at 
end of file From 93fe493cf46fdca43f2b0ecc486b210864629cc5 Mon Sep 17 00:00:00 2001 From: kingfo Date: Fri, 3 Apr 2020 10:53:46 +0800 Subject: [PATCH 12/58] fix ME+GE compile error --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 63 ++----------------------- 1 file changed, 4 insertions(+), 59 deletions(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 0b37e8f930..ee67d46cf7 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -116,7 +116,7 @@ bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batc return transform::TransformUtil::ConvertDataType(i->type_id()); }); - ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE); + ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_SINK_MODE); ConfigManager::GetInstance().set_iter_num(size); ConfigManager::GetInstance().set_dataset_phase(phase); @@ -391,12 +391,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve const std::string& phase) { std::vector ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW); if (ge_tensors.size() != inputs.size()) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Convert me args to ge tensor error."; -======= - MS_LOG(ERROR) << "args convert to ge tensor error"; - return nullptr; ->>>>>>> remove ge depend in cpu } std::vector ge_outputs; @@ -407,12 +402,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); if (graph_runner == nullptr) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Can not found GraphRunner."; -======= - MS_LOG(ERROR) << "Can not found GraphRunner"; - return nullptr; ->>>>>>> remove ge depend in cpu } { @@ -429,11 +419,7 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::vector me_outputs = TransformUtil::ConvertGeTensors(ge_outputs); if (me_outputs.size() != ge_outputs.size()) { -<<<<<<< HEAD 
MS_LOG(WARNING) << "Convert output Ge tensor to Me tensor failed"; -======= - MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed"; ->>>>>>> remove ge depend in cpu } py::tuple outputs(me_outputs.size()); @@ -443,28 +429,11 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr& graph, const std::ve std::shared_ptr ret = nullptr; -<<<<<<< HEAD AnfNodePtr output_node = graph->get_return()->input(1); MS_EXCEPTION_IF_NULL(output_node); size_t count = 0; py::object oj = StructureOutput(output_node, outputs, &count); ret = std::make_shared(oj); -======= -#ifdef ENABLE_GE - AnfNodePtr root = graph->get_return(); - MS_EXCEPTION_IF_NULL(root); - AbstractBasePtr output = root->abstract(); - size_t count = 0; - py::object oj = StructureOutput(output, outputs, &count); - ret = std::make_shared(oj); -#else - if (outputs.size() == 1) { - ret = std::make_shared(outputs[0]); - } else { - ret = std::make_shared(outputs); - } -#endif ->>>>>>> remove ge depend in cpu return ret; } @@ -475,11 +444,7 @@ void ProcessGeArg(const std::map& info, const py:: std::size_t size = args.size(); if (info.count(phase) == 0) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "No phase in executor:" << GetPhasePrefix(phase); -======= - MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase); ->>>>>>> remove ge depend in cpu } auto arg_size = info.at(phase)->arg_list_size; @@ -488,26 +453,18 @@ void ProcessGeArg(const std::map& info, const py:: } // process the first args of tensor - // only in Dataset Feed Mode, fp_bp graph need input tensors - if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) { + // only in dataset normal(non-sink) mode, fp_bp graph need input tensors + if (ConfigManager::GetInstance().dataset_mode() == DS_NORMAL_MODE) { for (std::size_t i = 0; i < size; i++) { ValuePtr converted = nullptr; bool succ = parse::ConvertData(args[i], &converted); if (!succ) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args convert error"; -======= - MS_LOG(EXCEPTION) << "args convert 
error"; ->>>>>>> remove ge depend in cpu } if (converted->isa()) { (*inputs).push_back(converted->cast()); } else { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor"; -======= - MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor"; ->>>>>>> remove ge depend in cpu } } } @@ -524,20 +481,12 @@ py::object ExecDFGraph(const std::map& info, const } if (info.count(phase) == 0) { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "There is no phase:" << phase; -======= - MS_LOG(EXCEPTION) << "has no phase:" << phase; ->>>>>>> remove ge depend in cpu } FuncGraphPtr anf_graph = info.at(phase)->func_graph; -<<<<<<< HEAD #ifdef ENABLE_INFER -======= -#if (!defined ENABLE_GE) || (defined ENABLE_INFER) ->>>>>>> remove ge depend in cpu // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { @@ -562,11 +511,7 @@ py::object ExecDFGraph(const std::map& info, const if (ret != nullptr) { return *ret; } else { -<<<<<<< HEAD MS_LOG(EXCEPTION) << "Exec graph failed"; -======= - MS_LOG(EXCEPTION) << "exec graph failed"; ->>>>>>> remove ge depend in cpu } } void ExportDFGraph(const std::string& file_name, const std::string& phase) { @@ -588,4 +533,4 @@ void ExportDFGraph(const std::string& file_name, const std::string& phase) { MS_LOG(DEBUG) << "ExportGraph End"; } } // namespace pipeline -} // namespace mindspore +} // namespace mindspore \ No newline at end of file From 3aa51f35c1dd33344f40c8d475d1bd8b60bd3028 Mon Sep 17 00:00:00 2001 From: wanghua Date: Fri, 3 Apr 2020 14:55:25 +0800 Subject: [PATCH 13/58] fix bert precison bug --- mindspore/ccsrc/device/ascend/kernel_select_ascend.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index d05b9fafa1..a7c8d131fb 100644 --- 
a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -82,6 +82,12 @@ bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel:: } return true; }; + if (AnfAlgo::GetCNodeName(kernel_node) == "LayerNormBetaGammaBackprop" || + AnfAlgo::GetCNodeName(kernel_node) == "LayerNormXBackprop") { + if (AnfAlgo::GetPrevNodeOutputFormat(kernel_node, 0) != kernel_build_info.GetInputFormat(0)) { + return true; + } + } if (AnfAlgo::GetCNodeName(kernel_node) == prim::kPrimCast->name()) { return AnfAlgo::GetOutputInferDataType(kernel_node, 0) == kernel_build_info.GetOutputDeviceType(0) && AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0) == kernel_build_info.GetInputDeviceType(0); @@ -155,7 +161,7 @@ bool PriorityChooseItem(const std::vector &cur_item, std::vector *best return false; } } - return false; + return true; } void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, const std::shared_ptr &kernel_node, From a8f95e6d2c94565dadff47f39c30dcb676773f5d Mon Sep 17 00:00:00 2001 From: wanghua Date: Fri, 3 Apr 2020 17:51:56 +0800 Subject: [PATCH 14/58] modify bert test file --- mindspore/ccsrc/device/ascend/kernel_select_ascend.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index a7c8d131fb..d05b9fafa1 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -82,12 +82,6 @@ bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel:: } return true; }; - if (AnfAlgo::GetCNodeName(kernel_node) == "LayerNormBetaGammaBackprop" || - AnfAlgo::GetCNodeName(kernel_node) == "LayerNormXBackprop") { - if (AnfAlgo::GetPrevNodeOutputFormat(kernel_node, 0) != kernel_build_info.GetInputFormat(0)) { - return true; - } - } if (AnfAlgo::GetCNodeName(kernel_node) == 
prim::kPrimCast->name()) { return AnfAlgo::GetOutputInferDataType(kernel_node, 0) == kernel_build_info.GetOutputDeviceType(0) && AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0) == kernel_build_info.GetInputDeviceType(0); @@ -161,7 +155,7 @@ bool PriorityChooseItem(const std::vector &cur_item, std::vector *best return false; } } - return true; + return false; } void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, const std::shared_ptr &kernel_node, From 1efa4ffc39b9599769ea1ef8cb1deb6b9d4bd280 Mon Sep 17 00:00:00 2001 From: ms_yan <6576637+ms_yan@user.noreply.gitee.com> Date: Thu, 2 Apr 2020 21:56:48 +0800 Subject: [PATCH 15/58] add parameter check for Class Schema --- mindspore/dataset/engine/datasets.py | 20 +++++++---- mindspore/dataset/engine/validators.py | 49 ++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index db2b5169d2..2d5c219b71 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset + check_zip_dataset, check_add_column, check_columns from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,13 +2334,20 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: + if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): + raise ValueError("The file %s does not exist or permission denied!" 
% schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) - self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load") + raise RuntimeError("Schema file failed to load.") + except UnicodeDecodeError: + raise RuntimeError("Schema file failed to decode.") + except Exception: + raise RuntimeError("Schema file failed to open.") + self.from_json(json_obj) + @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. @@ -2359,10 +2366,8 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - elif isinstance(de_type, str): - new_column["type"] = str(DataType(de_type)) else: - raise ValueError("Unknown column type") + new_column["type"] = str(DataType(de_type)) if shape is not None: new_column["shape"] = shape @@ -2399,6 +2404,7 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. """ + check_columns(columns, columns) self.columns = [] for col in columns: name = None @@ -2443,6 +2449,8 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ + if not isinstance(json_obj, dict) or json_obj is None: + raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index b4d22a4a01..1c374ae879 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,10 +19,15 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count +from mindspore._c_expression import typing from . import samplers from . 
import datasets INT32_MAX = 2147483647 +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64" +] def check(method): @@ -188,6 +193,12 @@ def check(method): return wrapper +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + def check_filename(path): """ check the filename in the path @@ -743,3 +754,41 @@ def check_project(method): return method(*args, **kwargs) return new_method + + +def check_shape(shape, name): + if isinstance(shape, list): + for element in shape: + if not isinstance(element, int): + raise TypeError( + "Each element in {0} should be of type int. Got {1}.".format(name, type(element))) + else: + raise TypeError("Expected int list.") + + +def check_add_column(method): + """check the input arguments of add_column.""" + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check name; required argument + name = param_dict.get("name") + if not isinstance(name, str) or not name: + raise TypeError("Expected non-empty string.") + + # check type; required argument + de_type = param_dict.get("de_type") + if not isinstance(de_type, str) or not de_type: + raise TypeError("Expected non-empty string.") + if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): + raise ValueError("Unknown column type.") + + # check shape + shape = param_dict.get("shape") + if shape is not None: + check_shape(shape, "shape") + + return method(*args, **kwargs) + + return new_method From 0a595b4749525b94b10ddf33770255ad5effe674 Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Tue, 7 Apr 2020 11:39:10 +0800 Subject: [PATCH 16/58] fix runtest.sh for python ut --- mindspore/dataset/engine/datasets.py | 20 ++++------- mindspore/dataset/engine/validators.py | 49 -------------------------- 2 files changed, 6 insertions(+), 63 deletions(-) diff 
--git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 2d5c219b71..db2b5169d2 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset, check_add_column, check_columns + check_zip_dataset from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,20 +2334,13 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: - if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): - raise ValueError("The file %s does not exist or permission denied!" % schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) + self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load.") - except UnicodeDecodeError: - raise RuntimeError("Schema file failed to decode.") - except Exception: - raise RuntimeError("Schema file failed to open.") - self.from_json(json_obj) + raise RuntimeError("Schema file failed to load") - @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. @@ -2366,8 +2359,10 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - else: + elif isinstance(de_type, str): new_column["type"] = str(DataType(de_type)) + else: + raise ValueError("Unknown column type") if shape is not None: new_column["shape"] = shape @@ -2404,7 +2399,6 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. 
""" - check_columns(columns, columns) self.columns = [] for col in columns: name = None @@ -2449,8 +2443,6 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ - if not isinstance(json_obj, dict) or json_obj is None: - raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 1c374ae879..b4d22a4a01 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,15 +19,10 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count -from mindspore._c_expression import typing from . import samplers from . import datasets INT32_MAX = 2147483647 -valid_detype = [ - "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", - "uint32", "uint64", "float16", "float32", "float64" -] def check(method): @@ -193,12 +188,6 @@ def check(method): return wrapper -def check_valid_detype(type_): - if type_ not in valid_detype: - raise ValueError("Unknown column type") - return True - - def check_filename(path): """ check the filename in the path @@ -754,41 +743,3 @@ def check_project(method): return method(*args, **kwargs) return new_method - - -def check_shape(shape, name): - if isinstance(shape, list): - for element in shape: - if not isinstance(element, int): - raise TypeError( - "Each element in {0} should be of type int. 
Got {1}.".format(name, type(element))) - else: - raise TypeError("Expected int list.") - - -def check_add_column(method): - """check the input arguments of add_column.""" - @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check name; required argument - name = param_dict.get("name") - if not isinstance(name, str) or not name: - raise TypeError("Expected non-empty string.") - - # check type; required argument - de_type = param_dict.get("de_type") - if not isinstance(de_type, str) or not de_type: - raise TypeError("Expected non-empty string.") - if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): - raise ValueError("Unknown column type.") - - # check shape - shape = param_dict.get("shape") - if shape is not None: - check_shape(shape, "shape") - - return method(*args, **kwargs) - - return new_method From bf4c09931eec2c2516cfe0a2cc790ea9f92d8eb1 Mon Sep 17 00:00:00 2001 From: VectorSL Date: Fri, 3 Apr 2020 16:55:37 +0800 Subject: [PATCH 17/58] edit loss_scale for gpu --- mindspore/nn/wrap/loss_scale.py | 40 ++++++++++---- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 83 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 12 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 1ce3179273..4d929352b3 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -25,6 +25,7 @@ from ...ops import operations as P from ...ops.operations import NPUGetFloatStatus, NPUAllocFloatStatus, NPUClearFloatStatus, ReduceSum, LessEqual, \ ControlDepend from ...common import dtype as mstype +import mindspore.context as context _grad_scale = C.MultitypeFuncGraph("grad_scale") reciprocal = P.Reciprocal() @@ -34,6 +35,12 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) +_grad_overflow = 
C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() + +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -197,9 +204,15 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + else: + self.gpu_target = False + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -224,10 +237,11 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + if not self.gpu_target: + # init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -237,10 +251,14 @@ class TrainOneStepWithLossScaleCell(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) + if not self.gpu_target: + # get the overflow buffer + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) 
if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 37a3b38bb6..48a985b33d 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, + NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -151,6 +151,10 @@ __all__ = [ 'Neg', 'Slice', 'DType', + 'IsNan', + 'IsInf', + 'IsFinite', + 'FloatStatus', 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 175b72560f..5f8c24d78b 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,6 +1541,89 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) +class IsNan(PrimitiveWithInfer): + """ + Judging which elements are nan for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. + """ + + @prim_attr_register + def __init__(self): + """init IsNan""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsInf(PrimitiveWithInfer): + """ + Judging which elements are inf or -inf for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. 
+ """ + + @prim_attr_register + def __init__(self): + """init IsInf""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsFinite(PrimitiveWithInfer): + """ + Judging which elements are finite for each position + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input. + """ + + @prim_attr_register + def __init__(self): + """init IsFinite""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class FloatStatus(PrimitiveWithInfer): + """ + Determine if the elements contains nan, inf or -inf + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the shape of `(1,)`. + """ + + @prim_attr_register + def __init__(self): + """init FloatStatus""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return [1] + + def infer_dtype(self, x_dtype): + return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From 22578e983949129e5f1c60f53cd2426072d55fd5 Mon Sep 17 00:00:00 2001 From: chengang Date: Tue, 7 Apr 2020 16:56:33 +0800 Subject: [PATCH 18/58] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?33=20:=20Edit=20loss=5Fscale=20to=20fit=20GPU'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindspore/nn/wrap/loss_scale.py | 40 ++++---------- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 83 ---------------------------- 3 files changed, 12 insertions(+), 117 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 4d929352b3..1ce3179273 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -25,7 +25,6 @@ from ...ops import operations 
as P from ...ops.operations import NPUGetFloatStatus, NPUAllocFloatStatus, NPUClearFloatStatus, ReduceSum, LessEqual, \ ControlDepend from ...common import dtype as mstype -import mindspore.context as context _grad_scale = C.MultitypeFuncGraph("grad_scale") reciprocal = P.Reciprocal() @@ -35,12 +34,6 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) -_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") -grad_overflow = P.FloatStatus() - -@_grad_overflow.register("Tensor") -def _tensor_grad_overflow(grad): - return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -204,15 +197,9 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - if context.get_context("device_target") == "GPU": - self.gpu_target = True - self.float_status = P.FloatStatus() - self.addn = P.AddN() - else: - self.gpu_target = False - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -237,11 +224,10 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - if not self.gpu_target: - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + # init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -251,14 +237,10 @@ class TrainOneStepWithLossScaleCell(Cell): if self.reducer_flag: # apply grad reducer on grads grads = 
self.grad_reducer(grads) - if not self.gpu_target: - # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) - else: - flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) - flag_sum = self.addn(flag_sum) + # get the overflow buffer + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 48a985b33d..37a3b38bb6 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, + NPUGetFloatStatus, Pow, RealDiv, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -151,10 +151,6 @@ __all__ = [ 'Neg', 'Slice', 'DType', - 'IsNan', - 'IsInf', - 'IsFinite', - 'FloatStatus', 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 5f8c24d78b..175b72560f 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,89 +1541,6 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) -class IsNan(PrimitiveWithInfer): - """ - Judging which elements are nan for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. 
- """ - - @prim_attr_register - def __init__(self): - """init IsNan""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class IsInf(PrimitiveWithInfer): - """ - Judging which elements are inf or -inf for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. - """ - - @prim_attr_register - def __init__(self): - """init IsInf""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class IsFinite(PrimitiveWithInfer): - """ - Judging which elements are finite for each position - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the same shape of input. - """ - - @prim_attr_register - def __init__(self): - """init IsFinite""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - return mstype.bool_ - -class FloatStatus(PrimitiveWithInfer): - """ - Determine if the elements contains nan, inf or -inf - Inputs: - - **input_x** (Tensor) - The input tensor. - - Outputs: - Tensor, has the shape of `(1,)`. 
- """ - - @prim_attr_register - def __init__(self): - """init FloatStatus""" - self.init_prim_io_names(inputs=['x'], outputs=['output']) - - def infer_shape(self, x_shape): - return [1] - - def infer_dtype(self, x_dtype): - return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From b9701db887348513da47b7d301f25e7f7420a8f3 Mon Sep 17 00:00:00 2001 From: Alexey Shevlyakov Date: Thu, 2 Apr 2020 14:17:46 -0400 Subject: [PATCH 19/58] fix RandomCropDecodeResize test --- tests/ut/cpp/dataset/CMakeLists.txt | 2 +- .../dataset/random_crop_and_resize_op_test.cc | 36 ++---- ...c => random_crop_decode_resize_op_test.cc} | 105 +++++++++--------- 3 files changed, 62 insertions(+), 81 deletions(-) rename tests/ut/cpp/dataset/{random_crop_decode_resizeOp_test.cc => random_crop_decode_resize_op_test.cc} (56%) diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 0da470ac89..086a67c7d7 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -32,7 +32,7 @@ SET(DE_UT_SRCS project_op_test.cc queue_test.cc random_crop_op_test.cc - random_crop_decode_resizeOp_test.cc + random_crop_decode_resize_op_test.cc random_crop_and_resize_op_test.cc random_color_adjust_op_test.cc random_horizontal_flip_op_test.cc diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 864d713ed3..7be18fb02c 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -20,35 +20,17 @@ #include "utils/log_adapter.h" using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; class MindDataTestRandomCropAndResizeOp : public UT::CVOP::CVOpCommon { public: MindDataTestRandomCropAndResizeOp() : CVOpCommon() {} }; 
-TEST_F(MindDataTestRandomCropAndResizeOp, TestOpDefault) { - MS_LOG(INFO) << "Doing testRandomCropAndResize."; - TensorShape s_in = input_tensor_->shape(); - std::shared_ptr output_tensor; - int h_out = 512; - int w_out = 512; - - TensorShape s_out({(uint32_t) h_out, (uint32_t) w_out, (uint32_t) s_in[2]}); - - std::unique_ptr op(new RandomCropAndResizeOp(h_out, w_out)); - Status s; - for (auto i = 0; i < 100; i++) { - s = op->Compute(input_tensor_, &output_tensor); - } - EXPECT_TRUE(s.IsOk()); - MS_LOG(INFO) << "testRandomCropAndResize end."; -} - -TEST_F(MindDataTestRandomCropAndResizeOp, TestOpExtended) { - MS_LOG(INFO) << "Doing testRandomCropAndResize."; +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; TensorShape s_in = input_tensor_->shape(); std::shared_ptr output_tensor; int h_out = 1024; @@ -58,14 +40,14 @@ TEST_F(MindDataTestRandomCropAndResizeOp, TestOpExtended) { float scale_lb = 0.0001; float scale_ub = 1.0; - TensorShape s_out({(uint32_t) h_out, (uint32_t) w_out, (uint32_t) s_in[2]}); + TensorShape s_out({h_out, w_out, s_in[2]}); - std::unique_ptr op( - new RandomCropAndResizeOp(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub)); + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); Status s; for (auto i = 0; i < 100; i++) { s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); } - EXPECT_TRUE(s.IsOk()); - MS_LOG(INFO) << "testRandomCropAndResize end."; + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; } diff --git a/tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc similarity index 56% rename from tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc rename to tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc index facd35c4f7..d7e0b16aff 100644 --- a/tests/ut/cpp/dataset/random_crop_decode_resizeOp_test.cc +++ 
b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc @@ -23,9 +23,10 @@ #include "utils/log_adapter.h" using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; +constexpr double kMseThreshold = 2.0; class MindDataTestRandomCropDecodeResizeOp : public UT::CVOP::CVOpCommon { public: @@ -33,39 +34,38 @@ class MindDataTestRandomCropDecodeResizeOp : public UT::CVOP::CVOpCommon { }; TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { - MS_LOG(INFO) << "Doing testRandomCropDecodeResizeOp Test"; + MS_LOG(INFO) << "starting RandomCropDecodeResizeOp test 1"; - std::shared_ptr output_tensor1; - std::shared_ptr output_tensor2; + std::shared_ptr decode_and_crop_output; + std::shared_ptr crop_and_decode_output; - int target_height = 884; - int target_width = 718; - float scale_lb = 0.08; - float scale_ub = 1.0; - float aspect_lb = 0.75; - float aspect_ub = 1.333333; - InterpolationMode interpolation = InterpolationMode::kLinear; - uint32_t max_iter = 10; - std::unique_ptr op1(new RandomCropAndResizeOp( - target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, max_iter)); - EXPECT_TRUE(op1->OneToOne()); - std::unique_ptr op2(new RandomCropDecodeResizeOp( - target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, max_iter)); - EXPECT_TRUE(op2->OneToOne()); - Status s1, s2; + constexpr int target_height = 884; + constexpr int target_width = 718; + constexpr float scale_lb = 0.08; + constexpr float scale_ub = 1.0; + constexpr float aspect_lb = 0.75; + constexpr float aspect_ub = 1.333333; + const InterpolationMode interpolation = InterpolationMode::kLinear; + constexpr uint32_t max_iter = 10; + auto crop_and_decode = RandomCropDecodeResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, + interpolation, max_iter); 
+ auto crop_and_decode_copy = crop_and_decode; + auto decode_and_crop = static_cast(crop_and_decode_copy); + EXPECT_TRUE(crop_and_decode.OneToOne()); + GlobalContext::config_manager()->set_seed(42); for (int i = 0; i < 100; i++) { - s1 = op1->Compute(input_tensor_, &output_tensor1); - s2 = op2->Compute(raw_input_tensor_, &output_tensor2); - cv::Mat output1(target_height, target_width, CV_8UC3, output_tensor1->StartAddr()); - cv::Mat output2(target_height, target_width, CV_8UC3, output_tensor2->StartAddr()); + (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); + (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); + cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->StartAddr()); + cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->StartAddr()); long int mse_sum = 0; long int count = 0; int a, b; - for (int i = 0; i < target_height; i++) { - for (int j = 0; j < target_width; j++) { - a = (int)output1.at(i, j)[1]; - b = (int)output2.at(i, j)[1]; + for (int j = 0; j < target_height; j++) { + for (int k = 0; k < target_width; k++) { + a = static_cast(output1.at(i, j)[1]); + b = static_cast(output2.at(i, j)[1]); mse_sum += sqrt((a - b) * (a - b)); if (a != b) { count++; @@ -73,24 +73,22 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { } } double mse; - if (count > 0) { - mse = (double) mse_sum / count; - } else { - mse = mse_sum; - } - MS_LOG(DEBUG) << "mse: " << mse << std::endl; + mse = count > 0 ? 
static_cast(mse_sum) / count : mse_sum; + MS_LOG(INFO) << "mse: " << mse << std::endl; + EXPECT_LT(mse, kMseThreshold); } - MS_LOG(INFO) << "MindDataTestRandomCropDecodeResizeOp end!"; + + MS_LOG(INFO) << "RandomCropDecodeResizeOp test 1 finished"; } TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { - MS_LOG(INFO) << "Doing MindDataTestRandomCropDecodeResizeOp"; - const unsigned int h = 884; - const unsigned int w = 718; - const float scale_lb = 0.1; - const float scale_ub = 1; - const float aspect_lb = 0.1; - const float aspect_ub = 10; + MS_LOG(INFO) << "starting RandomCropDecodeResizeOp test 2"; + constexpr int h = 884; + constexpr int w = 718; + constexpr float scale_lb = 0.1; + constexpr float scale_ub = 1; + constexpr float aspect_lb = 0.1; + constexpr float aspect_ub = 10; std::shared_ptr decoded, decoded_and_cropped, cropped_and_decoded; std::mt19937 rd; @@ -98,14 +96,14 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { std::uniform_real_distribution rd_aspect(aspect_lb, aspect_ub); DecodeOp op(true); op.Compute(raw_input_tensor_, &decoded); - Status s1, s2; + Status crop_and_decode_status, decode_and_crop_status; float scale, aspect; int crop_width, crop_height; bool crop_success = false; - unsigned int mse_sum, m1, m2, count; - float mse; + int mse_sum, m1, m2, count; + double mse; - for (unsigned int k = 0; k < 100; ++k) { + for (int k = 0; k < 100; ++k) { mse_sum = 0; count = 0; for (auto i = 0; i < 100; i++) { @@ -132,13 +130,13 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { int y = rd_y(rd); op.Compute(raw_input_tensor_, &decoded); - s1 = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); - s2 = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); + crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); + decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); { cv::Mat 
M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->StartAddr()); cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->StartAddr()); - for (unsigned int i = 0; i < crop_height; ++i) { - for (unsigned int j = 0; j < crop_width; ++j) { + for (int i = 0; i < crop_height; ++i) { + for (int j = 0; j < crop_width; ++j) { m1 = M1.at(i, j)[1]; m2 = M2.at(i, j)[1]; mse_sum += sqrt((m1 - m2) * (m1 - m2)); @@ -149,8 +147,9 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { } } - mse = (count == 0) ? mse_sum : static_cast(mse_sum) / count; - MS_LOG(DEBUG) << "mse: " << mse << std::endl; + mse = count > 0 ? static_cast(mse_sum) / count : mse_sum; + MS_LOG(INFO) << "mse: " << mse << std::endl; + EXPECT_LT(mse, kMseThreshold); } - MS_LOG(INFO) << "MindDataTestRandomCropDecodeResizeOp end!"; + MS_LOG(INFO) << "RandomCropDecodeResizeOp test 2 finished"; } From c6c8c94d3f40b04b736da3d5d411c1a41629f6e6 Mon Sep 17 00:00:00 2001 From: dengwentao Date: Tue, 7 Apr 2020 11:22:53 +0800 Subject: [PATCH 20/58] updata mkl-dnn link and md5 --- Third_Party_Open_Source_Software_Notice | 2 +- cmake/external_libs/mkl_dnn.cmake | 18 +++++++++--------- cmake/utils.cmake | 2 ++ mindspore/ccsrc/CMakeLists.txt | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index 498b5b8d1b..60ad7cf47c 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -368,7 +368,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-Software: MKL-DNN 1.1.2 +Software: oneDNN 1.1.2 Copyright (c) 2009-2018 The MathJax Consortium Copyright 2018 Intel Corporation Copyright 2019 Intel Corporation diff --git a/cmake/external_libs/mkl_dnn.cmake b/cmake/external_libs/mkl_dnn.cmake index 17d8020d3a..6f033fa565 100644 --- a/cmake/external_libs/mkl_dnn.cmake +++ b/cmake/external_libs/mkl_dnn.cmake @@ -1,11 +1,11 @@ -set(mkl_dnn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") -set(mkl_dnn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") -mindspore_add_pkg(mkl_dnn - VER 1.1.1 +set(onednn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") +set(onednn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") +mindspore_add_pkg(onednn + VER 1.1.2 LIBS dnnl mkldnn - URL https://github.com/intel/mkl-dnn/archive/v1.1.1.tar.gz - MD5 d6a422b00459600bdc22242590953f38 + URL https://github.com/oneapi-src/oneDNN/archive/v1.1.2.tar.gz + MD5 ab40d52230f3ad1d7a6f06ce0f6bc17a CMAKE_OPTION -DDNNL_ARCH_OPT_FLAGS='' -DDNNL_CPU_RUNTIME='SEQ' -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF) -include_directories(${mkl_dnn_INC}) -add_library(mindspore::dnnl ALIAS mkl_dnn::dnnl) -add_library(mindspore::mkldnn ALIAS mkl_dnn::mkldnn) +include_directories(${onednn_INC}) +add_library(mindspore::dnnl ALIAS onednn::dnnl) +add_library(mindspore::mkldnn ALIAS onednn::mkldnn) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 060e400820..99c064fdd4 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -40,6 +40,8 @@ else() set(JOBS 8) if (${JOBS} GREATER ${N}) set(THNUM ${N}) + else() + set(THNUM ${JOBS}) endif() endif () message("set make thread num: ${THNUM}") diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index befe86f3c0..9f559a51eb 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -542,7 +542,7 @@ endif() if (ENABLE_CPU) add_custom_target(add_cpu_lib ALL - COMMAND cp ${mkl_dnn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 + COMMAND cp ${onednn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 ) 
add_dependencies(add_cpu_lib add_ms_lib) endif() From 6dc6d6bc83113431542b6d21e4005331f1d20a4d Mon Sep 17 00:00:00 2001 From: jonyguo Date: Fri, 3 Apr 2020 16:53:45 +0800 Subject: [PATCH 21/58] fix: when use MindDataset block_reade=True hung --- mindspore/ccsrc/mindrecord/io/shard_reader.cc | 2 ++ mindspore/mindrecord/filewriter.py | 1 + mindspore/mindrecord/tools/cifar100_to_mr.py | 9 ++++--- tests/ut/python/dataset/test_minddataset.py | 27 ++++++++++++++++--- .../mindrecord/test_cifar100_to_mindrecord.py | 4 ++- .../mindrecord/test_mindrecord_exception.py | 8 +++++- 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index 791de6c60b..32825fd9df 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ -785,6 +785,8 @@ vector ShardReader::GetAllColumns() { MSRStatus ShardReader::CreateTasksByBlock(const std::vector> &row_group_summary, const std::vector> &operators) { + vector columns = GetAllColumns(); + CheckIfColumnInIndex(columns); for (const auto &rg : row_group_summary) { auto shard_id = std::get<0>(rg); auto group_id = std::get<1>(rg); diff --git a/mindspore/mindrecord/filewriter.py b/mindspore/mindrecord/filewriter.py index d1471f47cb..4056825ff3 100644 --- a/mindspore/mindrecord/filewriter.py +++ b/mindspore/mindrecord/filewriter.py @@ -143,6 +143,7 @@ class FileWriter: ParamTypeError: If index field is invalid. MRMDefineIndexError: If index field is not primitive type. MRMAddIndexError: If failed to add index field. + MRMGetMetaError: If the schema is not set or get meta failed. 
""" if not index_fields or not isinstance(index_fields, list): raise ParamTypeError('index_fields', 'list') diff --git a/mindspore/mindrecord/tools/cifar100_to_mr.py b/mindspore/mindrecord/tools/cifar100_to_mr.py index a359de853d..c011c8f4b0 100644 --- a/mindspore/mindrecord/tools/cifar100_to_mr.py +++ b/mindspore/mindrecord/tools/cifar100_to_mr.py @@ -24,7 +24,7 @@ from mindspore import log as logger from .cifar100 import Cifar100 from ..common.exceptions import PathNotExistsError from ..filewriter import FileWriter -from ..shardutils import check_filename +from ..shardutils import check_filename, SUCCESS try: cv2 = import_module("cv2") except ModuleNotFoundError: @@ -98,8 +98,11 @@ class Cifar100ToMR: data_list = _construct_raw_data(images, fine_labels, coarse_labels) test_data_list = _construct_raw_data(test_images, test_fine_labels, test_coarse_labels) - _generate_mindrecord(self.destination, data_list, fields, "img_train") - _generate_mindrecord(self.destination + "_test", test_data_list, fields, "img_test") + if _generate_mindrecord(self.destination, data_list, fields, "img_train") != SUCCESS: + return FAILED + if _generate_mindrecord(self.destination + "_test", test_data_list, fields, "img_test") != SUCCESS: + return FAILED + return SUCCESS def _construct_raw_data(images, fine_labels, coarse_labels): """ diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index da22f5c3b7..460a728b5c 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -47,7 +47,9 @@ def add_and_remove_cv_file(): os.remove("{}.db".format(x)) if os.path.exists("{}.db".format(x)) else None writer = FileWriter(CV_FILE_NAME, FILES_NUM) data = get_data(CV_DIR_NAME) - cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, + cv_schema_json = {"id": {"type": "int32"}, + "file_name": {"type": "string"}, + "label": {"type": "int32"}, "data": {"type": "bytes"}} 
writer.add_schema(cv_schema_json, "img_schema") writer.add_index(["file_name", "label"]) @@ -226,6 +228,24 @@ def test_cv_minddataset_blockreader_tutorial(add_and_remove_cv_file): num_iter += 1 assert num_iter == 20 +def test_cv_minddataset_blockreader_some_field_not_in_index_tutorial(add_and_remove_cv_file): + """tutorial for cv minddataset.""" + columns_list = ["id", "data", "label"] + num_readers = 4 + data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, + block_reader=True) + assert data_set.get_dataset_size() == 10 + repeat_num = 2 + data_set = data_set.repeat(repeat_num) + num_iter = 0 + for item in data_set.create_dict_iterator(): + logger.info("-------------- block reader repeat tow {} -----------------".format(num_iter)) + logger.info("-------------- item[id]: {} ----------------------------".format(item["id"])) + logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) + logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) + num_iter += 1 + assert num_iter == 20 + def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): """tutorial for cv minderdataset.""" @@ -359,13 +379,14 @@ def get_data(dir_name): lines = file_reader.readlines() data_list = [] - for line in lines: + for i, line in enumerate(lines): try: filename, label = line.split(",") label = label.strip("\n") with open(os.path.join(img_dir, filename), "rb") as file_reader: img = file_reader.read() - data_json = {"file_name": filename, + data_json = {"id": i, + "file_name": filename, "data": img, "label": int(label)} data_list.append(data_json) diff --git a/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py b/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py index b3a8d94589..e95f25aae4 100644 --- a/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py +++ b/tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py @@ -18,6 +18,7 @@ import pytest from 
mindspore.mindrecord import Cifar100ToMR from mindspore.mindrecord import FileReader from mindspore.mindrecord import MRMOpenError +from mindspore.mindrecord import SUCCESS from mindspore import log as logger CIFAR100_DIR = "../data/mindrecord/testCifar100Data" @@ -26,7 +27,8 @@ MINDRECORD_FILE = "./cifar100.mindrecord" def test_cifar100_to_mindrecord_without_index_fields(): """test transform cifar100 dataset to mindrecord without index fields.""" cifar100_transformer = Cifar100ToMR(CIFAR100_DIR, MINDRECORD_FILE) - cifar100_transformer.transform() + ret = cifar100_transformer.transform() + assert ret == SUCCESS, "Failed to tranform from cifar100 to mindrecord" assert os.path.exists(MINDRECORD_FILE) assert os.path.exists(MINDRECORD_FILE + "_test") read() diff --git a/tests/ut/python/mindrecord/test_mindrecord_exception.py b/tests/ut/python/mindrecord/test_mindrecord_exception.py index 0a51fbf4e7..1f7a3f859d 100644 --- a/tests/ut/python/mindrecord/test_mindrecord_exception.py +++ b/tests/ut/python/mindrecord/test_mindrecord_exception.py @@ -16,7 +16,7 @@ import os import pytest from mindspore.mindrecord import FileWriter, FileReader, MindPage -from mindspore.mindrecord import MRMOpenError, MRMGenerateIndexError, ParamValueError +from mindspore.mindrecord import MRMOpenError, MRMGenerateIndexError, ParamValueError, MRMGetMetaError from mindspore import log as logger from utils import get_data @@ -280,3 +280,9 @@ def test_cv_file_writer_shard_num_greater_than_1000(): with pytest.raises(ParamValueError) as err: FileWriter(CV_FILE_NAME, 1001) assert 'Shard number should between' in str(err.value) + +def test_add_index_without_add_schema(): + with pytest.raises(MRMGetMetaError) as err: + fw = FileWriter(CV_FILE_NAME) + fw.add_index(["label"]) + assert 'Failed to get meta info' in str(err.value) From 180c1750de1a0b4a3b62fe383cb90f63e5df75cf Mon Sep 17 00:00:00 2001 From: ms_yan <6576637+ms_yan@user.noreply.gitee.com> Date: Thu, 2 Apr 2020 21:56:48 +0800 Subject: [PATCH 
22/58] add parameter check for Class Schema --- mindspore/dataset/engine/datasets.py | 23 ++++++++---- mindspore/dataset/engine/validators.py | 50 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index db2b5169d2..de604a67e9 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,7 +38,7 @@ from .iterators import DictIterator, TupleIterator from .validators import check, check_batch, check_shuffle, check_map, check_repeat, check_zip, check_rename, \ check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_zip_dataset + check_zip_dataset, check_add_column from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -2334,13 +2334,20 @@ class Schema: self.dataset_type = '' self.num_rows = 0 else: + if not os.path.isfile(schema_file) or not os.access(schema_file, os.R_OK): + raise ValueError("The file %s does not exist or permission denied!" % schema_file) try: with open(schema_file, 'r') as load_f: json_obj = json.load(load_f) - self.from_json(json_obj) except json.decoder.JSONDecodeError: - raise RuntimeError("Schema file failed to load") + raise RuntimeError("Schema file failed to load.") + except UnicodeDecodeError: + raise RuntimeError("Schema file failed to decode.") + except Exception: + raise RuntimeError("Schema file failed to open.") + self.from_json(json_obj) + @check_add_column def add_column(self, name, de_type, shape=None): """ Add new column to the schema. 
@@ -2359,10 +2366,8 @@ class Schema: if isinstance(de_type, typing.Type): de_type = mstype_to_detype(de_type) new_column["type"] = str(de_type) - elif isinstance(de_type, str): - new_column["type"] = str(DataType(de_type)) else: - raise ValueError("Unknown column type") + new_column["type"] = str(DataType(de_type)) if shape is not None: new_column["shape"] = shape @@ -2391,7 +2396,7 @@ class Schema: Parse the columns and add it to self. Args: - columns (list[str]): names of columns. + columns (dict or list[str]): names of columns. Raises: RuntimeError: If failed to parse schema file. @@ -2399,6 +2404,8 @@ class Schema: RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. """ + if columns is None: + raise TypeError("Expected non-empty dict or string list.") self.columns = [] for col in columns: name = None @@ -2443,6 +2450,8 @@ class Schema: RuntimeError: if dataset type is missing in the object. RuntimeError: if columns are missing in the object. """ + if not isinstance(json_obj, dict) or json_obj is None: + raise ValueError("Expected non-empty dict.") for k, v in json_obj.items(): if k == "datasetType": self.dataset_type = v diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index b4d22a4a01..26d6241945 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,10 +19,15 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count +from mindspore._c_expression import typing from . import samplers from . 
import datasets INT32_MAX = 2147483647 +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64" +] def check(method): @@ -188,6 +193,12 @@ def check(method): return wrapper +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + def check_filename(path): """ check the filename in the path @@ -743,3 +754,42 @@ def check_project(method): return method(*args, **kwargs) return new_method + + +def check_shape(shape, name): + if isinstance(shape, list): + for element in shape: + if not isinstance(element, int): + raise TypeError( + "Each element in {0} should be of type int. Got {1}.".format(name, type(element))) + else: + raise TypeError("Expected int list.") + + +def check_add_column(method): + """check the input arguments of add_column.""" + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check name; required argument + name = param_dict.get("name") + if not isinstance(name, str) or not name: + raise TypeError("Expected non-empty string.") + + # check type; required argument + de_type = param_dict.get("de_type") + if de_type is not None: + if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): + raise ValueError("Unknown column type.") + else: + raise TypeError("Expected non-empty string.") + + # check shape + shape = param_dict.get("shape") + if shape is not None: + check_shape(shape, "shape") + + return method(*args, **kwargs) + + return new_method From f01098bc12be06f440ae166bf28a17587d9e50cb Mon Sep 17 00:00:00 2001 From: Jonathan Yan Date: Sat, 4 Apr 2020 06:48:58 -0400 Subject: [PATCH 23/58] remove ENABLE_MINDRECORD flag --- mindspore/ccsrc/dataset/CMakeLists.txt | 2 -- mindspore/ccsrc/dataset/api/de_pipeline.cc | 12 +++--------- mindspore/ccsrc/dataset/api/de_pipeline.h | 4 ---- mindspore/ccsrc/dataset/api/python_bindings.cc | 6 ------ 
.../engine/datasetops/source/mindrecord_op.cc | 3 --- .../dataset/engine/datasetops/source/mindrecord_op.h | 2 -- tests/ut/cpp/CMakeLists.txt | 1 - tests/ut/cpp/dataset/mind_record_op_test.cc | 2 -- 8 files changed, 3 insertions(+), 29 deletions(-) diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt index d6791f2b9b..477d37051e 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/CMakeLists.txt @@ -17,8 +17,6 @@ if (ENABLE_TDTQUE) message(STATUS "TDT queue is enabled") endif () -add_definitions(-D ENABLE_MINDRECORD) - # conde coverage # option(ENABLE_COVERAGE "Enable code coverage report" OFF) # if (ENABLE_COVERAGE) diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index d51204f659..65ec8d30f2 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -29,11 +29,9 @@ #include "dataset/engine/datasetops/source/manifest_op.h" #include "dataset/engine/datasetops/source/cifar_op.h" #include "dataset/engine/datasetops/source/celeba_op.h" -#ifdef ENABLE_MINDRECORD -#include "./shard_category.h" -#include "./shard_sample.h" -#include "./shard_shuffle.h" -#endif +#include "mindrecord/include/shard_category.h" +#include "mindrecord/include/shard_sample.h" +#include "mindrecord/include/shard_shuffle.h" #include "dataset/util/random.h" #include "dataset/util/status.h" @@ -46,9 +44,7 @@ using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr g_parse_op_func_ = {{kStorage, &DEPipeline::ParseStorageOp}, {kShuffle, &DEPipeline::ParseShuffleOp}, -#ifdef ENABLE_MINDRECORD {kMindrecord, &DEPipeline::ParseMindRecordOp}, -#endif {kMap, &DEPipeline::ParseMapOp}, {kBatch, &DEPipeline::ParseBatchOp}, {kRepeat, &DEPipeline::ParseRepeatOp}, @@ -364,7 +360,6 @@ Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr *in_partitions) { if (args["partitions"].is_none()) { std::string err_msg 
= "Error: partitions is not set (None)"; @@ -450,7 +445,6 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr *ptr) { std::shared_ptr builder = std::make_shared(); diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/dataset/api/de_pipeline.h index e8dde85a77..acffc390cc 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/dataset/api/de_pipeline.h @@ -38,9 +38,7 @@ using DsOpPtr = std::shared_ptr; enum OpName { kStorage = 0, kShuffle, -#ifdef ENABLE_MINDRECORD kMindrecord, -#endif kBatch, kCache, kRepeat, @@ -101,11 +99,9 @@ class DEPipeline { Status ParseShuffleOp(const py::dict &args, std::shared_ptr *ptr); -#ifdef ENABLE_MINDRECORD Status CheckMindRecordPartitionInfo(const py::dict &args, std::vector *ptr); Status ParseMindRecordOp(const py::dict &args, std::shared_ptr *ptr); -#endif Status ParseMapOp(const py::dict &args, std::shared_ptr *ptr); diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index 86b0a5d66a..e6c2691281 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -44,9 +44,7 @@ #include "dataset/engine/datasetops/source/io_block.h" #include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/manifest_op.h" -#ifdef ENABLE_MINDRECORD #include "dataset/engine/datasetops/source/mindrecord_op.h" -#endif #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" #include "dataset/engine/datasetops/source/sampler/random_sampler.h" @@ -146,14 +144,12 @@ void bindDatasetOps(py::module *m) { return py::make_tuple(count, num_classes); }); -#ifdef ENABLE_MINDRECORD (void)py::class_>(*m, "MindRecordOp") .def_static("get_num_rows", [](const std::string &path) { int64_t count = 0; THROW_IF_ERROR(MindRecordOp::CountTotalRows(path, &count)); return count; }); 
-#endif (void)py::class_>(*m, "ManifestOp") .def_static("get_num_rows_and_classes", @@ -424,9 +420,7 @@ PYBIND11_MODULE(_c_dataengine, m) { .value("STORAGE", OpName::kStorage) .value("SHUFFLE", OpName::kShuffle) .value("BATCH", OpName::kBatch) -#ifdef ENABLE_MINDRECORD .value("MINDRECORD", OpName::kMindrecord) -#endif .value("CACHE", OpName::kCache) .value("REPEAT", OpName::kRepeat) .value("TAKE", OpName::kTake) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index b062371d7f..b5bea5416c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -13,8 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef ENABLE_MINDRECORD - #include "dataset/engine/datasetops/source/mindrecord_op.h" #include @@ -665,4 +663,3 @@ Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *cou } } // namespace dataset } // namespace mindspore -#endif diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h index 2535acbc50..0b16391b20 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h @@ -15,7 +15,6 @@ */ #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ #define DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ -#ifdef ENABLE_MINDRECORD #pragma once #include @@ -276,5 +275,4 @@ class MindRecordOp : public ParallelOp { }; } // namespace dataset } // namespace mindspore -#endif #endif // DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 5f4bd41b3b..8d3f8a8138 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -26,7 
+26,6 @@ MESSAGE("check ut_test ${CMAKE_BINARY_DIR}") link_directories(${MS_CCSRC_BUILD_PATH}) if(ENABLE_MINDDATA) - add_definitions(-D ENABLE_MINDRECORD) add_definitions(-D ENABLE_MINDDATA) link_directories(${MS_CCSRC_BUILD_PATH}/dataset) link_directories(${MS_CCSRC_BUILD_PATH}/mindrecord) diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index abe7faef14..3d5c80b3f4 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef ENABLE_MINDRECORD #include #include #include @@ -480,4 +479,3 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { row_count++; } } -#endif From 5f1fedaae7bed936b22e1f7e4a0adde4f1fb11e8 Mon Sep 17 00:00:00 2001 From: WeibiaoYu Date: Thu, 2 Apr 2020 08:43:13 -0400 Subject: [PATCH 24/58] Support to config whether to save integeated checkpoint, in auto model parallel scene --- mindspore/common/api.py | 35 -------------------------- mindspore/train/callback.py | 16 +++++++++--- mindspore/train/serialization.py | 7 +++--- tests/ut/python/utils/test_callback.py | 4 +-- 4 files changed, 19 insertions(+), 43 deletions(-) diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 9ee95ef772..5a74febe53 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -374,9 +374,6 @@ class _Executor: obj.parameter_layout_dict = self._executor.get_parameter_layout(phase) obj.load_parameter_slice(params) - if _get_parallel_mode() in ["hybrid_parallel"]: - obj.parameter_layout_dict = self._build_parameter_layout(obj) - # the following GE init process is not needed when use vm or ms backend if enable_ge: # decide whether to sink based on whether the inputs is virtual or not @@ -449,38 +446,6 @@ class _Executor: return self._exec_pip(obj, *args, phase=phase_real) raise KeyError('{} graph is not 
exist.'.format(phase_real)) - def _build_parameter_layout(self, obj): - """ - Build parameter layout, for layerwise_parallel parameter. - - Args: - obj (Function or Cell): The function or cell instance need to be compiled. - - Returns: - Dictionary, parameter layout info. - """ - parameter_layout_dict = {} - layerwise_parallel_parameters = [] - for key in obj.parameters_dict(): - if obj.parameters_dict()[key].layerwise_parallel is True: - layerwise_parallel_parameters.append(key) - - if not layerwise_parallel_parameters: - return parameter_layout_dict - - from ..communication.management import get_group_size - group_size = [get_group_size()] - for key in layerwise_parallel_parameters: - tensor_map = [0] - shape = obj.parameters_dict()[key].data.shape() - for x in range(len(shape)): # dim 0 set 0, others set -1 - if x: - tensor_map.append(-1) - layout = [group_size, tensor_map] - parameter_layout_dict[key] = layout - - return parameter_layout_dict - def del_net_res(self, net_id): self._executor.del_net_res(net_id) diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py index 62f847089d..dcf630342c 100644 --- a/mindspore/train/callback.py +++ b/mindspore/train/callback.py @@ -24,7 +24,7 @@ import mindspore.context as context from mindspore.train.serialization import _exec_save_checkpoint, _fill_param_into_net, _save_graph from mindspore.train._utils import _make_directory from mindspore import log as logger -from mindspore._checkparam import check_int_non_negative +from mindspore._checkparam import check_int_non_negative, check_bool from mindspore.common.tensor import Tensor from .summary.summary_record import _cache_summary_tensor_data @@ -150,6 +150,8 @@ class CheckpointConfig: keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5. keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0. Can't be used with keep_checkpoint_max at the same time. 
+ integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. Default: True. + Integrated save function is only supported in automatic parallel scene, not supported in manual parallel. Raises: ValueError: If the input_param is None or 0. @@ -163,7 +165,8 @@ class CheckpointConfig: save_checkpoint_steps=1, save_checkpoint_seconds=0, keep_checkpoint_max=5, - keep_checkpoint_per_n_minutes=0): + keep_checkpoint_per_n_minutes=0, + integrated_save=True): if not save_checkpoint_steps and not save_checkpoint_seconds and \ not keep_checkpoint_max and not keep_checkpoint_per_n_minutes: @@ -191,6 +194,8 @@ class CheckpointConfig: if not self._keep_checkpoint_per_n_minutes or self._keep_checkpoint_per_n_minutes == 0: self._keep_checkpoint_max = 1 + self._integrated_save = check_bool(integrated_save) + @property def save_checkpoint_steps(self): """Get the value of _save_checkpoint_steps.""" return self._save_checkpoint_steps @@ -211,6 +216,11 @@ class CheckpointConfig: """Get the value of _keep_checkpoint_per_n_minutes.""" return self._keep_checkpoint_per_n_minutes + @property + def integrated_save(self): + """Get the value of _integrated_save.""" + return self._integrated_save + def get_checkpoint_policy(self): """Get the policy of checkpoint.""" checkpoint_policy = {'save_checkpoint_steps': self._save_checkpoint_steps, @@ -619,7 +629,7 @@ class ModelCheckpoint(Callback): _set_cur_net(cb_params.train_network) cb_params.train_network.exec_checkpoint_graph() - _exec_save_checkpoint(cb_params.train_network, gen_file) + _exec_save_checkpoint(cb_params.train_network, gen_file, self._config.integrated_save) if os.path.exists(gen_file): shutil.move(gen_file, cur_file) diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 0478bbc071..b334c3e9d8 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -279,13 +279,14 @@ def _save_graph(network, file_name): os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) -def 
_exec_save_checkpoint(train_network, ckpoint_file_name): +def _exec_save_checkpoint(train_network, ckpoint_file_name, integrated_save=True): """ Saves checkpoint for 'ms' backend. Args: train_network (Network): The train network for training. ckpoint_file_name (str): The name of checkpoint file. + integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. """ param_dict = {} @@ -300,9 +301,9 @@ def _exec_save_checkpoint(train_network, ckpoint_file_name): else: param_data = Tensor(value.data) - # in model parallel scenario, some parameters were spliteds to all the devices, + # in automatic model parallel scenario, some parameters were split to all the devices, # which should be combined before saving - if key in train_network.parameter_layout_dict: + if integrated_save and key in train_network.parameter_layout_dict: param_data = _get_merged_param_data(train_network, key, param_data) each_param["data"] = param_data diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 60e4c6527a..43cf827330 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -308,10 +308,10 @@ def test_RunContext(): def test_Checkpoint_Config(): """Test CheckpointConfig all None or 0.""" with pytest.raises(ValueError): - CheckpointConfig(0, 0, 0, 0) + CheckpointConfig(0, 0, 0, 0, True) with pytest.raises(ValueError): - CheckpointConfig(0, None, 0, 0) + CheckpointConfig(0, None, 0, 0, True) def test_step_end_save_graph(): From 5b915155427293e60ecb229810b7e8d0f67912fd Mon Sep 17 00:00:00 2001 From: VectorSL Date: Tue, 7 Apr 2020 19:48:34 +0800 Subject: [PATCH 25/58] update lossscale for gpu --- mindspore/nn/wrap/loss_scale.py | 40 +++++++++---- mindspore/ops/operations/__init__.py | 6 +- mindspore/ops/operations/math_ops.py | 88 ++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 11 deletions(-) diff --git a/mindspore/nn/wrap/loss_scale.py 
b/mindspore/nn/wrap/loss_scale.py index 1ce3179273..6a1f15a402 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ """Loss scale cell for loss scale training.""" +import mindspore.context as context from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.train.parallel_utils import ParallelMode from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean @@ -34,6 +35,13 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) +_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() + + +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) class DynamicLossScaleUpdateCell(Cell): r""" @@ -197,9 +205,15 @@ class TrainOneStepWithLossScaleCell(Cell): self.optimizer = optimizer self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) self.hyper_map = C.HyperMap() - self.alloc_status = NPUAllocFloatStatus() - self.get_status = NPUGetFloatStatus() - self.clear_status = NPUClearFloatStatus() + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + else: + self.gpu_target = False + self.alloc_status = NPUAllocFloatStatus() + self.get_status = NPUGetFloatStatus() + self.clear_status = NPUClearFloatStatus() self.reduce_sum = ReduceSum(keep_dims=False) self.base = Tensor(1, mstype.float32) self.less_equal = LessEqual() @@ -224,10 +238,12 @@ class TrainOneStepWithLossScaleCell(Cell): def construct(self, data, label, sens=None): weights = self.weights loss = self.network(data, label) - # init overflow buffer - init = self.alloc_status() - # clear overflow buffer - self.clear_status(init) + init = False + if not self.gpu_target: + # 
init overflow buffer + init = self.alloc_status() + # clear overflow buffer + self.clear_status(init) if sens is None: scaling_sens = self.loss_scale else: @@ -238,9 +254,13 @@ class TrainOneStepWithLossScaleCell(Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) # get the overflow buffer - self.get_status(init) - # sum overflow buffer elements, 0:not overflow , >0:overflow - flag_sum = self.reduce_sum(init, (0,)) + if not self.gpu_target: + self.get_status(init) + # sum overflow buffer elements, 0:not overflow , >0:overflow + flag_sum = self.reduce_sum(init, (0,)) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) if self.is_distributed: # sum overflow flag over devices flag_reduce = self.allreduce(flag_sum) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 37a3b38bb6..d255796bae 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -44,7 +44,7 @@ from .math_ops import (Abs, ACos, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, - NPUGetFloatStatus, Pow, RealDiv, + NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, Reciprocal, CumSum, Sin, Sqrt, Rsqrt, Square, Sub, TensorAdd, Sign, Round) @@ -154,6 +154,10 @@ __all__ = [ 'NPUAllocFloatStatus', 'NPUGetFloatStatus', 'NPUClearFloatStatus', + 'IsNan', + 'IsFinite', + 'IsInf', + 'FloatStatus', 'Reciprocal', 'SmoothL1Loss', 'ReduceAll', diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 175b72560f..127d3c513c 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1541,6 +1541,94 @@ class LogicalOr(_LogicBinaryOp): def infer_dtype(self, x_dtype, y_dtype): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, (mstype.bool_,), self.prim_name()) 
+class IsNan(PrimitiveWithInfer): + """ + Judging which elements are nan for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsNan""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsInf(PrimitiveWithInfer): + """ + Judging which elements are inf or -inf for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsInf""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class IsFinite(PrimitiveWithInfer): + """ + Judging which elements are finite for each position + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape of input, and the dtype is bool. + """ + + @prim_attr_register + def __init__(self): + """init IsFinite""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + return mstype.bool_ + +class FloatStatus(PrimitiveWithInfer): + """ + Determine if the elements contains nan, inf or -inf. `0` for normal, `1` for overflow. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the shape of `(1,)`, and has the same dtype of input `mindspore.dtype.float32` or + `mindspore.dtype.float16`. 
+ """ + + @prim_attr_register + def __init__(self): + """init FloatStatus""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) + + def infer_shape(self, x_shape): + return [1] + + def infer_dtype(self, x_dtype): + return x_dtype class NPUAllocFloatStatus(PrimitiveWithInfer): """ From 14c5c1b57d89e6a80f9d5272c566d199515b8b59 Mon Sep 17 00:00:00 2001 From: guohongzilong <2713219276@qq.com> Date: Tue, 7 Apr 2020 20:39:51 +0800 Subject: [PATCH 26/58] usr mindspore. instead of mstype. --- mindspore/common/api.py | 4 ++-- mindspore/common/initializer.py | 2 +- mindspore/dataset/engine/datasets.py | 4 ++-- mindspore/ops/operations/array_ops.py | 7 ++++--- mindspore/ops/operations/math_ops.py | 4 ++-- mindspore/train/model.py | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 5a74febe53..7f0b2bfeaa 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -230,8 +230,8 @@ def ms_function(fn=None, obj=None, input_signature=None): >>> z = F.tensor_add(x, y) >>> return z >>> - >>> @ms_function(input_signature=(MetaTensor(mstype.float32, (1, 1, 3, 3)), - >>> MetaTensor(mstype.float32, (1, 1, 3, 3)))) + >>> @ms_function(input_signature=(MetaTensor(mindspore.float32, (1, 1, 3, 3)), + >>> MetaTensor(mindspore.float32, (1, 1, 3, 3)))) >>> def tensor_add_with_sig(x, y): >>> z = F.tensor_add(x, y) >>> return z diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index bdc3418129..4261621272 100644 --- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -282,7 +282,7 @@ def initializer(init, shape=None, dtype=mstype.float32): Tensor, initialized tensor. 
Examples: - >>> tensor = initializer('ones', [1, 2, 3], mstype.float32) + >>> tensor = initializer('ones', [1, 2, 3], mindspore.float32) """ if not isinstance(init, (Tensor, numbers.Number, str, Initializer)): raise TypeError('Unsupported init type.') diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index de604a67e9..ab2290c13c 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -1814,7 +1814,7 @@ class TFRecordDataset(SourceDataset): >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files) >>> # 2) get all rows from dataset_files with user-defined schema: >>> schema = ds.Schema() - >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) + >>> schema.add_column('col_1d', de_type=mindspore.int64, shape=[2]) >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema=schema) >>> # 3) get all rows from dataset_files with schema file "./schema.json": >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema="./schema.json") @@ -2325,7 +2325,7 @@ class Schema: >>> import mindspore.common.dtype as mstype >>> # create schema, specify column name, mindspore.dtype and shape of the column >>> schema = ds.Schema() - >>> schema.add_column('col1', de_type=mstype.int64, shape=[2]) + >>> schema.add_column('col1', de_type=mindspore.int64, shape=[2]) """ def __init__(self, schema_file=None): diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index fdad46a31f..b91c2cbc7d 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1535,7 +1535,8 @@ class StridedSlice(PrimitiveWithInfer): - Finally, the output is [3, 3, 3]. 
Examples - >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]) + >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], + >>> [[5, 5, 5], [6, 6, 6]]], mindspore.float32) >>> slice = StridedSlice() >>> output = slice(input_x, (1, 0, 0), (2, 1, 3), (1, 1, 1)) >>> output.shape() @@ -2067,7 +2068,7 @@ class SpaceToBatch(PrimitiveWithInfer): >>> block_size = 2 >>> paddings = [[0, 0], [0, 0]] >>> space_to_batch = P.SpaceToBatch(block_size, paddings) - >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mstype.float32) + >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) >>> space_to_batch(x) [[[[1.]]], [[[2.]]], [[[3.]]], [[[4.]]]] @@ -2135,7 +2136,7 @@ class BatchToSpace(PrimitiveWithInfer): >>> block_size = 2 >>> crops = [[0, 0], [0, 0]] >>> op = P.BatchToSpace(block_size, crops) - >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mstype.float32) + >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) >>> output = op(x) [[[[1., 2.], [3., 4.]]]] diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 127d3c513c..47b9e490f1 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1996,8 +1996,8 @@ class Atan2(_MathBinaryOp): Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. Examples: - >>> input_x = Tensor(np.array([[0, 1]]), mstype.float32) - >>> input_y = Tensor(np.array([[1, 1]]), mstype.float32) + >>> input_x = Tensor(np.array([[0, 1]]), mindspore.float32) + >>> input_y = Tensor(np.array([[1, 1]]), mindspore.float32) >>> atan2 = P.Atan2() >>> atan2(input_x, input_y) [[0. 0.7853982]] diff --git a/mindspore/train/model.py b/mindspore/train/model.py index bcfd897f58..41b372f85a 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -528,7 +528,7 @@ class Model: Tensor, array(s) of predictions. 
Examples: - >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mstype.float32) + >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> model = Model(Net()) >>> model.predict(input_data) """ From 951e094dd4868c871f61006487f1e6d8d7672fb6 Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Fri, 3 Apr 2020 15:37:42 +0800 Subject: [PATCH 27/58] add api image gradients --- mindspore/nn/layer/__init__.py | 4 +- mindspore/nn/layer/basic.py | 45 ++++++++++++++ tests/st/ops/davinci/test_image_gradients.py | 62 ++++++++++++++++++++ tests/ut/python/nn/test_image_gradients.py | 49 ++++++++++++++++ 4 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 tests/st/ops/davinci/test_image_gradients.py create mode 100644 tests/ut/python/nn/test_image_gradients.py diff --git a/mindspore/nn/layer/__init__.py b/mindspore/nn/layer/__init__.py index bb29935602..dae18fe663 100644 --- a/mindspore/nn/layer/__init__.py +++ b/mindspore/nn/layer/__init__.py @@ -22,7 +22,7 @@ from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm from .container import SequentialCell, CellList from .conv import Conv2d, Conv2dTranspose from .lstm import LSTM -from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot +from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradients from .embedding import Embedding from .pooling import AvgPool2d, MaxPool2d @@ -31,7 +31,7 @@ __all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'SequentialCell', 'CellList', 'Conv2d', 'Conv2dTranspose', 'LSTM', - 'Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', + 'Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'ImageGradients', 'Embedding', 'AvgPool2d', 'MaxPool2d', ] diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index 30b94c738d..de49685dac 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -370,3 +370,48 @@ class 
OneHot(Cell): def construct(self, indices): return self.onehot(indices, self.depth, self.on_value, self.off_value) + + +class ImageGradients(Cell): + r""" + Returns two tensors, the first is along the height dimension and the second is along the width dimension. + + Assume an image shape is :math:`h*w`. The gradients along the height and the width are :math:`dy` and :math:`dx`, + respectively. + + .. math:: + dy[i] = \begin{cases} image[i+1, :]-image[i, :], &if\ 0<=i>> net = nn.ImageGradients() + >>> image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + >>> net(image) + [[[[2,2] + [0,0]]]] + [[[[1,0] + [1,0]]]] + """ + def __init__(self): + super(ImageGradients, self).__init__() + + def construct(self, images): + batch_size, depth, height, width = P.Shape()(images) + dy = images[:, :, 1:, :] - images[:, :, :height - 1, :] + dy_last = P.Fill()(P.DType()(images), (batch_size, depth, 1, width), 0) + dy = P.Concat(2)((dy, dy_last)) + + dx = images[:, :, :, 1:] - images[:, :, :, :width - 1] + dx_last = P.Fill()(P.DType()(images), (batch_size, depth, height, 1), 0) + dx = P.Concat(3)((dx, dx_last)) + return dy, dx diff --git a/tests/st/ops/davinci/test_image_gradients.py b/tests/st/ops/davinci/test_image_gradients.py new file mode 100644 index 0000000000..ea385158c9 --- /dev/null +++ b/tests/st/ops/davinci/test_image_gradients.py @@ -0,0 +1,62 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +import mindspore.common.dtype as mstype +from mindspore import Tensor +from mindspore.common.api import ms_function + +context.set_context(device_target="Ascend") +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.image_gradients = nn.ImageGradients() + + @ms_function + def construct(self, x): + return self.image_gradients(x) + + +def test_image_gradients(): + image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + expected_dy = np.array([[[[2,2],[0,0]]]]).astype(np.int32) + expected_dx = np.array([[[[1,0],[1,0]]]]).astype(np.int32) + net = Net() + dy, dx = net(image) + assert np.any(dx.asnumpy()-expected_dx) == False + assert np.any(dy.asnumpy()-expected_dy) == False + + +def test_image_gradients_multi_channel_depth(): + # 4 x 2 x 2 x 2 + dtype = mstype.int32 + image = Tensor(np.array([[[[1,2],[3,4]], [[5,6],[7,8]]], + [[[3,5],[7,9]], [[11,13],[15,17]]], + [[[5,10],[15,20]], [[25,30],[35,40]]], + [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype) + expected_dy = Tensor(np.array([[[[2,2],[0,0]], [[2,2],[0,0]]], + [[[4,4],[0,0]], [[4,4],[0,0]]], + [[[10,10],[0,0]], [[10,10],[0,0]]], + [[[20,20],[0,0]], [[20,20],[0,0]]]]), dtype=dtype) + expected_dx = Tensor(np.array([[[[1,0],[1,0]], [[1,0],[1,0]]], + [[[2,0],[2,0]], [[2,0],[2,0]]], + [[[5,0],[5,0]], [[5,0],[5,0]]], + [[[10,0],[10,0]], [[10,0],[10,0]]]]), dtype=dtype) + net = Net() + dy, dx = net(image) + + assert np.any(dx.asnumpy()-expected_dx.asnumpy()) == False + assert np.any(dy.asnumpy()-expected_dy.asnumpy()) == False diff --git a/tests/ut/python/nn/test_image_gradients.py b/tests/ut/python/nn/test_image_gradients.py new file mode 100644 index 0000000000..f65f38ec0a --- /dev/null +++ b/tests/ut/python/nn/test_image_gradients.py @@ -0,0 +1,49 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test loss """ +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +import mindspore.common.dtype as mstype +from mindspore import Tensor +from mindspore.common.api import _executor +from mindspore.common.api import ms_function + +context.set_context(device_target="Ascend") +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.image_gradients = nn.ImageGradients() + + @ms_function + def construct(self, x): + return self.image_gradients(x) + +def test_compile(): + # input shape 1 x 1 x 2 x 2 + image = Tensor(np.array([[[[1,2],[3,4]]]]), dtype=mstype.int32) + net = Net() + _executor.compile(net, image) + + +def test_compile_multi_channel(): + # input shape 4 x 2 x 2 x 2 + dtype = mstype.int32 + image = Tensor(np.array([[[[1,2],[3,4]], [[5,6],[7,8]]], + [[[3,5],[7,9]], [[11,13],[15,17]]], + [[[5,10],[15,20]], [[25,30],[35,40]]], + [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype) + net = Net() + _executor.compile(net, image) From 7798c85e70ebe474a2832102d15707b9b2b40eb4 Mon Sep 17 00:00:00 2001 From: Xiaoda Zhang Date: Fri, 3 Apr 2020 14:33:04 +0800 Subject: [PATCH 28/58] This commit is to separate the computation cost and memory cost in auto_parallel. Some related memory correction is removed. 
--- .../ccsrc/parallel/auto_parallel/costmodel.cc | 12 +- .../ccsrc/parallel/auto_parallel/costmodel.h | 10 +- .../auto_parallel/dp_algo_costmodel.h | 2 +- .../parallel/auto_parallel/edge_costmodel.cc | 51 ++-- .../parallel/auto_parallel/edge_costmodel.h | 2 +- .../parallel/auto_parallel/graph_costmodel.cc | 105 +++----- .../parallel/auto_parallel/graph_costmodel.h | 8 +- .../auto_parallel/operator_costmodel.cc | 114 ++++---- .../auto_parallel/operator_costmodel.h | 244 +++++++++--------- .../ccsrc/parallel/ops_info/matmul_info.cc | 8 +- .../ccsrc/parallel/ops_info/operator_info.cc | 25 +- .../ccsrc/parallel/ops_info/operator_info.h | 6 +- .../ccsrc/parallel/step_auto_parallel.cc | 30 +-- .../tensor_layout/tensor_redistribution.cc | 24 +- .../tensor_layout/tensor_redistribution.h | 9 +- .../auto_parallel/graph_costmodel_test.cc | 4 +- .../auto_parallel/operator_costmodel_test.cc | 12 +- .../cpp/parallel/ops_info/activation_test.cc | 8 +- .../cpp/parallel/ops_info/matmul_info_test.cc | 8 +- .../parallel/ops_info/tensor_add_info_test.cc | 8 +- .../cpp/parallel/ops_info/tmpidentity_test.cc | 4 +- 21 files changed, 322 insertions(+), 372 deletions(-) diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc index f5cf5069be..190f589bb5 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc @@ -23,8 +23,8 @@ namespace mindspore { namespace parallel { void Simplify(CostPtrList* clist_ptrs) { - // Sort the cost_list with the memory_cost increasing, and communication_cost decreasing order. This method - // excludes the cost with greater memory_cost and greater communication_cost. + // Sort the cost_list with the computation_cost_ increasing, and communication_cost decreasing order. This method + // excludes the cost with greater computation_cost_ and greater communication_cost. // E.g. clist_ptrs = {<100, 20>, <200, 10>, <300, 50>}. 
After this method, clist_ptrs = {<200, 10>, <100, 20>} if (!COST_MODEL_SIMPLIFY_CALCULATION) { return; @@ -33,7 +33,7 @@ void Simplify(CostPtrList* clist_ptrs) { std::vector id(clist_ptrs->size()); std::iota(id.begin(), id.end(), size_t(0)); std::sort(id.begin(), id.end(), [&clist_ptrs](size_t x, size_t y) { - return clist_ptrs->at(x)->memory_cost_ < clist_ptrs->at(y)->memory_cost_; + return clist_ptrs->at(x)->computation_cost_ < clist_ptrs->at(y)->computation_cost_; }); CostPtrList ret; for (size_t i = 0; i < clist_ptrs->size(); ++i) { @@ -45,8 +45,8 @@ void Simplify(CostPtrList* clist_ptrs) { } void SimplifyForDreasingCommunicationWithPartialPara(CostPtrList* clist_ptrs) { - // Sort the cost_list with the memory_cost increasing, and communication_with_partial_para_cost decreasing order. - // This method excludes the cost with greater memory_cost and greater communication_without_para_cost. + // Sort the cost_list with the computation_cost_ increasing, and communication_with_partial_para_cost decreasing + // order. This method excludes the cost with greater computation_cost_ and greater communication_without_para_cost. 
if (!COST_MODEL_SIMPLIFY_CALCULATION) { return; } @@ -54,7 +54,7 @@ void SimplifyForDreasingCommunicationWithPartialPara(CostPtrList* clist_ptrs) { std::vector id(clist_ptrs->size()); std::iota(id.begin(), id.end(), size_t(0)); std::sort(id.begin(), id.end(), [&clist_ptrs](size_t x, size_t y) { - return clist_ptrs->at(x)->memory_cost_ < clist_ptrs->at(y)->memory_cost_; + return clist_ptrs->at(x)->computation_cost_ < clist_ptrs->at(y)->computation_cost_; }); CostPtrList ret; for (size_t i = 0; i < clist_ptrs->size(); ++i) { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/costmodel.h index 361c19573f..229f0fbf5e 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/costmodel.h @@ -44,14 +44,18 @@ using RedistributionOpListPtr = std::shared_ptr& decision_ = nullptr) - : memory_cost_(memory), communication_cost_(commuication), decision_ptr_(std::move(decision_)) { + Cost(double computation, double commuication, const std::shared_ptr& decision_ = nullptr) + : computation_cost_(computation), communication_cost_(commuication), decision_ptr_(std::move(decision_)) { + memory_with_reuse_ = 0.0; communication_without_parameter_ = 0.0; communication_with_partial_para_ = 0.0; communication_redis_forward_ = 0.0; communication_redis_backward_ = 0.0; } - double memory_cost_; + // 'memory_with_reuse_' calculates the peak memory usage in a training phase + double memory_with_reuse_; + // 'computation_cost_' models the training time of an iteration in a training phase + double computation_cost_; // 'communication_cost_' includes communications from operators (forward and backward) and edges double communication_cost_; // communication_without_parameter_ = communication_cost_ - (backward communication from operators) diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h index 
c9b6a07317..0cb58c49da 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h @@ -35,7 +35,7 @@ namespace parallel { // interpretation of 6 operations in costmodel.h. // Phase 2: Search the cost_list in the final graph, and determine the optimal one // Create the cost_list for the final graph, and choose the optimal one: one the minimum quantity -// COST_MODEL_ALPHA * memory_cost + COST_MODEL_BETA * communication_cost +// COST_MODEL_ALPHA * computation_cost + COST_MODEL_BETA * communication_cost // Phase 3: Recover the original CostGraph, the determine strategy for each operator // After determining the optimal cost for the final graph, the algorithm recovers the original graph by applying // the 4 operations in the reverse order in the Phase 1. Because each operation decision contains the strategy, diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc index 6381049f17..653f6c903d 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc @@ -69,7 +69,7 @@ Status Edge::InitEdgeCost() { MS_LOG(EXCEPTION) << "Failure: redistribution cost calculation failed"; } MS_EXCEPTION_IF_NULL(cost); - MS_LOG(DEBUG) << "The redistribution cost: memory_cost: " << cost->memory_cost_ + MS_LOG(DEBUG) << "The redistribution cost: computation_cost: " << cost->computation_cost_ << ", communication_cost: " << cost->communication_cost_ << ", communication_without_parameter_: " << cost->communication_without_parameter_ << ", communication_with_partial_para_: " << cost->communication_with_partial_para_ << "."; @@ -117,9 +117,9 @@ Status Edge::GetRedistributionCost(const TensorLayout& prev_op_output_layout, co double comm_cost = tensor_redistribution.comm_cost(); double forward_comm_cost = tensor_redistribution.forward_comm_cost(); double backward_comm_cost = 
tensor_redistribution.backward_comm_cost(); - double mem_cost = tensor_redistribution.mem_cost(); + double computation_cost = tensor_redistribution.computation_cost(); - *cost = std::make_shared(type_length * mem_cost, type_length * comm_cost); + *cost = std::make_shared(type_length * computation_cost, type_length * comm_cost); (*cost)->communication_without_parameter_ = type_length * comm_cost; (*cost)->communication_with_partial_para_ = (*cost)->communication_without_parameter_ + @@ -150,26 +150,26 @@ CostPtrList Edge::CreateEdgeEliminationCostList(const StrategyPtr& output_st_ptr (void)std::transform(edges.begin(), edges.end(), all_cost_list.begin(), LocalGetCostList); CostPtrList selected_cost_list(all_cost_list.size(), nullptr); - std::function recursive = [&](size_t k, double memory, double communication, - double communication_without_para) { - if (k == edges.size()) { - auto decision = std::make_shared(selected_cost_list); - CostPtr new_cost = std::make_shared(memory, communication); - MS_EXCEPTION_IF_NULL(new_cost); - new_cost->communication_without_parameter_ = communication_without_para; - new_cost->communication_with_partial_para_ = - communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); - new_cost->decision_ptr_ = decision; - result.push_back(new_cost); - return; - } - for (auto& c : all_cost_list[k]) { - MS_EXCEPTION_IF_NULL(c); - selected_cost_list[k] = c; - recursive(k + 1, memory + c->memory_cost_, communication + c->communication_cost_, - communication_without_para + c->communication_without_parameter_); - } - }; + std::function recursive = + [&](size_t k, double computation, double communication, double communication_without_para) { + if (k == edges.size()) { + auto decision = std::make_shared(selected_cost_list); + CostPtr new_cost = std::make_shared(computation, communication); + MS_EXCEPTION_IF_NULL(new_cost); + new_cost->communication_without_parameter_ = communication_without_para; + 
new_cost->communication_with_partial_para_ = + communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); + new_cost->decision_ptr_ = decision; + result.push_back(new_cost); + return; + } + for (auto& c : all_cost_list[k]) { + MS_EXCEPTION_IF_NULL(c); + selected_cost_list[k] = c; + recursive(k + 1, computation + c->computation_cost_, communication + c->communication_cost_, + communication_without_para + c->communication_without_parameter_); + } + }; recursive(0, 0, 0, 0); SimplifyForDreasingCommunicationWithPartialPara(&result); return result; @@ -203,7 +203,8 @@ void Edge::CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtr MS_EXCEPTION_IF_NULL(middle_cost); for (auto& right_cost : right_cost_list) { MS_EXCEPTION_IF_NULL(right_cost); - double memory = left_cost->memory_cost_ + middle_cost->memory_cost_ + right_cost->memory_cost_; + double computation = + left_cost->computation_cost_ + middle_cost->computation_cost_ + right_cost->computation_cost_; double communication = left_cost->communication_cost_ + middle_cost->communication_cost_ + right_cost->communication_cost_; double communication_without_para = left_cost->communication_without_parameter_ + @@ -211,7 +212,7 @@ void Edge::CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtr right_cost->communication_without_parameter_; auto decision = std::make_shared(op_strategy, left_cost, middle_cost, right_cost); - auto cost = std::make_shared(memory, communication, decision); + auto cost = std::make_shared(computation, communication, decision); MS_EXCEPTION_IF_NULL(cost); cost->communication_without_parameter_ = communication_without_para; cost->communication_with_partial_para_ = diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h index 1fa49029fa..eb89466d7c 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h +++ 
b/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h @@ -133,7 +133,7 @@ class Edge { void set_parameter_involve(int para_invol) { is_output_parameter_involve_ = para_invol; } // When the input of a operator contains WEIGHT or a output from other operators involving WEIGHT, then these input // should stay in memory until it is used in the backward phase, which is kept in memory at the end of forward phase. - Status CorrectStrategyCostForMemoryReuse() const { return SUCCESS; } + Status CalculateMemoryCost() const { return SUCCESS; } private: std::string edge_name_; diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc index 59b9d9e992..88a54662d3 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc @@ -247,7 +247,7 @@ CostPtrList CostGraph::CreateFinalCostList(const OperatorInfoPtr& u, const std:: MS_EXCEPTION_IF_NULL(cost1); MS_EXCEPTION_IF_NULL(cost2); MS_EXCEPTION_IF_NULL(cost3); - double memory = cost1->memory_cost_ + cost2->memory_cost_ + cost3->memory_cost_; + double computation = cost1->computation_cost_ + cost2->computation_cost_ + cost3->computation_cost_; double commmunication = cost1->communication_cost_ + cost2->communication_cost_ + cost3->communication_cost_; double communication_without_para = cost1->communication_without_parameter_ + @@ -255,7 +255,7 @@ CostPtrList CostGraph::CreateFinalCostList(const OperatorInfoPtr& u, const std:: cost3->communication_without_parameter_; auto decision = std::make_shared(u_strategy->strategy_ptr, v_strategy->strategy_ptr, cost1, cost2, cost3); - auto cost = std::make_shared(memory, commmunication, decision); + auto cost = std::make_shared(computation, commmunication, decision); MS_EXCEPTION_IF_NULL(cost); cost->communication_without_parameter_ = communication_without_para; cost->communication_with_partial_para_ = @@ -282,7 +282,7 @@ CostPtrList 
CostGraph::CreateFinalSingleCostList(const OperatorInfoPtr& u) { for (const auto& cost1 : clist1) { MS_EXCEPTION_IF_NULL(cost1); auto decision = std::make_shared(u_strategy_ptr, cost1); - auto new_cost = std::make_shared(cost1->memory_cost_, cost1->communication_cost_, decision); + auto new_cost = std::make_shared(cost1->computation_cost_, cost1->communication_cost_, decision); MS_EXCEPTION_IF_NULL(new_cost); new_cost->communication_without_parameter_ = cost1->communication_without_parameter_; new_cost->communication_with_partial_para_ = @@ -297,12 +297,12 @@ CostPtrList CostGraph::CreateFinalSingleCostList(const OperatorInfoPtr& u) { } CostPtr CostGraph::SelectCostWithMemoryConstraint(const CostPtrList& cost_list, double memory) { - if (cost_list.empty() || cost_list[0]->memory_cost_ >= memory) { + if (cost_list.empty() || cost_list[0]->computation_cost_ >= memory) { return nullptr; } std::function LocalCompare = [&](CostPtr init, const CostPtr& cost_x) { MS_EXCEPTION_IF_NULL(cost_x); - if (init == nullptr || cost_x->memory_cost_ < memory) { + if (init == nullptr || cost_x->computation_cost_ < memory) { init = cost_x; } return init; @@ -313,36 +313,36 @@ CostPtr CostGraph::SelectCostWithMemoryConstraint(const CostPtrList& cost_list, CostPtr CostGraph::SelectCostWithMinTrainingTime(const CostPtrList& cost_list, double memory) { // Select the cost with minimum training time. 
Currently, the training time is modeled as = - // costmodel_alpha_ * memory_cost + costmodel_beta_ * communication_with_partial_para_ + // costmodel_alpha_ * computation_cost + costmodel_beta_ * communication_with_partial_para_ if (cost_list.empty()) { MS_LOG(ERROR) << "Final cost list is null."; return nullptr; } CostPtr ret = cost_list[0]; MS_EXCEPTION_IF_NULL(ret); - if (ret->memory_cost_ >= memory) { - MS_LOG(ERROR) << "No available cost; the minimum cost is " << ret->memory_cost_ + if (ret->computation_cost_ >= memory) { + MS_LOG(ERROR) << "No available cost; the minimum cost is " << ret->computation_cost_ << ", the memory capacity is: " << memory << "."; return nullptr; } - double minimum = costmodel_alpha_ * ret->memory_cost_ + costmodel_beta_ * ret->communication_with_partial_para_; - MS_LOG(INFO) << "minimum: " << minimum << ", memory_cost_: " << ret->memory_cost_ + double minimum = costmodel_alpha_ * ret->computation_cost_ + costmodel_beta_ * ret->communication_with_partial_para_; + MS_LOG(INFO) << "minimum: " << minimum << ", computation_cost_: " << ret->computation_cost_ << ", communication_with_partial_para_: " << ret->communication_with_partial_para_ << ", communication_cost_: " << ret->communication_cost_ << ", communication_without_parameter_: " << ret->communication_without_parameter_ << "."; for (size_t i = 1; i < cost_list.size(); ++i) { MS_EXCEPTION_IF_NULL(cost_list[i]); - if (cost_list[i]->memory_cost_ >= memory) { - MS_LOG(INFO) << "cost_list " << i << " memory_cost_: " << cost_list[i]->memory_cost_ + if (cost_list[i]->computation_cost_ >= memory) { + MS_LOG(INFO) << "cost_list " << i << " computation_cost_: " << cost_list[i]->computation_cost_ << ", is larger than the memory capacity: " << memory << "."; break; } - MS_LOG(INFO) << "cost_list " << i << " memory_cost_: " << cost_list[i]->memory_cost_ + MS_LOG(INFO) << "cost_list " << i << " computation_cost_: " << cost_list[i]->computation_cost_ << ", communication_with_partial_para_: " << 
cost_list[i]->communication_with_partial_para_ << ", communication_cost_: " << cost_list[i]->communication_cost_ << ", communication_without_parameter_: " << cost_list[i]->communication_without_parameter_ << "."; - auto tmp = - costmodel_alpha_ * cost_list[i]->memory_cost_ + costmodel_beta_ * cost_list[i]->communication_with_partial_para_; + auto tmp = costmodel_alpha_ * cost_list[i]->computation_cost_ + + costmodel_beta_ * cost_list[i]->communication_with_partial_para_; MS_LOG(INFO) << "tmp: " << tmp; if (minimum > tmp) { minimum = tmp; @@ -363,8 +363,8 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect MS_LOG(ERROR) << "The cost list " << i << " is empty."; return ret; } else { - total_memory += all_cost_list[i][0]->memory_cost_; - minimum += costmodel_alpha_ * all_cost_list[i][0]->memory_cost_ + + total_memory += all_cost_list[i][0]->computation_cost_; + minimum += costmodel_alpha_ * all_cost_list[i][0]->computation_cost_ + costmodel_beta_ * all_cost_list[i][0]->communication_with_partial_para_; ret[i] = all_cost_list[i][0]; } @@ -381,8 +381,8 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect double tmp_memory = 0.0, tmp_minimum = 0.0; for (size_t i = 0; i < selected_cost_list.size(); ++i) { MS_EXCEPTION_IF_NULL(selected_cost_list[i]); - tmp_memory += selected_cost_list[i]->memory_cost_; - tmp_minimum += costmodel_alpha_ * selected_cost_list[i]->memory_cost_ + + tmp_memory += selected_cost_list[i]->computation_cost_; + tmp_minimum += costmodel_alpha_ * selected_cost_list[i]->computation_cost_ + costmodel_beta_ * selected_cost_list[i]->communication_with_partial_para_; } MS_LOG(INFO) << "tmp_memory: " << tmp_memory << ", tmp_minimum: " << tmp_minimum << ", minimum: " << minimum @@ -394,6 +394,7 @@ CostPtrList CostGraph::SelectCostListWithMinTrainingTimeMultiple(const std::vect } return; } + MS_LOG(DEBUG) << "The value minimum: " << minimum << ", available_memory: " << available_memory << "."; for 
(auto& c : all_cost_list[k]) { selected_cost_list[k] = c; @@ -814,7 +815,7 @@ void CostGraph::CreateMergeEliminationSubCostList(StrategyPtr op_strategy, const for (size_t k = 0; k < tar_cost_list.size(); ++k) { auto& tar_cost = tar_cost_list[k]; MS_EXCEPTION_IF_NULL(tar_cost); - double memory = op_cost->memory_cost_ + edge_cost->memory_cost_ + tar_cost->memory_cost_; + double computation = op_cost->computation_cost_ + edge_cost->computation_cost_ + tar_cost->computation_cost_; double communication = op_cost->communication_cost_ + edge_cost->communication_cost_ + tar_cost->communication_cost_; double communication_without_para = op_cost->communication_without_parameter_ + @@ -823,7 +824,7 @@ void CostGraph::CreateMergeEliminationSubCostList(StrategyPtr op_strategy, const auto decision = std::make_shared(op_strategy, op_cost, edge_cost, tar_op_strategy, tar_cost); - auto new_cost = std::make_shared(memory, communication, decision); + auto new_cost = std::make_shared(computation, communication, decision); MS_EXCEPTION_IF_NULL(new_cost); new_cost->communication_without_parameter_ = communication_without_para; new_cost->communication_with_partial_para_ = @@ -891,7 +892,8 @@ void CostGraph::CreateContractEliminationSubCostList(StrategyPtr contract_op_str for (size_t k = 0; k < tar_cost_list.size(); ++k) { auto& tar_cost = tar_cost_list[k]; MS_EXCEPTION_IF_NULL(tar_cost); - double memory = contract_op_cost->memory_cost_ + edge_cost->memory_cost_ + tar_cost->memory_cost_; + double computation = + contract_op_cost->computation_cost_ + edge_cost->computation_cost_ + tar_cost->computation_cost_; double communication = contract_op_cost->communication_cost_ + edge_cost->communication_cost_ + tar_cost->communication_cost_; double communication_without_para = contract_op_cost->communication_without_parameter_ + @@ -900,7 +902,7 @@ void CostGraph::CreateContractEliminationSubCostList(StrategyPtr contract_op_str auto decision = std::make_shared(contract_op_stra, contract_op_cost, 
edge_cost, target_op_stra, tar_cost); - auto new_cost = std::make_shared(memory, communication, decision); + auto new_cost = std::make_shared(computation, communication, decision); new_cost->communication_without_parameter_ = communication_without_para; new_cost->communication_with_partial_para_ = communication_without_para + COST_MODEL_GAMMA * (communication - communication_without_para); @@ -963,9 +965,9 @@ void CostGraph::CreateTriangleEliminationSubCostList(StrategyPtr elimi_op_stra, MS_EXCEPTION_IF_NULL(left_edge_cost); for (auto& left_node_cost : left_node_clist_origin) { MS_EXCEPTION_IF_NULL(left_node_cost); - double new_memory_cost = elimi_op_cost->memory_cost_ + left_edge_cost->memory_cost_ + - left_node_cost->memory_cost_ + right_edge_cost->memory_cost_ + - right_op_cost->memory_cost_; + double new_computation = elimi_op_cost->computation_cost_ + left_edge_cost->computation_cost_ + + left_node_cost->computation_cost_ + right_edge_cost->computation_cost_ + + right_op_cost->computation_cost_; double new_commu_cost = elimi_op_cost->communication_cost_ + left_edge_cost->communication_cost_ + left_node_cost->communication_cost_ + right_edge_cost->communication_cost_ + right_op_cost->communication_cost_; @@ -977,7 +979,7 @@ void CostGraph::CreateTriangleEliminationSubCostList(StrategyPtr elimi_op_stra, auto decision = std::make_shared(elimi_op_stra, elimi_op_cost, left_edge_cost, right_edge_cost, left_op_stra, left_node_cost, right_op_stra, right_op_cost); - auto new_cost = std::make_shared(new_memory_cost, new_commu_cost, decision); + auto new_cost = std::make_shared(new_computation, new_commu_cost, decision); new_cost->communication_without_parameter_ = new_commu_without; new_cost->communication_with_partial_para_ = new_commu_without + COST_MODEL_GAMMA * (new_commu_cost - new_commu_without); @@ -1082,11 +1084,12 @@ void CostGraph::CreateStarEliminationSubCostList(const StrategyPtr& first_succ_n succ_edges_costs[0] = first_succ_edge_cost; succ_nodes_costs[0] = 
first_succ_node_cost; - double memory_cost = merged_node_cost->memory_cost_, commu_cost = merged_node_cost->communication_cost_, + double computation_cost = merged_node_cost->computation_cost_, + commu_cost = merged_node_cost->communication_cost_, commu_without = merged_node_cost->communication_without_parameter_; for (size_t i = 0; i < succ_nodes_stras.size(); ++i) { MS_EXCEPTION_IF_NULL(succ_edges_costs[i]); - memory_cost += succ_edges_costs[i]->memory_cost_ + succ_nodes_costs[i]->memory_cost_; + computation_cost += succ_edges_costs[i]->computation_cost_ + succ_nodes_costs[i]->computation_cost_; commu_cost += succ_edges_costs[i]->communication_cost_ + succ_nodes_costs[i]->communication_cost_; commu_without += succ_edges_costs[i]->communication_without_parameter_ + succ_nodes_costs[i]->communication_without_parameter_; @@ -1094,7 +1097,7 @@ void CostGraph::CreateStarEliminationSubCostList(const StrategyPtr& first_succ_n auto decision = std::make_shared(merged_op_stra, merged_node_cost, succ_edges_costs, succ_nodes_stras, succ_nodes_costs); - auto new_cost = std::make_shared(memory_cost, commu_cost, decision); + auto new_cost = std::make_shared(computation_cost, commu_cost, decision); new_cost->communication_without_parameter_ = commu_without; new_cost->communication_with_partial_para_ = commu_without + COST_MODEL_GAMMA * (commu_cost - commu_without); first_succ_node_clist_new->emplace_back(std::move(new_cost)); @@ -1210,36 +1213,6 @@ Status CostGraph::InitSelectedStrategy() { return SUCCESS; } -Status CostGraph::CorrectOpsStrategyCostForMultiOutputUse() { - for (auto& op : ops_) { - MS_EXCEPTION_IF_NULL(op); - if (op->GetAliveSuccEdges().size() > 1) { - // Filter out the case of a output being used by multiple operators - std::map output_count; - for (size_t i = 0; i < op->GetAliveSuccEdges().size(); ++i) { - auto output_index = op->GetAliveSuccEdges()[i]->prev_op_output_index(); - output_count[output_index]++; - } - for (size_t i = 0; i < 
op->GetAliveSuccEdges().size(); ++i) { - auto output_index = op->GetAliveSuccEdges()[i]->prev_op_output_index(); - if (output_count[output_index] <= 1) { - continue; - } - auto next_op = op->GetAliveSuccEdges()[i]->next_operator(); - MS_EXCEPTION_IF_NULL(next_op); - auto input_index = op->GetAliveSuccEdges()[i]->next_op_input_index(); - if (next_op->CorrectStrategyCostForMultiOutputUse(input_index) != SUCCESS) { - MS_LOG(ERROR) << "The operator name: " << op->name() << ", the next operator name: " << next_op->name() - << ", the output_index: " << output_index << ", the input_index: " << input_index << "."; - return FAILED; - } - output_count[output_index]--; - } - } - } - return SUCCESS; -} - Status CostGraph::ComputeOpsAndEdgesParameterInvolved() { for (auto& op : ops_) { MS_EXCEPTION_IF_NULL(op); @@ -1252,23 +1225,23 @@ Status CostGraph::ComputeOpsAndEdgesParameterInvolved() { return SUCCESS; } -Status CostGraph::CorrectOpsStrategyCostForMemoryReuse() { +Status CostGraph::CalculateOpsMemoryCost() { for (auto& op : ops_) { MS_EXCEPTION_IF_NULL(op); - if (op->CorrectStrategyCostForMemoryReuse() != SUCCESS) { - MS_LOG(ERROR) << "Correcting Operator: " << op->name() << " cost for memory reuse failed."; + if (op->CalculateMemoryCost() != SUCCESS) { + MS_LOG(ERROR) << "Calculate Operator: " << op->name() << " cost for memory usage failed."; return FAILED; } } return SUCCESS; } -Status CostGraph::CorrectEdgesStrategyCostForMemoryReuse() { +Status CostGraph::CalculateEdgesMemoryCost() { for (auto& edge_pair : edges_) { const auto& edges = edge_pair.second; for (auto& one_edge : edges) { - if (one_edge->CorrectStrategyCostForMemoryReuse() != SUCCESS) { - MS_LOG(ERROR) << "Correcting Edge: " << one_edge->edge_name() << " cost for memory reuse failed."; + if (one_edge->CalculateMemoryCost() != SUCCESS) { + MS_LOG(ERROR) << "Calculate Edge: " << one_edge->edge_name() << " cost for memory usage failed."; return FAILED; } } diff --git 
a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h index e4cbdffb61..c149534826 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h @@ -175,16 +175,12 @@ class CostGraph { void CreateStarEliminationSubCostList(const StrategyPtr&, const CostPtrList&, const CostPtrList&, const StrategyPtr&, const CostPtrList&, std::vector, CostPtrList&, CostPtrList&, CostPtrList*); - - // When a output of a operator is being used by multiple operators, the memory cost of this part should be calculated - // only once. This method is for correcting the 'strategy_cost_' for operators - Status CorrectOpsStrategyCostForMultiOutputUse(); // When the input of a operator is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then // the memory cost can be resused. - Status CorrectOpsStrategyCostForMemoryReuse(); + Status CalculateOpsMemoryCost(); // When the input of the edge is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then // the memory cost can be resused. - Status CorrectEdgesStrategyCostForMemoryReuse(); + Status CalculateEdgesMemoryCost(); Status ComputeOpsAndEdgesParameterInvolved(); std::vector GetOperators() const { return ops_; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc index 6958932fd6..7c17b499b1 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc @@ -74,8 +74,8 @@ double MatMulCost::GetBackwardCommCost(const std::vector& inputs, co // Return the per device memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double MatMulCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t&) const { +double MatMulCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) + (0 or 1) allreduce(slice(C)) double result = 0.0; TensorInfo output0 = outputs[0]; @@ -93,8 +93,8 @@ double MatMulCost::GetForwardMemoryCost(const std::vector& inputs, c // Return the per device memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double MatMulCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double MatMulCost::GetBackwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t& stage_id) const { // In backward phase, the memory cost = (0 or 1) allreduce(slice(B)) double result = 0.0; if (is_parameter_[1]) { @@ -147,8 +147,8 @@ double ActivationCost::GetBackwardCommCost(const std::vector& inputs // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double ActivationCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double ActivationCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -156,8 +156,8 @@ double ActivationCost::GetForwardMemoryCost(const std::vector& input // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double ActivationCost::GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const { +double ActivationCost::GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const { return 0.0; } @@ -191,8 +191,8 @@ double SoftmaxCost::GetBackwardCommCost(const std::vector& inputs, c // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double SoftmaxCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In the forward phase, the memory cost = slice(A) TensorInfo input0 = inputs[0]; Shape input0_slice_shape = input0.slice_shape(); @@ -201,8 +201,9 @@ double SoftmaxCost::GetForwardMemoryCost(const std::vector& inputs, // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double SoftmaxCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double SoftmaxCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -222,9 +223,9 @@ double TmpIdentityCost::GetBackwardCommCost(const std::vector& inputs, - const std::vector&, - const int32_t&) const { +double TmpIdentityCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -232,15 +233,15 @@ double TmpIdentityCost::GetForwardMemoryCost(const std::vector&, - const std::vector&, - const int32_t&) const { +double TmpIdentityCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } -double BatchParallelCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector&, - const int32_t&) const { +double BatchParallelCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { double cost = 0.0; for (size_t i = 0; i < inputs.size(); ++i) { cost += ListProduct(inputs[i].slice_shape()) * static_cast(inputs_type_lengths_[i]); @@ -248,9 +249,9 @@ double BatchParallelCost::GetForwardMemoryCost(const std::vector&, - const std::vector&, - const int32_t&) const { +double BatchParallelCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -285,8 +286,8 @@ double PReLUCost::GetBackwardCommCost(const std::vector& inputs, con // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double PReLUCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double PReLUCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) Shape input0_slice_shape = inputs[0].slice_shape(); Shape input1_slice_shape = inputs[1].slice_shape(); @@ -297,9 +298,9 @@ double PReLUCost::GetForwardMemoryCost(const std::vector& inputs, co // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double PReLUCost::GetBackwardMemoryCost(const std::vector& inputs, - const std::vector&, - const int32_t& stage_id) const { +double PReLUCost::GetBackwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t& stage_id) const { // In backward phase, the memory cost = (0 or 1) allreduce(slice(B)) double result = 0.0; if (is_parameter_[1]) { @@ -338,8 +339,8 @@ double OneHotCost::GetBackwardCommCost(const std::vector&, const std // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double OneHotCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double OneHotCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { // In onehot's forward phase, the memory cost = slice(A) Shape input0_slice_shape = inputs[0].slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); @@ -347,8 +348,8 @@ double OneHotCost::GetForwardMemoryCost(const std::vector& inputs, c // Return the per memory cost in the backward phase. 
The cost is calculated according to the bytes // this operator uses -double OneHotCost::GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const { +double OneHotCost::GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const { return 0.0; } @@ -368,8 +369,9 @@ double SoftmaxCrossEntropyWithLogitsCost::GetBackwardCommCost(const std::vector< // Return the per memory cost in the forward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCrossEntropyWithLogitsCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector&, const int32_t&) const { +double SoftmaxCrossEntropyWithLogitsCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector&, + const int32_t&) const { // In forward phase, the memory cost = slice(A) + slice(B) Shape input0_slice_shape = inputs[0].slice_shape(); Shape input1_slice_shape = inputs[1].slice_shape(); @@ -380,8 +382,9 @@ double SoftmaxCrossEntropyWithLogitsCost::GetForwardMemoryCost(const std::vector // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double SoftmaxCrossEntropyWithLogitsCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double SoftmaxCrossEntropyWithLogitsCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } @@ -409,8 +412,8 @@ double ReshapeCost::GetBackwardCommCost(const std::vector&, const st // Return the per memory cost in the forward phase. 
The cost is calculated according to the bytes // this operator uses -double ReshapeCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const { +double ReshapeCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const { CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); RankList dev_list = g_device_manager->GetDeviceListByStageId(stage_id); @@ -421,26 +424,27 @@ double ReshapeCost::GetForwardMemoryCost(const std::vector& inputs, if (tensor_redistribution.ComputeCost() == FAILED) { MS_LOG(EXCEPTION) << "Failure: tensor_redistribution ComputeCost failed."; } - return (inputs_type_lengths_[0] * tensor_redistribution.mem_cost()); + return (inputs_type_lengths_[0] * tensor_redistribution.computation_cost()); } // Return the per memory cost in the backward phase. The cost is calculated according to the bytes // this operator uses -double ReshapeCost::GetBackwardMemoryCost(const std::vector&, - const std::vector&, const int32_t&) const { +double ReshapeCost::GetBackwardComputationCost(const std::vector&, + const std::vector&, + const int32_t&) const { return 0.0; } -double ArithmeticCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double ArithmeticCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { double result; result = ListProduct(inputs[0].slice_shape()) * static_cast(inputs_type_lengths_[0]) + ListProduct(inputs[1].slice_shape()) * static_cast(inputs_type_lengths_[1]); return result; } -double ArithmeticCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double ArithmeticCost::GetBackwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t& stage_id) const { double result = 0.0; CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); @@ 
-533,15 +537,15 @@ double L2NormalizeCost::GetBackwardCommCost(const std::vector& input return result; } -double L2NormalizeCost::GetForwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { +double L2NormalizeCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, + const int32_t&) const { TensorInfo input0_info = inputs[0]; Shape input0_slice_shape = input0_info.slice_shape(); return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); } -double L2NormalizeCost::GetBackwardMemoryCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { +double L2NormalizeCost::GetBackwardComputationCost(const std::vector& inputs, + const std::vector&, const int32_t& stage_id) const { double result = 0.0; if (is_parameter_[0]) { @@ -618,8 +622,9 @@ double ReduceMethodCost::GetBackwardCommCost(const std::vector& inpu return result; } -double ReduceMethodCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector& outputs, const int32_t& stage_id) const { +double ReduceMethodCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, + const int32_t& stage_id) const { double result = 0.0; TensorInfo input0 = inputs[0]; TensorInfo output0 = outputs[0]; @@ -640,8 +645,9 @@ double ReduceMethodCost::GetForwardMemoryCost(const std::vector& inp return result; } -double ReduceMeanCost::GetForwardMemoryCost(const std::vector& inputs, - const std::vector& outputs, const int32_t& stage_id) const { +double ReduceMeanCost::GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, + const int32_t& stage_id) const { double result = 0.0; TensorInfo input0 = inputs[0]; TensorInfo output0 = outputs[0]; diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h index 9fb86d467e..8f0099bba3 100644 --- 
a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h @@ -65,12 +65,12 @@ class OperatorCost { virtual double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const = 0; // per device computation cost - virtual double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; - virtual double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; - virtual double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const = 0; + virtual double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const = 0; + virtual double GetForwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const = 0; + virtual double GetBackwardComputationCost(const std::vector& inputs, + const std::vector& outputs, const int32_t& stage_id) const = 0; protected: // for each input in 'inputs_', there is a bool variable indicating whether that the corresponding input is parameter @@ -96,14 +96,14 @@ class MatMulCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + 
return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using MatMulCostPtr = std::shared_ptr; @@ -121,14 +121,14 @@ class ActivationCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ActivationCostPtr = std::shared_ptr; @@ -146,14 +146,14 @@ class SoftmaxCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const 
std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t&) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t&) const override; }; using SoftmaxCostPtr = std::shared_ptr; @@ -171,14 +171,14 @@ class TmpIdentityCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const 
std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using TmpIdentityCostPtr = std::shared_ptr; @@ -199,14 +199,14 @@ class BatchParallelCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using BatchParallelCostPtr = std::shared_ptr; @@ -227,16 +227,16 @@ class VirtualDatasetCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + 
GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; @@ -259,18 +259,18 @@ class GeneratorBaseCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } // Inputs vector is empty for generator ops. - double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } // Generator ops don't have backward steps. 
- double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; @@ -292,14 +292,14 @@ class PReLUCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using PReLUCostPtr = std::shared_ptr; @@ -319,14 +319,14 @@ class OneHotCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& 
outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using OneHotCostPtr = std::shared_ptr; @@ -346,14 +346,14 @@ class SoftmaxCrossEntropyWithLogitsCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using SoftmaxCrossEntropyWithLogitsCostPtr = std::shared_ptr; @@ -376,16 +376,16 @@ class ReshapeCost : public OperatorCost { const int32_t& stage_id) const override; // per device computation cost - double 
GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ReshapeCostPtr = std::shared_ptr; @@ -405,14 +405,14 @@ class ArithmeticCost : public OperatorCost { double GetBackwardCommCost(const std::vector&, const std::vector&, const int32_t&) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& 
inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ArithmeticCostPtr = std::shared_ptr; @@ -431,14 +431,14 @@ class L2NormalizeCost : public OperatorCost { } double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); - } - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using L2NormalizeCostPtr = std::shared_ptr; @@ -455,14 +455,14 @@ class ReduceMethodCost : public OperatorCost { const int32_t& stage_id) const override; double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, const int32_t& stage_id) const override; - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); - } - double 
GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); + } + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } void set_cross_batch(bool cb) { cross_batch_ = cb; } @@ -477,8 +477,8 @@ class ReduceMeanCost : public ReduceMethodCost { ReduceMeanCost() = default; ~ReduceMeanCost() override = default; - double GetForwardMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; + double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override; }; using ReduceMeanCostPtr = std::shared_ptr; @@ -499,18 +499,18 @@ class GetNextCost : public OperatorCost { const int32_t&) const override { return 0.0; } - double GetMemoryCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardMemoryCost(inputs, outputs, stage_id) + GetBackwardMemoryCost(inputs, outputs, stage_id); + double GetComputationCost(const std::vector& inputs, const std::vector& outputs, + const int32_t& stage_id) const override { + return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); } // Inputs vector is empty for generator ops. 
- double GetForwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetForwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } // Generator ops don't have backward steps. - double GetBackwardMemoryCost(const std::vector&, const std::vector&, - const int32_t&) const override { + double GetBackwardComputationCost(const std::vector&, const std::vector&, + const int32_t&) const override { return 0.0; } }; diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc index ad6409be0a..2b02dc100d 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc @@ -592,10 +592,10 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& int32_t stage_id = strategy->GetInputStage(); // Here, we use the origin outputs_, because we only use the slice size of the output tensor. // It does not matter whether the output tensor is transposed or not. 
- double memory_cost = - matmulcost_ptr->GetForwardMemoryCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + double computation_cost = + matmulcost_ptr->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); double communication_cost = matmulcost_ptr->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); - std::shared_ptr result = std::make_shared(memory_cost, communication_cost); + std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = matmulcost_ptr->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = @@ -604,7 +604,7 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& // Breaking ties for preferring data parallelization BreakingTiesForPerferringDataParallel(strategy, result); - MS_LOG(DEBUG) << name_ << " : memory_cost: " << result->memory_cost_ + MS_LOG(DEBUG) << name_ << " : computation_cost: " << result->computation_cost_ << ", communication_cost: " << result->communication_cost_ << ", communication_without_parameter_: " << result->communication_without_parameter_ << ", communication_with_partial_para_: " << result->communication_with_partial_para_; diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/parallel/ops_info/operator_info.cc index 8b96425bf7..11c518d844 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.cc @@ -1034,9 +1034,10 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return FAILED; } int32_t stage_id = strategy->GetInputStage(); - double memory_cost = GetOperatorCost()->GetForwardMemoryCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double computation_cost = + GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); double 
communication_cost = GetOperatorCost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); - std::shared_ptr result = std::make_shared(memory_cost, communication_cost); + std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = GetOperatorCost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = @@ -1056,22 +1057,6 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return SUCCESS; } -Status OperatorInfo::CorrectStrategyCostForMultiOutputUse(size_t input_index) { - for (auto& swc : strategy_cost_) { - double parameter_memory_cost = ListProduct(swc->inputs_ptr[input_index].slice_shape()) * - static_cast(GetOperatorCost()->inputs_type_lengths()[input_index]); - // remove the parameter memory cost - swc->cost_list[0]->memory_cost_ -= parameter_memory_cost; - if (swc->cost_list[0]->memory_cost_ < -1) { - MS_LOG(ERROR) << "The memory cost after correction is " << swc->cost_list[0]->memory_cost_ - << ", the parameter_memory_cost is " << parameter_memory_cost; - return FAILED; - } - } - corrected_input_indices_.push_back(input_index); - return SUCCESS; -} - int OperatorInfo::ComputeOpAndPrevEdgeParameterInvolved() { if (is_output_parameter_involve_ != -1) { return is_output_parameter_involve_; @@ -1217,7 +1202,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra CheckGlobalDeviceManager(); auto total_device_num = g_device_manager->GetDeviceListByStageId(stra->GetInputStage()).size(); if (IntToSize(stra->GetInputDim()[0][0]) == total_device_num) { - cost->memory_cost_ -= 1.0; + cost->computation_cost_ -= 1.0; cost->communication_cost_ -= 1.0; cost->communication_with_partial_para_ -= 1.0; cost->communication_without_parameter_ -= 1.0; @@ -1226,7 +1211,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra } double 
OperatorInfo::GetForwardMemoryCostFromCNode() { - return GetOperatorCost()->GetForwardMemoryCost(inputs_tensor_info_, outputs_tensor_info_, 0); + return GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); } } // namespace parallel diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h index cc70f1b870..e7b8af0a7e 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h @@ -87,13 +87,9 @@ class OperatorInfo { // is checked Status SetCostUnderStrategyBase(const StrategyPtr& strategy); std::vector> GetStrategyCost() { return strategy_cost_; } - // In the case of a Parameter (or a output) being used by multiple operators, the memory cost induced by - // the parameter (or a output) should be calculated only once. This method is used to - // remove this part from the 'strategy_cost_'. - Status CorrectStrategyCostForMultiOutputUse(size_t input_index); // When the input of a operator contains WEIGHT or a output from other operators involving WEIGHT, then these input // should stay in memory until it is used in the backward phase, which is kept in memory at the end of forward phase. 
- Status CorrectStrategyCostForMemoryReuse() const { return SUCCESS; } + Status CalculateMemoryCost() const { return SUCCESS; } int ComputeOpAndPrevEdgeParameterInvolved(); ForwardOp forward_op() const { return forward_op_; } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 50e6a1e84e..d7d48c35bb 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -387,7 +387,7 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & operator_info->set_outputs_dtype(cnode->Type()); operator_info->set_cnode(cnode); // If no strategy has been configured for this operator, then candidate strategies are generated for - // auto-strategy searchingm if this primitive is Cast, we ignore the user-specified strategy + // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy if (!StrategyFound(attrs) || prim->name() == CAST) { // Compute split_flag_list_, indicating which input has batch dimension. This is ONLY used for preparation for // BatchParallelInfo operator @@ -600,13 +600,7 @@ void ConstructCostGraphEdges(const std::vector &all_nodes) { } MS_LOG(INFO) << "Successfully created " << edge_count << " edges for: " << cnode->operator_info()->name(); } - // For the case of a output being used by multiple subsequent operators, the output induced memory cost should be - // calculated only once. This method is for correct the operators' memory cost calculation. 
- if (entire_costgraph->CorrectOpsStrategyCostForMultiOutputUse() != SUCCESS) { - MS_LOG(EXCEPTION) << "Correcting strategy_cost_ for operators failed."; - } else { - MS_LOG(INFO) << "Correcting strategy_cost_ for operators succeeded."; - } + MS_LOG(INFO) << "Constructing edges for cost graph ends."; } @@ -803,14 +797,6 @@ void AugmentCostGraph(const std::vector &all_nodes) { std::shared_ptr edge_ptr = std::make_shared( edge_name, tmp_identity_ptr, target_cnode->operator_info(), 0, input_index - 1, false, true); - // Correct the memory calculation for a parameter being used by multiple operators. The parameter is calculated - // only once - if (target_cnode->operator_info()->CorrectStrategyCostForMultiOutputUse(IntToSize(input_index - 1)) != SUCCESS) { - MS_LOG(EXCEPTION) << "Correcting strategy_cost_ failed : " << prim->name(); - } else { - MS_LOG(INFO) << "Correcting strategy_cost_ succeeded. " << prim->name(); - } - if (edge_ptr->InitEdgeCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Edge cost initialization failed"; } @@ -840,7 +826,7 @@ Status ParallelStrategySearch(const std::vector &all_nodes, const Fu // taking care for the case of a single Parameter being used by multiple operators. Create a TmpIdentity // operator for this Parameter, and add an edge for the use of this Parameter by each // subsequent operator; - // Step 3.1: Correct the memory calculation for memory reuse + // Step 3.1: Calculate memory usage // Step 4: Run the Dynamic Programming algorithm: // in this process, cost is calculated based on not only the operators, but also the edges. 
Here, the edge // cost is caused by the redistribution of a operator's output tensor layout to the next operator's input @@ -867,14 +853,14 @@ Status ParallelStrategySearch(const std::vector &all_nodes, const Fu MS_LOG(INFO) << "After the augmenting procedure, there are " << entire_costgraph->GetOperators().size() << " operators, and " << entire_costgraph->GetNumPairs() << " edges."; - // Step 3.1: Correcting calculation for memory reuse + // Step 3.1: Calculate the memory usage if (entire_costgraph->ComputeOpsAndEdgesParameterInvolved() == SUCCESS) { - // Correcting operators' memory usage - if (entire_costgraph->CorrectOpsStrategyCostForMemoryReuse() != SUCCESS) { + // Calculate operators' memory usage + if (entire_costgraph->CalculateOpsMemoryCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Correcting operators' cost for memory reuse failed."; } - // Correcting edges' memory usage - if (entire_costgraph->CorrectEdgesStrategyCostForMemoryReuse() != SUCCESS) { + // Calculate edges' memory usage + if (entire_costgraph->CalculateEdgesMemoryCost() != SUCCESS) { MS_LOG(EXCEPTION) << "Correcting edges' cost for memory reuse failed."; } } else { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc index 93bda5da81..55e6a300e0 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc +++ b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc @@ -144,7 +144,7 @@ Status TensorRedistribution::ComputeCost() { MS_LOG(ERROR) << "Failure: InferTensorRedistribution failed"; return Status::FAILED; } - // Compute redistribution communication cost and memory cost + // Compute redistribution communication cost and computation cost for (auto& op_cost : operator_list_) { OperatorR op = op_cost.first; Shape slice_shape = op_cost.second; @@ -154,14 +154,14 @@ Status TensorRedistribution::ComputeCost() { if (str == PERMUTE_BY_AXIS) { // The shape does not change after 
PermuteByAxis operation. // communication cost = all_to_all + all_to_all = 2 * slice_shape - // memory cost = slice_shape + // computation cost = slice_shape forward_comm_cost_ += prod; backward_comm_cost_ += prod; comm_cost_ += 2.0 * prod; - mem_cost_ += prod; + computation_cost_ += prod; } else if (str == CONCAT_BY_AXIS) { // communication cost = all_gather + reduce_scatter = before_slice_shape + after_slice_shape - // memory cost = before_slice_shape + // computation cost = before_slice_shape if (op.second.size() < 3) { MS_LOG(ERROR) << "op.second size should not be less than 3!"; return Status::FAILED; @@ -173,22 +173,22 @@ Status TensorRedistribution::ComputeCost() { comm_cost_ += prod * (dev_num + 1.0); int32_t concat_dim = op.second[0]; if (concat_dim == 0) { - // memory cost = all_gather - mem_cost_ += prod; + // computation cost = all_gather + computation_cost_ += prod; } else { - // memory cost = all_gather + split + concat - mem_cost_ += (prod + prod * dev_num + prod * dev_num); + // computation cost = all_gather + split + concat + computation_cost_ += (prod + prod * dev_num + prod * dev_num); } } else { - // There is only memory cost in SplitByAxis. - // memory cost = before_slice_shape - mem_cost_ += prod; + // There is only computation cost in SplitByAxis. 
+ // computation cost = before_slice_shape + computation_cost_ += prod; } } if (reshape_flag()) { Shape prev_slice_shape = from_.slice_shape().array(); double prev_prod = std::accumulate(prev_slice_shape.begin(), prev_slice_shape.end(), 1, std::multiplies()); - mem_cost_ += 2.0 * prev_prod; + computation_cost_ += 2.0 * prev_prod; } return Status::SUCCESS; } diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h index 38fb5959ad..e933b9b8eb 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h +++ b/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h @@ -41,7 +41,7 @@ class TensorRedistribution { comm_cost_(0.0), forward_comm_cost_(0.0), backward_comm_cost_(0.0), - mem_cost_(0.0), + computation_cost_(0.0), construct_op_flag_(construct_op_flag), keep_reshape_(keep_reshape) {} Status Init(const TensorLayout& from, const TensorLayout& to, const RankList& dev_list); @@ -51,7 +51,7 @@ class TensorRedistribution { bool reshape_flag() const { return reshape_flag_; } Status ComputeCost(); double comm_cost() const { return comm_cost_; } - double mem_cost() const { return mem_cost_; } + double computation_cost() const { return computation_cost_; } double forward_comm_cost() const { return forward_comm_cost_; } double backward_comm_cost() const { return backward_comm_cost_; } @@ -66,10 +66,13 @@ class TensorRedistribution { RankList dev_list_; OperatorList operator_list_; bool reshape_flag_; + // communication cost double comm_cost_; + // forward communication cost double forward_comm_cost_; + // backward communication cost double backward_comm_cost_; - double mem_cost_; + double computation_cost_; bool construct_op_flag_; bool keep_reshape_; }; diff --git a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc index 83a9eceacc..415a1fdd55 100644 --- 
a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc @@ -322,8 +322,8 @@ TEST_F(TestCostGraph, test_SelectCostListWithMinTrainingTimeMultiple) { auto ret_list = entire_cost_graph.SelectCostListWithMinTrainingTimeMultiple(all_list, memory); ASSERT_EQ(ret_list.size(), 2); - ASSERT_DOUBLE_EQ(ret_list[0]->memory_cost_, 10); - ASSERT_DOUBLE_EQ(ret_list[1]->memory_cost_, 1010); + ASSERT_DOUBLE_EQ(ret_list[0]->computation_cost_, 10); + ASSERT_DOUBLE_EQ(ret_list[1]->computation_cost_, 1010); } TEST_F(TestCostGraph, test_CheckOpElimination) { diff --git a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc index 3bd65c049c..919c5b43ec 100644 --- a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc @@ -76,8 +76,8 @@ TEST_F(TestMatMulCost, test_CostGeneration) { mmcost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); mmcost_.GetForwardCommCost(inputs, outputs, 0); mmcost_.GetBackwardCommCost(inputs, outputs, 0); - mmcost_.GetForwardMemoryCost(inputs, outputs, 0); - mmcost_.GetBackwardMemoryCost(inputs, outputs, 0); + mmcost_.GetForwardComputationCost(inputs, outputs, 0); + mmcost_.GetForwardComputationCost(inputs, outputs, 0); } class TestActivationCost : public UT::Common { @@ -128,8 +128,8 @@ TEST_F(TestActivationCost, test_CostGeneration) { std::vector inputs_length = {4, 4}; std::vector outputs_length = {4}; ac_cost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); - ac_cost_.GetForwardMemoryCost(inputs, outputs, 0); - ac_cost_.GetBackwardMemoryCost(inputs, outputs, 0); + ac_cost_.GetForwardComputationCost(inputs, outputs, 0); + ac_cost_.GetBackwardComputationCost(inputs, outputs, 0); } class TestPReLUCost : public UT::Common { @@ -184,8 +184,8 @@ TEST_F(TestPReLUCost, test_CostGeneration) { 
prelu_cost_.SetInputAndOutputTypeLength(inputs_length, outputs_length); double BCC, FMC, GMC; BCC = prelu_cost_.GetBackwardCommCost(inputs, outputs, 0); - FMC = prelu_cost_.GetForwardMemoryCost(inputs, outputs, 0); - GMC = prelu_cost_.GetBackwardMemoryCost(inputs, outputs, 0); + FMC = prelu_cost_.GetForwardComputationCost(inputs, outputs, 0); + GMC = prelu_cost_.GetBackwardComputationCost(inputs, outputs, 0); ASSERT_EQ(BCC, 32 * 4); ASSERT_EQ(FMC, 8 * 32 * 8 * 8 * 4 + 32 * 4); ASSERT_EQ(GMC, 128); diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 149aa9d5af..5d18c5372f 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -84,8 +84,8 @@ TEST_F(TestActivation, test_activation_strategies) { act_ptr_->InitForCostModel(sp); std::vector inputs_info = act_ptr_->inputs_tensor_info(); std::vector outputs_info = act_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } @@ -109,8 +109,8 @@ TEST_F(TestActivation, test_softmax_strategies) { soft_ptr_->InitForCostModel(sp); std::vector inputs_info = soft_ptr_->inputs_tensor_info(); std::vector outputs_info = soft_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, 
sp->GetInputStage()), cost.communication_cost_); } diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index 978b792a0c..99ca9f8e0e 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -569,8 +569,8 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies1) { matmul1->InitForCostModel(sp); std::vector inputs_info = matmul1->inputs_tensor_info(); std::vector outputs_info = matmul1->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); break; } } @@ -599,8 +599,8 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies2) { TensorInfo replica_input1_info(tly, input1_shape, input1_slice_shape); replica_inputs_info.push_back(replica_input1_info); - ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetMemoryCost(replica_inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); break; } } diff --git a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index e7736a4b3e..6cb9739b1c 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -188,8 +188,8 @@ TEST_F(TestTensorAddInfo, GenerateStrategies) { tensor_add->InitForCostModel(sp); std::vector inputs_info = tensor_add->inputs_tensor_info(); std::vector outputs_info = tensor_add->outputs_tensor_info(); - double memory_cost0 = tensor_add->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()); - double memory_cost1 = cost.memory_cost_; + double 
memory_cost0 = tensor_add->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; double comm_cost0 = tensor_add->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); @@ -210,8 +210,8 @@ TEST_F(TestTensorAddInfo, GenerateStrategies1) { tensor_add1->InitForCostModel(sp); std::vector inputs_info = tensor_add1->inputs_tensor_info(); std::vector outputs_info = tensor_add1->outputs_tensor_info(); - double memory_cost0 = tensor_add1->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()); - double memory_cost1 = cost.memory_cost_; + double memory_cost0 = tensor_add1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; double comm_cost0 = tensor_add1->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index ce1238baeb..043746498f 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -145,8 +145,8 @@ TEST_F(TestTmpIdentityInfo, test_generate_strategies) { identity_ptr->Init(sp); std::vector inputs_info = identity_ptr->inputs_tensor_info(); std::vector outputs_info = identity_ptr->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetMemoryCost(inputs_info, outputs_info, sp->GetInputStage()), - cost.memory_cost_); + ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + cost.computation_cost_); ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } From 
89f0b3b1bb7af5e638dd9e76cd444edec4260856 Mon Sep 17 00:00:00 2001 From: jojobugfree Date: Tue, 7 Apr 2020 19:59:36 +0800 Subject: [PATCH 29/58] profiling feature enhancement --- .../device/ascend/ascend_stream_assign.cc | 2 +- .../ascend/profiling/profiling_manager.h | 4 - .../ascend/profiling/profiling_utils.cc | 229 ++++++++---------- .../device/ascend/profiling/profiling_utils.h | 93 ++++--- mindspore/ccsrc/device/kernel_adjust.cc | 31 +-- mindspore/ccsrc/device/kernel_adjust.h | 6 +- .../ascend/ascend_backend_optimization.cc | 2 +- mindspore/ccsrc/session/ascend_session.cc | 2 +- .../tasksink/ascend_stream_assign_stub.cc | 2 +- 9 files changed, 190 insertions(+), 181 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 4f16c596c7..8c4d1f4a8f 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr(primitive->GetAttr(kAttrEventId)) << "]"; } else { - MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id[" + MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h index de8f6a7d0a..b826c4cf36 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h @@ -29,10 +29,6 @@ namespace ascend { // PROFILING_CUSTOM_LOGID_START 3 const uint64_t kProfilingFpStartLogId = 1; const uint64_t kProfilingBpEndLogId = 2; -const uint64_t kProfilingAllReduce1Start = 3; -const uint64_t kProfilingAllReduce1End = 4; -const uint64_t 
kProfilingAllReduce2Start = 5; -const uint64_t kProfilingAllReduce2End = 6; const uint64_t kProfilingIterEndLogId = 255; class ProfilingEngineImpl; diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 0d7088300e..aa71aa0566 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -14,10 +14,8 @@ * limitations under the License. */ -#include "device/ascend/profiling/profiling_utils.h" - #include - +#include "device/ascend/profiling/profiling_utils.h" #include "kernel/kernel.h" #include "device/ascend/profiling/profiling_manager.h" #include "session/anf_runtime_algorithm.h" @@ -27,82 +25,61 @@ namespace mindspore { namespace device { namespace ascend { -const char ProfilingUtils::kProfiling[] = "Profiling"; -const char ProfilingUtils::kNotify[] = "notify"; -const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id"; -const char ProfilingUtils::kFlags[] = "flags"; +constexpr uint32_t kMaxProfilingNodeNum = 100; +constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; +constexpr char kFpStartNode[] = "PROFILING_FP_START"; +constexpr char kBpEndNode[] = "PROFILING_BP_END"; +constexpr char kIterEndNode[] = "PROFILING_ITER_END"; std::unordered_map> ProfilingUtils::graph_kernel_name_; -bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info) { - MS_EXCEPTION_IF_NULL(profiling_trace_info); - MS_EXCEPTION_IF_NULL(graph_ptr); - bool find_begin = false; - bool first_allreduce = true; - for (const auto &anf_node : graph_ptr->execution_order()) { - if (anf_node->isa()) { - const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node); - if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) { - profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope(); - find_begin = true; - } - if (kernel_name == 
"Conv2DBackpropFilter") { - profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope(); - } - if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) { - profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope(); - } - if (kernel_name == kAllReduceOpName) { - if (first_allreduce) { - profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope(); - first_allreduce = false; - } else { - profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope(); - profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope(); - } - } +uint32_t ProfilingUtils::custom_node_index_ = 1; + +ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { + MS_LOG(INFO) << "get env start"; + custom_node_index_ = 1; + auto &cnode_exec_order = graph_ptr->execution_order(); + ProfilingTraceInfo profiling_trace; + profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order); + profiling_trace.trace_bp_end = GetTraceBpEnd(); + profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order); + + MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin + << " trace_bp_end:" << profiling_trace.trace_bp_end + << " trace_netoutput:" << profiling_trace.trace_netoutput; + + for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { + std::string env_str = std::string(kCustomNode) + std::to_string(i); + const char *node_full_name = std::getenv(env_str.c_str()); + if (node_full_name == nullptr) { + break; } + MS_LOG(INFO) << "Get profiling node:" << node_full_name; + profiling_trace.trace_custom_node.insert(node_full_name); } - MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin - << ", net_output:" << profiling_trace_info->profiling_trace_netoutput - << ", end:" << profiling_trace_info->profiling_trace_bp_end - << ", 
allreduce1:" << profiling_trace_info->profiling_allreduce1_start - << ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start; - return profiling_trace_info->IsValid(); + MS_LOG(INFO) << "get env end"; + return profiling_trace; } -bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(profiling_trace_net_output); - MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope(); - if (!profiling_trace_net_output->empty()) { - MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str(); - return true; - } - - if (AnfAlgo::IsRealKernel(anf_node)) { - *profiling_trace_net_output = anf_node->fullname_with_scope(); - return true; - } +std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { + const char *trace_begin = std::getenv(kFpStartNode); + auto &first_cnode = cnode_exec_order.front(); + MS_EXCEPTION_IF_NULL(first_cnode); + return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); +} - auto cnode = anf_node->cast(); - if (cnode == nullptr) { - MS_LOG(ERROR) << "[profiling]Anf node should be a CNode"; - return false; - } +std::string ProfilingUtils::GetTraceBpEnd() { + const char *trace_bp_end = std::getenv(kBpEndNode); + return trace_bp_end == nullptr ? 
"" : std::string(trace_bp_end); +} - auto inputs = cnode->inputs(); - auto input_size = inputs.size(); - if (input_size < 2) { - MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node."; - return false; - } - return GetNetOutput(inputs[1], profiling_trace_net_output); +std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { + const char *trace_netoutput = std::getenv(kIterEndNode); + auto &last_cnode = cnode_exec_order.back(); + MS_EXCEPTION_IF_NULL(last_cnode); + return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); } -CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags) { - MS_EXCEPTION_IF_NULL(graph_ptr); +NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr) { kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder; selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32}); @@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptrset_abstract(type_none_abstract); // set attr - ValuePtr notify_value = MakeValue(notify); - ValuePtr trace_id_value = MakeValue(profiler_trace_id); - ValuePtr flags_value = MakeValue(flags); + ValuePtr notify_value = MakeValue(profiling_content.notify); + ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id); + ValuePtr flags_value = MakeValue(profiling_content.flags); AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr); AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr); - return cnode_ptr; + return NOT_NULL(cnode_ptr); } -void 
ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) { - if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) { - MS_LOG(ERROR) << "[profiling]input param invalid"; - return; - } +void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node, + const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { + if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { auto job_id = ProfilingManager::GetInstance().GetJobId(); - // job task info - CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get()); - // fp task info - CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get()); - kernel_list->emplace_back(job_kernel_ptr); - kernel_list->emplace_back(start_kernel_ptr); + ProfilingContent job_profiling_context = {false, job_id, 0}; + auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); + kernel_list->emplace_back(job_profiling_node); + + ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; + auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); + kernel_list->emplace_back(fp_profiling_node); } } -void ProfilingUtils::ProfilingAllReduce(const 
std::shared_ptr &graph_ptr, - const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, + const ProfilingContent &profiling_content, + NotNull graph_ptr) { + CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr); + AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get()); + AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get()); + return profiling_node; +} + +void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - auto full_scope_name = anf_node->fullname_with_scope(); - if (profiling_node_name == full_scope_name) { - CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get()); - kernel_list->emplace_back(allreduce_kernel_ptr); + auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope()); + if (iter == profiling_trace_info.trace_custom_node.end()) { + return; } + // custom op profiling job start from 3. 
+ ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; + CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end() - 1, front_node); + + ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0}; + CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); + kernel_list->insert(kernel_list->end(), back_node); + ++custom_node_index_; } -void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr &graph_ptr, - const mindspore::AnfNodePtr &anf_node, - const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, - std::vector *kernel_list) { - MS_EXCEPTION_IF_NULL(graph_ptr); +void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(kernel_list); - if (profiling_trace_info.IsValid()) { - auto full_scope_name = anf_node->fullname_with_scope(); - if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) { - CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0); - AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get()); - kernel_list->emplace_back(bp_kernel_ptr); - } + if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) { + ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; + CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_end_node); + } +} - if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) { - CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0); - 
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get()); - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get()); - kernel_list->emplace_back(end_task_info); - } +void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto full_scope_name = anf_node->fullname_with_scope(); + if (profiling_trace_info.trace_netoutput == full_scope_name) { + ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; + CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); + kernel_list->emplace_back(bp_kernel_ptr); } } diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index ca0ef6f1f0..c59e856249 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -19,63 +19,102 @@ #include #include #include +#include #include #include "session/kernel_graph.h" +#include "utils/contract.h" namespace mindspore { namespace device { namespace ascend { struct ProfilingTraceInfo { // execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...) - std::string profiling_trace_begin; + std::string trace_begin; // get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp - std::string profiling_trace_bp_end; + std::string trace_bp_end; // execute order's end execute (like: Conv2DBackpropFilter) - std::string profiling_trace_netoutput; + std::string trace_netoutput; - std::string profiling_allreduce1_start; - - std::string profiling_allreduce1_end; - - std::string profiling_allreduce2_start; - - std::string profiling_allreduce2_end; + // profiling specific op, such as AllReduce; + std::set trace_custom_node; // 1. 
insert profiling_trace_begin if profiling_trace_bp_end is not empty. // 2. op lanuch get task info with callback func. // 3. insert profiling_trace_bp_end. // 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty. - bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); } + bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); } +}; + +struct ProfilingContent { + // true -send data from device to host and finish profiling + bool notify; + uint64_t profiler_trace_id; + uint32_t flags; }; class ProfilingUtils { public: ProfilingUtils() = default; ~ProfilingUtils() = default; - static bool GetProfilingTraceInfo(const std::shared_ptr &graph_ptr, - ProfilingTraceInfo *profiling_trace_info); - static void ProfilingTraceFpStart(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); - static void ProfilingAllReduce(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - int job_id, const std::string &profiling_node_name, - std::vector *kernel_list); - static void ProfilingTraceEnd(const std::shared_ptr &graph_ptr, const AnfNodePtr &anf_node, - const ProfilingTraceInfo &profiling_trace_info, std::vector *kernel_list); + + // Insert job_id profiling node and fp_start profiling node. + // Job_id is got from envs, which shound be a number greater than 255 + // Fp_start node should been inserted in the start of a network, and the log_id is hard code to 1. + static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert net output profiling node, which tells the device to stop profiling. + // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. 
+ static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Insert bp_end profiling node, which should been inserted after the last backpropagation CNode in the network. + static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + // Mapping graph id and the kernels' name in the graph static void SetGraphKernelName(uint32_t graph_id, const std::vector &kernel_names); + + // Mapping task_id and kernel name for device to generate the time cost of specific kernel. + // Device calculate the time cost of the task which is marked by task id. + // But we need data of (kernel name , time cost) static void ReportProfilingData(uint32_t graph_id, const std::vector &task_ids); - static const char kProfiling[]; - static const char kNotify[]; - static const char kProfilerTraceId[]; - static const char kFlags[]; + // Get profiling trace point from envs. + // export PROFILING_FP_START='full name of the first cnode to execute' + // export PROFILING_BP_END='full name of the last backpropagation cnode to execute' + // export PROFILING_ITER_END='full name of last cnode in graph to execute' + // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' + // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode' + // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. 
+ static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); + + // Insert two profiling trace points, one in front and one behind + static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, + NotNull graph_ptr, + NotNull *> kernel_list); + + inline static constexpr char kProfiling[] = "Profiling"; + inline static constexpr char kNotify[] = "notify"; + inline static constexpr char kProfilerTraceId[] = "profiler_trace_id"; + inline static constexpr char kFlags[] = "flags"; private: - static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output); - static CNodePtr CreateProfilingCNode(const std::shared_ptr &graph_ptr, bool notify, - uint64_t profiler_trace_id, uint32_t flags); + static NotNull CreateProfilingCNode(const ProfilingContent &profiling_content, + NotNull graph_ptr); + static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content, + NotNull graph_ptr); + static std::string GetTraceBegin(const std::vector &cnode_exec_order); + static std::string GetTraceBpEnd(); + static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_; + static uint32_t custom_node_index_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index c422d50b51..9a6f48025f 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { if (!ascend::ProfilingManager::GetInstance().IsProfiling()) { MS_LOG(INFO) << "No need to profiling"; return; } - 
ProfilingTraceInfo profiling_trace_info; - if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) { - InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info); - } else { - MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed"; + ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr); + if (!profiling_trace_info.IsValid()) { + MS_LOG(WARNING) << "[profiling] no profiling node found!"; + return; } + InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr); } -void KernelAdjust::InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info) { +void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr) { MS_LOG(INFO) << "[profiling] Insert profiling kernel start"; - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); if (!profiling_trace_info.IsValid()) { MS_LOG(WARNING) << "Profiling trace point not found"; return; @@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr new_cnode_list; std::vector cnode_ptr_list = kernel_graph_ptr->execution_order(); for (const auto &cnode_ptr : cnode_ptr_list) { - ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start, - profiling_trace_info.profiling_allreduce1_start, &new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start, - profiling_trace_info.profiling_allreduce2_start, &new_cnode_list); + ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); new_cnode_list.emplace_back(cnode_ptr); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End, - profiling_trace_info.profiling_allreduce1_end, 
&new_cnode_list); - ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End, - profiling_trace_info.profiling_allreduce2_end, &new_cnode_list); - ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); + ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); + ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); } kernel_graph_ptr->set_execution_order(new_cnode_list); } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h index 62c64d98b9..ca01d51e54 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -48,7 +48,7 @@ class KernelAdjust { void SetStreamSwitchOps(const std::shared_ptr &kernel_graph_ptr); bool StepLoadCtrlInputs(const std::shared_ptr &context, const std::shared_ptr &kernel_graph_ptr); - void Profiling(const std::shared_ptr &kernel_graph_ptr); + void Profiling(NotNull kernel_graph_ptr); static bool NeedInsertSwitch(); CNodePtr CreateSteamActiveOp(const std::shared_ptr &kernel_graph_ptr); @@ -66,8 +66,8 @@ class KernelAdjust { kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector &formats, const std::vector &type_ids); void LoadSwitchInputs(std::vector *inputs); - void InsertProfilingKernel(const std::shared_ptr &kernel_graph_ptr, - const ProfilingTraceInfo &profiling_trace_info); + void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, + NotNull kernel_graph_ptr); }; } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 8212d64c27..432d88e7a4 100644 
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr &kern kernel_graph->SetExecOrderByDefault(); if (save_graphs) { std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir"; - DumpIR(file_path, kernel_graph); + DumpIR(file_path, kernel_graph, true); DumpIRProto(kernel_graph, "after_hwopt"); } } diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 9d351f3199..34c05aed08 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { // Assign streams for control sink and hccl and so on AssignStream(graph); - device::KernelAdjust::GetInstance().Profiling(graph); + device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get())); // build kernel if node is cnode BuildKernel(graph); auto ms_context = MsContext::GetInstance(); diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index ebd2ac8b46..e0b5ab0d61 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &c return true; } bool KernelAdjust::NeedInsertSwitch() { return true; } -void KernelAdjust::Profiling(const std::shared_ptr &kernel_graph_ptr) { return; } +void KernelAdjust::Profiling(NotNull kernel_graph_ptr) { return; } } // namespace device } // namespace mindspore From 0ba72a6885df59659e5a6162805941f42a428476 Mon Sep 17 00:00:00 2001 From: guohongzilong <2713219276@qq.com> Date: Wed, 8 Apr 2020 18:00:33 +0800 Subject: [PATCH 30/58] unified tensor and mindspore.type --- mindspore/common/tensor.py | 8 ++++---- mindspore/ops/operations/math_ops.py | 6 
+++--- mindspore/ops/operations/random_ops.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index d17661595f..709b2ae280 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -42,14 +42,14 @@ class Tensor(Tensor_): Examples: >>> # init a tensor with input data - >>> t1 = mindspore.Tensor(np.zeros([1, 2, 3]), mindspore.float32) - >>> assert isinstance(t1, mindspore.Tensor) + >>> t1 = Tensor(np.zeros([1, 2, 3]), mindspore.float32) + >>> assert isinstance(t1, Tensor) >>> assert t1.shape() == (1, 2, 3) >>> assert t1.dtype() == mindspore.float32 >>> >>> # init a tensor with a float scalar - >>> t2 = mindspore.Tensor(0.1) - >>> assert isinstance(t2, mindspore.Tensor) + >>> t2 = Tensor(0.1) + >>> assert isinstance(t2, Tensor) >>> assert t2.dtype() == mindspore.float64 """ diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 47b9e490f1..d003f6ee8b 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1208,7 +1208,7 @@ class Acosh(PrimitiveWithInfer): Examples: >>> acosh = Acosh() - >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), ms.float32) + >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) >>> output = acosh(X) """ @@ -1752,7 +1752,7 @@ class Cos(PrimitiveWithInfer): Examples: >>> cos = P.Cos() - >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), ms.float32) + >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) >>> output = cos(X) """ @@ -1808,7 +1808,7 @@ class Sin(PrimitiveWithInfer): Examples: >>> sin = P.Sin() - >>> input_x = Tensor(np.array([0.62, 0.28, 0.43, 0.62]), ms.float32) + >>> input_x = Tensor(np.array([0.62, 0.28, 0.43, 0.62]), mindspore.float32) >>> output = sin(input_x) """ diff --git a/mindspore/ops/operations/random_ops.py b/mindspore/ops/operations/random_ops.py index 9ef5b301f9..95692a622e 100644 --- 
a/mindspore/ops/operations/random_ops.py +++ b/mindspore/ops/operations/random_ops.py @@ -45,7 +45,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer): Examples: >>> rnd_choice_mask = RandomChoiceWithMask() - >>> input_x = Tensor(np.ones(shape=[240000, 4]), ms.bool_) + >>> input_x = Tensor(np.ones(shape=[240000, 4]), mindspore.bool_) >>> output_y, output_mask = rnd_choice_mask(input_x) """ From ee519aa0d188695caf61a277ddb2d3c915e67046 Mon Sep 17 00:00:00 2001 From: yoonlee666 Date: Wed, 8 Apr 2020 14:31:18 +0800 Subject: [PATCH 31/58] use TFRecordDataset in bert ci script and add absolute position embedding code in bert model --- mindspore/model_zoo/Bert_NEZHA/bert_model.py | 14 ++++++++++++++ .../networks/models/bert/bert_tdt_no_lossscale.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/mindspore/model_zoo/Bert_NEZHA/bert_model.py b/mindspore/model_zoo/Bert_NEZHA/bert_model.py index f20c57dd75..d7f9355b3c 100644 --- a/mindspore/model_zoo/Bert_NEZHA/bert_model.py +++ b/mindspore/model_zoo/Bert_NEZHA/bert_model.py @@ -165,6 +165,7 @@ class EmbeddingPostprocessor(nn.Cell): def __init__(self, embedding_size, embedding_shape, + use_relative_positions=False, use_token_type=False, token_type_vocab_size=16, use_one_hot_embeddings=False, @@ -192,6 +193,13 @@ class EmbeddingPostprocessor(nn.Cell): self.layernorm = nn.LayerNorm(embedding_size) self.dropout = nn.Dropout(1 - dropout_prob) self.gather = P.GatherV2() + self.use_relative_positions = use_relative_positions + self.slice = P.Slice() + self.full_position_embeddings = Parameter(initializer + (TruncatedNormal(initializer_range), + [max_position_embeddings, + embedding_size]), + name='full_position_embeddings') def construct(self, token_type_ids, word_embeddings): output = word_embeddings @@ -206,6 +214,11 @@ class EmbeddingPostprocessor(nn.Cell): token_type_embeddings = self.gather(self.embedding_table, flat_ids, 0) token_type_embeddings = self.reshape(token_type_embeddings, self.shape) output 
+= token_type_embeddings + if not self.use_relative_positions: + _, seq, width = self.shape + position_embeddings = self.slice(self.full_position_embeddings, [0, 0], [seq, width]) + position_embeddings = self.reshape(position_embeddings, (1, seq, width)) + output += position_embeddings output = self.layernorm(output) output = self.dropout(output) return output @@ -853,6 +866,7 @@ class BertModel(nn.Cell): self.bert_embedding_postprocessor = EmbeddingPostprocessor( embedding_size=self.embedding_size, embedding_shape=output_embedding_shape, + use_relative_positions=config.use_relative_positions, use_token_type=True, token_type_vocab_size=config.type_vocab_size, use_one_hot_embeddings=use_one_hot_embeddings, diff --git a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py b/tests/st/networks/models/bert/bert_tdt_no_lossscale.py index 9cc11997e6..5b6268505b 100644 --- a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py +++ b/tests/st/networks/models/bert/bert_tdt_no_lossscale.py @@ -103,9 +103,9 @@ def me_de_train_dataset(): """test me de train dataset""" # apply repeat operations repeat_count = 1 - ds = de.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids", + ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", "masked_lm_positions", - "masked_lm_ids", "masked_lm_weights"]) + "masked_lm_ids", "masked_lm_weights"], shuffle=False) type_cast_op = C.TypeCast(mstype.int32) ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) From 599543932c566fd7f8b7a9245e455d53c81b396d Mon Sep 17 00:00:00 2001 From: liuxiao Date: Mon, 6 Apr 2020 10:22:47 +0800 Subject: [PATCH 32/58] Add pack and unpack --- mindspore/ccsrc/operator/ops.h | 1 + mindspore/ccsrc/transform/convert.cc | 3 +- mindspore/ops/_grad/grad_array_ops.py | 24 +++++ mindspore/ops/operations/__init__.py | 4 +- 
mindspore/ops/operations/array_ops.py | 144 ++++++++++++++++++++++++++ tests/ut/python/ops/test_ops.py | 53 ++++++++++ 6 files changed, 227 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h index 727d66dfb3..5fbf2b7067 100644 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/operator/ops.h @@ -135,6 +135,7 @@ extern const PrimitivePtr kPrimGatherV2; extern const PrimitivePtr kPrimSize; extern const PrimitivePtr kPrimArgMax; extern const PrimitivePtr kPrimPack; +extern const PrimitivePtr kPrimUnpack; extern const PrimitivePtr kPrimUnsortedSegmentSum; extern const PrimitivePtr kPrimConcatOffset; extern const PrimitivePtr kPrimReshape; diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 8eed207f59..8b14a8f338 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -148,7 +148,8 @@ const char kNameSlice[] = "Slice"; const char kNameAddN[] = "AddN"; const char kNameLess[] = "Less"; const char kNameGreater[] = "Greater"; -const char kNamePack[] = "Stack"; +const char kNameStack[] = "Stack"; +const char kNameUnstack[] = "Unstack"; const char kNameMerge[] = "Merge"; const char kNameGeSwitch[] = "GeSwitch"; diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 81d38a1e1e..0a0caf471e 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -266,6 +266,30 @@ def get_bprop_gather_v2(self): return bprop +@bprop_getters.register(P.Stack) +def get_bprop_stack(self): + """Generate bprop for Stack""" + axis = self.axis + + def bprop(x, out, dout): + stack_grad = P.Unstack(axis) + out = stack_grad(dout) + return (out,) + return bprop + + +@bprop_getters.register(P.Unstack) +def get_bprop_unstack(self): + """Generate bprop for Unstack""" + axis = self.axis + + def bprop(x, out, dout): + unstack_grad = P.Stack(axis) + out = unstack_grad(dout) + return 
(out,) + return bprop + + @bprop_getters.register(P.StridedSlice) def get_bprop_strided_slice(self): """Generate bprop for StridedSlice""" diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index d255796bae..5fd3f07876 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -19,7 +19,7 @@ Primitive operator classes. A collection of operators to build nerual networks or computing functions. """ -from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, +from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Stack, Unstack, Diag, DiagPart, DType, ExpandDims, Eye, Fill, GatherNd, GatherV2, InvertPermutation, IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, @@ -112,6 +112,8 @@ __all__ = [ 'OneHot', 'GatherV2', 'Concat', + 'Stack', + 'Unstack', 'Tile', 'BiasAdd', 'Gelu', diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index b91c2cbc7d..59d3083c5d 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1350,6 +1350,150 @@ class Concat(PrimitiveWithInfer): return out +def _get_stack_shape(x_shape, x_type, axis): + """for satck output shape""" + validator.check_type("shape", x_shape, [tuple]) + validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT) + validator.check_subclass("shape0", x_type[0], mstype.tensor) + validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT) + rank_base = len(x_shape[0]) + N = len(x_shape) + out_shape = x_shape[0] + validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH) + if axis < 0: + axis = axis + rank_base + 1 + for i in range(1, N): + v = x_shape[i] + validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base) + validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0]) + for j in range(rank_base): + if v[j] != x_shape[0][j]: + raise ValueError("Stack 
evaluator element %d shape in input can not stack with first element" % i) + out_shape.insert(axis, N) + return out_shape + +class Stack(PrimitiveWithInfer): + r""" + Stacks a list of rank-`R` tensors into one rank-`(R+1)` tensor. + + Packs the list of tensors in `input_x` into a tensor with rank one higher than + each tensor in `input_x`, by packing them along the `axis` dimension. + Given a list of length `N` of tensors of shape `(A, B, C)`; + + If `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. + + If `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. Etc. + + Args: + axis (int): The axis to stack along. Negative values wrap around, + so the valid range is [-(R+1), R+1). Default: 0. + + Inputs: + - **input_x** (Union[tuple, list]) - A Tuple or list of Tensor objects with the same shape and type. + + Outputs: + Tensor. A stacked Tensor with the same type as values. + + Examples: + >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) + >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) + >>> op = P.Stack() + >>> output = op([data1, data2]) + [[0, 1], [2, 3]] + """ + + @prim_attr_register + def __init__(self, axis=0): + """init Stack""" + self.__setattr_flag__ = True + validator.check_type("axis", axis, [int]) + self.axis = axis + + def __infer__(self, value): + x_shape = value['shape'] + x_type = value['dtype'] + self.add_prim_attr('num', len(x_shape)) + all_shape = _get_stack_shape(x_shape, x_type, self.axis) + out = {'shape': all_shape, + 'dtype': x_type[0], + 'value': None} + return out + + +class Unstack(PrimitiveWithInfer): + r""" + Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. + + Unpacks num tensors from value by chipping it along the axis dimension. + If num is not specified (the default), it is inferred from value's shape. + If value.shape[axis] is not known, ValueError is raised. 
+ + For example, given a tensor of shape (A, B, C, D); + + If axis == 0 then the i'th tensor in output is the slice value[i, :, :, :] and + each tensor in output will have shape (B, C, D). (Note that the dimension unpacked along is gone, unlike split). + + If axis == 1 then the i'th tensor in output is the slice value[:, i, :, :] and + each tensor in output will have shape (A, C, D). Etc. + + This is the opposite of stack. + + Args: + axis (int): The axis to unstack along. Defaults to the first dimension. + Negative values wrap around, so the valid range is [-R, R). + + Inputs: + - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. + A rank R > 0 Tensor to be unstacked. + + Outputs: + A tuple of Tensors, the shape of each objects is same. + + Raises: + ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())), + or if len(input_x.shape[axis]) not equal to num. + + Examples: + >>> unstack = P.Unstack() + >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) + >>> output = unstack(x) + ([1, 1, 1, 1], [2, 2, 2, 2]) + """ + + @prim_attr_register + def __init__(self, axis=0): + """init Unstack""" + self.__setattr_flag__ = True + validator.check_type("axis", axis, [int]) + self.axis = axis + + def __infer__(self, x): + validator.check_subclass("x", x['dtype'], mstype.tensor) + x_shape = list(x['shape']) + dim = len(x_shape) + validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT) + if self.axis < 0: + self.axis = self.axis + dim + output_num = x_shape[self.axis] + validator.check_type("num", output_num, [int]) + validator.check_integer("output_num", output_num, 0, Rel.GT) + self.add_prim_attr('num', output_num) + output_valid_check = x_shape[self.axis] - output_num + validator.check_integer("the dimension which to unstack divides output_num", output_valid_check, 0, Rel.EQ) + out_shapes = [] + out_dtypes = [] + out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] + for _ in range(output_num): + 
out_shapes.append(tuple(out_shape)) + out_dtypes.append(x['dtype']) + out_shapes = tuple(out_shapes) + out_dtypes = tuple(out_dtypes) + out = {'shape': out_shapes, + 'dtype': out_dtypes, + 'value': None} + return out + + class Slice(PrimitiveWithInfer): """ Slice a tensor in specified shape. diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 3e4acb12f3..97481e69a2 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -80,6 +80,29 @@ class NetForConcat1(nn.Cell): return self.concat((x1, x2)) +class NetForStackInput(nn.Cell): + def __init__(self, op): + super(NetForStackInput, self).__init__() + self.op = op + self.mul = P.Mul() + + def construct(self, *args): + t = () + for i in range(len(args)): + t = t + (self.mul(args[i], args[i]),) + return self.op(t) + + +class NetForUnstackInput(nn.Cell): + def __init__(self, op): + super(NetForUnstackInput, self).__init__() + self.op = op + self.mul = P.Mul() + + def construct(self, x1): + return self.op((self.mul(x1, x1))) + + class NetForFlatten(nn.Cell): def __init__(self): super(NetForFlatten, self).__init__() @@ -973,6 +996,36 @@ test_case_array_ops = [ Tensor(np.array([1], np.float32)), Tensor(np.array([1], np.float32)))], 'desc_bprop': [[3,]]}), + ('StackV2_0', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[2, 2], [2, 2], [2, 2]], + 'desc_bprop':[[3, 2, 2]], + }), + ('StackV2_1', { + 'block': NetForStackInput(P.Stack(axis=-2)), + 'desc_inputs':[[3, 2, 3], [3, 2, 3], [3, 2, 3]], + 'desc_bprop':[[3, 2, 3, 3]], + }), + ('StackV2_2', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[2, 2]], + 'desc_bprop':[[2, 2, 2]], + }), + ('StackV2_3', { + 'block': NetForStackInput(P.Stack()), + 'desc_inputs':[[128, 128], [128, 128]], + 'desc_bprop':[[2, 128, 128]], + }), + ('UnstackV2_0', { + 'block': NetForUnstackInput(P.Unstack(axis=0)), + 'desc_inputs':[[2, 4]], + 'desc_bprop':[[4], [4]], + }), + ('UnstackV2_1', { + 'block': 
NetForUnstackInput(P.Unstack(axis=-1)), + 'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))], + 'desc_bprop':[[1], [1], [1]], + }), ('Diag', { 'block': P.Diag(), 'desc_inputs': [[4]], From e01df479151f16782f6b7865d220ec0e647c3e0a Mon Sep 17 00:00:00 2001 From: jinyaohui Date: Tue, 7 Apr 2020 17:23:17 +0800 Subject: [PATCH 33/58] modify set_dataset_mode_config api param --- example/yolov3_coco2017/train.py | 6 +++--- mindspore/ccsrc/transform/convert.cc | 12 ++++++------ mindspore/ccsrc/utils/config_manager.cc | 4 ++-- mindspore/ccsrc/utils/config_manager.h | 4 ++-- mindspore/common/api.py | 4 ++-- mindspore/nn/wrap/loss_scale.py | 2 +- tests/ut/python/utils/test_callback.py | 16 +++++++++------- 7 files changed, 25 insertions(+), 23 deletions(-) diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index 3ac3816f4a..0a32a6d30d 100644 --- a/example/yolov3_coco2017/train.py +++ b/example/yolov3_coco2017/train.py @@ -67,7 +67,7 @@ if __name__ == '__main__': parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") - parser.add_argument("--mode", type=str, default="graph", help="Run graph mode or feed mode, default is graph") + parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or non-sink mode, default is sink") parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") @@ -150,8 +150,8 @@ if __name__ == '__main__': model = Model(net) dataset_sink_mode = False - if args_opt.mode == "graph": - print("In graph mode, one epoch return a loss.") + if args_opt.mode == "sink": 
+ print("In sink mode, one epoch return a loss.") dataset_sink_mode = True print("Start train YOLOv3, the first epoch will be slower because of the graph compilation.") model.train(args_opt.epoch_size, dataset, callbacks=callback, dataset_sink_mode=dataset_sink_mode) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 8b14a8f338..59985c8ae3 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -446,10 +446,10 @@ void DfGraphConvertor::InitLoopVar(std::vector *init_input) { int64_t value = 0; auto const_iter_num = std::make_shared("const/npu_runconfig/iterations_per_loop"); - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { value = ConfigManager::GetInstance().iter_num(); } else { - MS_LOG(INFO) << "Run with feed mode, the iterator number will always be 1"; + MS_LOG(INFO) << "Run with non-sink mode, the iterator number will always be 1"; value = 1; ConfigManager::GetInstance().set_iter_num(value); } @@ -580,7 +580,7 @@ void DfGraphConvertor::SetupParamInitSubGraph(const TensorOrderMap &tensors, std void DfGraphConvertor::MakeDatasetHandler(const std::string &name, const size_t &input_idx, const AnfNodePtr &it) { MS_LOG(INFO) << "The " << name << " is the " << input_idx << "(st/nd/th) input"; - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { auto getnext_idx = static_cast(input_idx); DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); if (!param.input_indexes().empty() && input_idx <= param.input_indexes().size()) { @@ -872,7 +872,7 @@ DfGraphConvertor &DfGraphConvertor::ConvertAllNode() { } // Create dataset iterator and iterator_getnext node - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { 
DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); MS_LOG(INFO) << "Dataset param is " << param.ToString() << "."; // GetNext @@ -981,7 +981,7 @@ void DfGraphConvertor::TraceOutputFromParameter(const AnfNodePtr &anf_out) { } void SetupDatasetIterGetNextNode(const OperatorPtr &op) { - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { DatasetGraphParam param = ConfigManager::GetInstance().dataset_param(); size_t output_num = param.ge_types().size(); MS_LOG(INFO) << "Set iterator_getnext op's output num = " << output_num << "."; @@ -1040,7 +1040,7 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { // set graph input according to the order from anf graph std::vector inputs; - if (ConfigManager::GetInstance().dataset_mode() == DS_GRAPH_MODE) { + if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { inputs.push_back(*dataset_iter_getnext_); } else { auto params = anf_graph_->parameters(); diff --git a/mindspore/ccsrc/utils/config_manager.cc b/mindspore/ccsrc/utils/config_manager.cc index ac8a965878..6d66b37436 100644 --- a/mindspore/ccsrc/utils/config_manager.cc +++ b/mindspore/ccsrc/utils/config_manager.cc @@ -28,7 +28,7 @@ ConfigManager& ConfigManager::GetInstance() noexcept { } void ConfigManager::SetDatasetModeConfig(const std::string& mode) { - static const std::map mode_map = {{"feed", DS_FEED_MODE}, {"graph", DS_GRAPH_MODE}}; + static const std::map mode_map = {{"normal", DS_NORMAL_MODE}, {"sink", DS_SINK_MODE}}; if (mode_map.find(mode) == mode_map.end()) { MS_LOG(ERROR) << "Invalid dataset mode:" << mode; return; @@ -38,7 +38,7 @@ void ConfigManager::SetDatasetModeConfig(const std::string& mode) { void ConfigManager::ResetConfig() noexcept { parallel_strategy_ = ONE_DEVICE; - dataset_mode_ = DS_FEED_MODE; + dataset_mode_ = DS_NORMAL_MODE; dataset_param_ = DatasetGraphParam("", 0, 0, {}, {}, {}); iter_num_ = 1; } diff --git 
a/mindspore/ccsrc/utils/config_manager.h b/mindspore/ccsrc/utils/config_manager.h index 31137f6243..db7d7d0c14 100644 --- a/mindspore/ccsrc/utils/config_manager.h +++ b/mindspore/ccsrc/utils/config_manager.h @@ -33,7 +33,7 @@ enum ParallelStrategy { DISTRIBUTION, }; -enum DatasetMode { DS_FEED_MODE = 0, DS_GRAPH_MODE }; +enum DatasetMode { DS_NORMAL_MODE = 0, DS_SINK_MODE }; class DatasetGraphParam { public: @@ -106,7 +106,7 @@ class ConfigManager { ~ConfigManager() = default; ParallelStrategy parallel_strategy_{ONE_DEVICE}; - DatasetMode dataset_mode_{DS_FEED_MODE}; + DatasetMode dataset_mode_{DS_NORMAL_MODE}; DatasetGraphParam dataset_param_{"", 0, 0, {}, {}, {}}; int64_t iter_num_{1}; std::string dataset_phase_{""}; diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 7f0b2bfeaa..b5450bc5a3 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -378,9 +378,9 @@ class _Executor: if enable_ge: # decide whether to sink based on whether the inputs is virtual or not if args_list and isinstance(args_list[0], Tensor) and args_list[0].virtual_flag: - _set_dataset_mode_config('graph') + _set_dataset_mode_config('sink') else: - _set_dataset_mode_config('feed') + _set_dataset_mode_config('normal') self._build_data_graph(obj, params, phase) diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 6a1f15a402..fd1c22be1f 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -51,7 +51,7 @@ class DynamicLossScaleUpdateCell(Cell): In every training step, the loss scaling value will be updated by loss scaling value/`scale_factor` when there is overflow. And it will be increased by loss scaling value * `scale_factor` if there is no overflow for a continuous `scale_window` steps. 
This cell is used for Graph mode training in which all - logic will be executed on device side(Another training mode is feed mode in which some logic will be + logic will be executed on device side(Another training mode is non-sink mode in which some logic will be executed on host). Args: diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 43cf827330..7e7b893e0c 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -24,11 +24,12 @@ from mindspore import context from mindspore.common.tensor import Tensor from mindspore.nn.optim import Momentum from mindspore.nn import TrainOneStepCell, WithLossCell -from mindspore.train.callback import ModelCheckpoint, _check_file_name_prefix, RunContext,_checkpoint_cb_for_save_op,\ - LossMonitor, _InternalCallbackParam, _chg_ckpt_file_name_if_same_exist,\ - _build_callbacks, CheckpointConfig, _set_cur_net +from mindspore.train.callback import ModelCheckpoint, _check_file_name_prefix, RunContext, _checkpoint_cb_for_save_op, \ + LossMonitor, _InternalCallbackParam, _chg_ckpt_file_name_if_same_exist, \ + _build_callbacks, CheckpointConfig, _set_cur_net from mindspore.common.api import ms_function + class Net(nn.Cell): """Net definition.""" @@ -52,6 +53,7 @@ class Net(nn.Cell): class LossNet(nn.Cell): """ LossNet definition """ + def __init__(self): super(LossNet, self).__init__() self.conv = nn.Conv2d(3, 64, 3, has_bias=False, weight_init='normal', pad_mode='valid') @@ -110,8 +112,8 @@ def test_save_checkpoint(): os.remove('./test_files/test_ckpt-model.pkl') -def test_loss_monitor_graph_model(): - """Test lossmonitor Graph model.""" +def test_loss_monitor_sink_model(): + """Test loss monitor sink model.""" cb_params = _InternalCallbackParam() cb_params.cur_epoch_num = 4 cb_params.cur_step_num = 2 @@ -129,8 +131,8 @@ def test_loss_monitor_graph_model(): callbacklist.end(run_context) -def test_Loss_Monitor_feed_feed_model(): - """Test Loss 
Monitor feed feed mode.""" +def test_loss_monitor_feed_model(): + """Test loss monitor non-sink mode.""" cb_params = _InternalCallbackParam() run_context = RunContext(cb_params) loss_cb = LossMonitor(1) From 82896411e06f7c5372ebef500553cd62b1701aa9 Mon Sep 17 00:00:00 2001 From: anzhengqi Date: Wed, 8 Apr 2020 14:47:16 +0800 Subject: [PATCH 34/58] modify part of comments --- .../ccsrc/dataset/kernels/image/image_utils.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/dataset/kernels/image/image_utils.h index d289f6f56e..a4ddef40d6 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h @@ -84,18 +84,8 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out // Returns Decoded image // Supported images: -// - Windows bitmaps - \*.bmp, \*.dib (always supported) -// - JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section) -// - JPEG 2000 files - \*.jp2 (see the *Note* section) -// - Portable Network Graphics - \*.png (see the *Note* section) -// - WebP - \*.webp (see the *Note* section) -// - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported) -// - PFM files - \*.pfm (see the *Note* section) -// - Sun rasters - \*.sr, \*.ras (always supported) -// - TIFF files - \*.tiff, \*.tif (see the *Note* section) -// - OpenEXR Image files - \*.exr (see the *Note* section) -// - Radiance HDR - \*.hdr, \*.pic (always supported) -// - Raster and Vector geospatial data supported by GDAL (see the *Note* section) +// BMP JPEG JPG PNG TIFF +// supported by opencv, if user need more image analysis capabilities, please compile opencv particularlly. // @param input: CVTensor containing the not decoded image 1D bytes // @param output: Decoded image Tensor of shape and type DE_UINT8. 
Pixel order is RGB Status Decode(const std::shared_ptr &input, std::shared_ptr *output); From 6690a7fd7a3108ce408220342bd750455067ccab Mon Sep 17 00:00:00 2001 From: jonyguo Date: Wed, 8 Apr 2020 15:52:53 +0800 Subject: [PATCH 35/58] fix: error info is not exactly when column list invalid --- .../engine/datasetops/source/mindrecord_op.cc | 3 +- .../engine/datasetops/source/mindrecord_op.h | 1 + .../ccsrc/mindrecord/common/shard_error.cc | 178 ++++++++++++++++++ .../ccsrc/mindrecord/include/shard_error.h | 102 +++++----- mindspore/ccsrc/mindrecord/io/shard_reader.cc | 2 +- tests/ut/cpp/dataset/mind_record_op_test.cc | 36 ++++ .../ut/cpp/mindrecord/ut_shard_reader_test.cc | 2 +- 7 files changed, 271 insertions(+), 53 deletions(-) create mode 100644 mindspore/ccsrc/mindrecord/common/shard_error.cc diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index b5bea5416c..cb0f135a0d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -103,7 +103,8 @@ Status MindRecordOp::Init() { shard_reader_ = mindspore::make_unique(); auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_); - CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, "MindRecordOp init failed."); + CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, + "MindRecordOp init failed. 
Error message: " + ErrnoToMessage(rc)); data_schema_ = mindspore::make_unique(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h index 0b16391b20..aca5c86c2c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h @@ -32,6 +32,7 @@ #include "dataset/engine/datasetops/source/io_block.h" #include "dataset/util/queue.h" #include "dataset/util/status.h" +#include "mindrecord/include/shard_error.h" #include "mindrecord/include/shard_reader.h" #include "mindrecord/include/common/shard_utils.h" #include "dataset/util/wait_post.h" diff --git a/mindspore/ccsrc/mindrecord/common/shard_error.cc b/mindspore/ccsrc/mindrecord/common/shard_error.cc new file mode 100644 index 0000000000..cf43dcb315 --- /dev/null +++ b/mindspore/ccsrc/mindrecord/common/shard_error.cc @@ -0,0 +1,178 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mindrecord/include/shard_error.h" + +namespace mindspore { +namespace mindrecord { +std::string ErrnoToMessage(MSRStatus status) { + switch (status) { + case FAILED: + return "operator failed"; + break; + case SUCCESS: + return "operator success"; + break; + case OPEN_FILE_FAILED: + return "open file failed"; + break; + case CLOSE_FILE_FAILED: + return "close file failed"; + break; + case WRITE_METADATA_FAILED: + return "write metadata failed"; + break; + case WRITE_RAWDATA_FAILED: + return "write rawdata failed"; + break; + case GET_SCHEMA_FAILED: + return "get schema failed"; + break; + case ILLEGAL_RAWDATA: + return "illegal raw data"; + break; + case PYTHON_TO_JSON_FAILED: + return "pybind: python object to json failed"; + break; + case DIR_CREATE_FAILED: + return "directory create failed"; + break; + case OPEN_DIR_FAILED: + return "open directory failed"; + break; + case INVALID_STATISTICS: + return "invalid statistics object"; + break; + case OPEN_DATABASE_FAILED: + return "open database failed"; + break; + case CLOSE_DATABASE_FAILED: + return "close database failed"; + break; + case DATABASE_OPERATE_FAILED: + return "database operate failed"; + break; + case BUILD_SCHEMA_FAILED: + return "build schema failed"; + break; + case DIVISOR_IS_ILLEGAL: + return "divisor is illegal"; + break; + case INVALID_FILE_PATH: + return "file path is invalid"; + break; + case SECURE_FUNC_FAILED: + return "secure function failed"; + break; + case ALLOCATE_MEM_FAILED: + return "allocate memory failed"; + break; + case ILLEGAL_FIELD_NAME: + return "illegal field name"; + break; + case ILLEGAL_FIELD_TYPE: + return "illegal field type"; + break; + case SET_METADATA_FAILED: + return "set metadata failed"; + break; + case ILLEGAL_SCHEMA_DEFINITION: + return "illegal schema definition"; + break; + case ILLEGAL_COLUMN_LIST: + return "illegal column list"; + break; + case SQL_ERROR: + return "sql error"; + break; + case ILLEGAL_SHARD_COUNT: + return "illegal shard 
count"; + break; + case ILLEGAL_SCHEMA_COUNT: + return "illegal schema count"; + break; + case VERSION_ERROR: + return "data version is not matched"; + break; + case ADD_SCHEMA_FAILED: + return "add schema failed"; + break; + case ILLEGAL_Header_SIZE: + return "illegal header size"; + break; + case ILLEGAL_Page_SIZE: + return "illegal page size"; + break; + case ILLEGAL_SIZE_VALUE: + return "illegal size value"; + break; + case INDEX_FIELD_ERROR: + return "add index fields failed"; + break; + case GET_CANDIDATE_CATEGORYFIELDS_FAILED: + return "get candidate category fields failed"; + break; + case GET_CATEGORY_INFO_FAILED: + return "get category information failed"; + break; + case ILLEGAL_CATEGORY_ID: + return "illegal category id"; + break; + case ILLEGAL_ROWNUMBER_OF_PAGE: + return "illegal row number of page"; + break; + case ILLEGAL_SCHEMA_ID: + return "illegal schema id"; + break; + case DESERIALIZE_SCHEMA_FAILED: + return "deserialize schema failed"; + break; + case DESERIALIZE_STATISTICS_FAILED: + return "deserialize statistics failed"; + break; + case ILLEGAL_DB_FILE: + return "illegal db file"; + break; + case OVERWRITE_DB_FILE: + return "overwrite db file"; + break; + case OVERWRITE_MINDRECORD_FILE: + return "overwrite mindrecord file"; + break; + case ILLEGAL_MINDRECORD_FILE: + return "illegal mindrecord file"; + break; + case PARSE_JSON_FAILED: + return "parse json failed"; + break; + case ILLEGAL_PARAMETERS: + return "illegal parameters"; + break; + case GET_PAGE_BY_GROUP_ID_FAILED: + return "get page by group id failed"; + break; + case GET_SYSTEM_STATE_FAILED: + return "get system state failed"; + break; + case IO_FAILED: + return "io operate failed"; + break; + default: + return "invalid error no"; + } +} +} // namespace mindrecord +} // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/include/shard_error.h b/mindspore/ccsrc/mindrecord/include/shard_error.h index 026ee836e3..b85eeb71c0 100644 --- 
a/mindspore/ccsrc/mindrecord/include/shard_error.h +++ b/mindspore/ccsrc/mindrecord/include/shard_error.h @@ -18,63 +18,65 @@ #define MINDRECORD_INCLUDE_SHARD_ERROR_H_ #include -#include "utils/error_code.h" +#include namespace mindspore { namespace mindrecord { -DE_ERRORNO_MINDRECORD(OPEN_FILE_FAILED, 0, "open file failed"); -DE_ERRORNO_MINDRECORD(CLOSE_FILE_FAILED, 1, "close file failed"); -DE_ERRORNO_MINDRECORD(WRITE_METADATA_FAILED, 2, "write metadata failed"); -DE_ERRORNO_MINDRECORD(WRITE_RAWDATA_FAILED, 3, "write rawdata failed"); -DE_ERRORNO_MINDRECORD(GET_SCHEMA_FAILED, 4, "get schema failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_RAWDATA, 5, "illegal raw data"); -DE_ERRORNO_MINDRECORD(PYTHON_TO_JSON_FAILED, 6, "pybind: python object to json failed"); -DE_ERRORNO_MINDRECORD(DIR_CREATE_FAILED, 7, "directory create failed"); -DE_ERRORNO_MINDRECORD(OPEN_DIR_FAILED, 8, "open directory failed"); -DE_ERRORNO_MINDRECORD(INVALID_STATISTICS, 9, "invalid statistics object"); -DE_ERRORNO_MINDRECORD(OPEN_DATABASE_FAILED, 10, "open database failed"); -DE_ERRORNO_MINDRECORD(CLOSE_DATABASE_FAILED, 11, "close database failed"); -DE_ERRORNO_MINDRECORD(DATABASE_OPERATE_FAILED, 12, "database operate failed"); -DE_ERRORNO_MINDRECORD(BUILD_SCHEMA_FAILED, 13, "build schema failed"); -DE_ERRORNO_MINDRECORD(DIVISOR_IS_ILLEGAL, 14, "divisor is illegal"); -DE_ERRORNO_MINDRECORD(INVALID_FILE_PATH, 15, "file path is invalid"); -DE_ERRORNO_MINDRECORD(SECURE_FUNC_FAILED, 16, "secure function failed"); -DE_ERRORNO_MINDRECORD(ALLOCATE_MEM_FAILED, 17, "allocate memory failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_FIELD_NAME, 18, "illegal field name"); -DE_ERRORNO_MINDRECORD(ILLEGAL_FIELD_TYPE, 19, "illegal field type"); -DE_ERRORNO_MINDRECORD(SET_METADATA_FAILED, 20, "set metadata failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_DEFINITION, 21, "illegal schema definition"); -DE_ERRORNO_MINDRECORD(ILLEGAL_COLUMN_LIST, 22, "illegal column list"); -DE_ERRORNO_MINDRECORD(SQL_ERROR, 23, "sql error"); 
-DE_ERRORNO_MINDRECORD(ILLEGAL_SHARD_COUNT, 24, "illegal shard count"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_COUNT, 25, "illegal schema count"); -DE_ERRORNO_MINDRECORD(VERSION_ERROR, 26, "data version is not matched"); -DE_ERRORNO_MINDRECORD(ADD_SCHEMA_FAILED, 27, "add schema failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_Header_SIZE, 28, "illegal header size"); -DE_ERRORNO_MINDRECORD(ILLEGAL_Page_SIZE, 29, "illegal page size"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SIZE_VALUE, 30, "illegal size value"); -DE_ERRORNO_MINDRECORD(INDEX_FIELD_FAILED, 31, "add index fields failed"); -DE_ERRORNO_MINDRECORD(GET_CANDIDATE_CATEGORYFIELDS_FAILED, 32, "get candidate categoryFields failed"); -DE_ERRORNO_MINDRECORD(GET_CATEGORY_INFO, 33, "get category information failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_CATEGORY_ID, 34, "illegal category id"); -DE_ERRORNO_MINDRECORD(ILLEGAL_ROWNUMBER_OF_PAGE, 35, "illegal row number of page"); -DE_ERRORNO_MINDRECORD(ILLEGAL_SCHEMA_ID, 36, "illegal schema id"); -DE_ERRORNO_MINDRECORD(DESERIALIZE_SCHEMA_FAILED, 37, "deserialize schema failed"); -DE_ERRORNO_MINDRECORD(DESERIALIZE_STATISTICS_FAILED, 38, "deserialize statistics failed"); -DE_ERRORNO_MINDRECORD(ILLEGAL_DB_FILE, 39, "illegal db file."); -DE_ERRORNO_MINDRECORD(OVERWRITE_DB_FILE, 40, "overwrite db file."); -DE_ERRORNO_MINDRECORD(OVERWRITE_MINDRECORD_FILE, 41, "overwrite mindrecord file."); -DE_ERRORNO_MINDRECORD(ILLEGAL_MINDRECORD_FILE, 42, "illegal mindrecord file."); -DE_ERRORNO_MINDRECORD(PARSE_JSON_FAILED, 43, "parse json failed."); -DE_ERRORNO_MINDRECORD(ILLEGAL_PARAMETERS, 44, "illegal parameters."); -DE_ERRORNO_MINDRECORD(GET_PAGE_BY_GROUP_ID_FAILED, 46, "get page by group id failed."); -DE_ERRORNO_MINDRECORD(GET_SYSTEM_STATE_FAILED, 47, "get system state failed."); -DE_ERRORNO_MINDRECORD(IO_FAILED, 48, "io operate failed."); - enum MSRStatus { SUCCESS = 0, FAILED = 1, + OPEN_FILE_FAILED, + CLOSE_FILE_FAILED, + WRITE_METADATA_FAILED, + WRITE_RAWDATA_FAILED, + GET_SCHEMA_FAILED, + 
ILLEGAL_RAWDATA, + PYTHON_TO_JSON_FAILED, + DIR_CREATE_FAILED, + OPEN_DIR_FAILED, + INVALID_STATISTICS, + OPEN_DATABASE_FAILED, + CLOSE_DATABASE_FAILED, + DATABASE_OPERATE_FAILED, + BUILD_SCHEMA_FAILED, + DIVISOR_IS_ILLEGAL, + INVALID_FILE_PATH, + SECURE_FUNC_FAILED, + ALLOCATE_MEM_FAILED, + ILLEGAL_FIELD_NAME, + ILLEGAL_FIELD_TYPE, + SET_METADATA_FAILED, + ILLEGAL_SCHEMA_DEFINITION, + ILLEGAL_COLUMN_LIST, + SQL_ERROR, + ILLEGAL_SHARD_COUNT, + ILLEGAL_SCHEMA_COUNT, + VERSION_ERROR, + ADD_SCHEMA_FAILED, + ILLEGAL_Header_SIZE, + ILLEGAL_Page_SIZE, + ILLEGAL_SIZE_VALUE, + INDEX_FIELD_ERROR, + GET_CANDIDATE_CATEGORYFIELDS_FAILED, + GET_CATEGORY_INFO_FAILED, + ILLEGAL_CATEGORY_ID, + ILLEGAL_ROWNUMBER_OF_PAGE, + ILLEGAL_SCHEMA_ID, + DESERIALIZE_SCHEMA_FAILED, + DESERIALIZE_STATISTICS_FAILED, + ILLEGAL_DB_FILE, + OVERWRITE_DB_FILE, + OVERWRITE_MINDRECORD_FILE, + ILLEGAL_MINDRECORD_FILE, + PARSE_JSON_FAILED, + ILLEGAL_PARAMETERS, + GET_PAGE_BY_GROUP_ID_FAILED, + GET_SYSTEM_STATE_FAILED, + IO_FAILED }; + +// convert error no to string message +std::string ErrnoToMessage(MSRStatus status); } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index 32825fd9df..f91d28544e 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ -676,7 +676,7 @@ MSRStatus ShardReader::Open(const std::string &file_path, int n_consumer, if (CheckColumnList(selected_columns_) == FAILED) { MS_LOG(ERROR) << "Illegal column list"; - return FAILED; + return ILLEGAL_COLUMN_LIST; } // Initialize argument diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index 3d5c80b3f4..90f41fdeb9 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -21,6 +21,7 @@ #include "common/utils.h" #include "gtest/gtest.h" #include 
"mindrecord/include/shard_category.h" +#include "mindrecord/include/shard_error.h" #include "mindrecord/include/shard_sample.h" #include "mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" @@ -479,3 +480,38 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { row_count++; } } + +TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) { + // single MindRecord op and nothing else + // + // MindRecordOp + + MS_LOG(INFO) << "UT test TestMindRecordInvalidColumnList"; + + Status rc; + + // Start with an empty execution tree + auto my_tree = std::make_shared(); + + // Test info: + // Dataset from testDataset1 has 10 rows, 2 columns. + // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row + // only. 2 workers. + // Test a column selection instead of all columns as well. + + std::vector column_list; + std::string label_col_name("file_name_2"); + column_list.push_back(label_col_name); + label_col_name = "label"; + column_list.push_back(label_col_name); + + std::shared_ptr my_mindrecord_op; + MindRecordOp::Builder builder; + builder.SetDatasetFile(mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0") + .SetRowsPerBuffer(3) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); + rc = builder.Build(&my_mindrecord_op); + ASSERT_TRUE(rc.IsError()); + ASSERT_TRUE(rc.ToString().find_first_of("illegal column list") != std::string::npos); +} diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc index fd63373e20..f7ed39a006 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc @@ -155,7 +155,7 @@ TEST_F(TestShardReader, TestShardReaderColumnNotInSchema) { auto column_list = std::vector{"file_namex"}; ShardReader dataset; MSRStatus ret = dataset.Open(file_name, 4, column_list); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, ILLEGAL_COLUMN_LIST); } 
TEST_F(TestShardReader, TestShardVersion) { From 824d9e49565dfc3401e3c3fb54484bbf647daeb9 Mon Sep 17 00:00:00 2001 From: buxue Date: Thu, 2 Apr 2020 11:58:45 +0800 Subject: [PATCH 36/58] Develop op MaxPoolWithArgMax --- mindspore/nn/layer/pooling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py index 5d9b0ffa6c..bf90fcc9de 100644 --- a/mindspore/nn/layer/pooling.py +++ b/mindspore/nn/layer/pooling.py @@ -58,7 +58,7 @@ class _PoolNd(Cell): pass def extend_repr(self): - return 'kernel_size={kernel_size}, stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__) + return 'kernel_size={kernel_size}, strides={strides}, pad_mode={pad_mode}'.format(**self.__dict__) class MaxPool2d(_PoolNd): From 71b81c8f1b25ac9fd2431d3b9f292df793197a13 Mon Sep 17 00:00:00 2001 From: Zirui Wu Date: Wed, 1 Apr 2020 11:24:25 -0400 Subject: [PATCH 37/58] implemented multi-thread index writer for mindrecord num threads cannot be more than num shards minor fix clang style fix address review comments --- .../include/shard_index_generator.h | 10 +- .../mindrecord/io/shard_index_generator.cc | 98 ++++++++++++------- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h index f59dbe9bf0..1febd28fc2 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h @@ -85,14 +85,14 @@ class ShardIndexGenerator { /// \param sql /// \param data /// \return - MSRStatus BindParamaterExecuteSQL( + MSRStatus BindParameterExecuteSQL( sqlite3 *db, const std::string &sql, const std::vector>> &data); INDEX_FIELDS GenerateIndexFields(const std::vector &schema_detail); - MSRStatus ExcuteTransaction(const int &shard_no, const std::pair &db, - const std::vector &raw_page_ids, const std::map &blob_id_to_page_id); + MSRStatus 
ExecuteTransaction(const int &shard_no, const std::pair &db, + const std::vector &raw_page_ids, const std::map &blob_id_to_page_id); MSRStatus CreateShardNameTable(sqlite3 *db, const std::string &shard_name); @@ -103,12 +103,16 @@ class ShardIndexGenerator { void AddIndexFieldByRawData(const std::vector &schema_detail, std::vector> &row_data); + void DatabaseWriter(); // worker thread + std::string file_path_; bool append_; ShardHeader shard_header_; uint64_t page_size_; uint64_t header_size_; int schema_count_; + std::atomic_int task_; + std::atomic_bool write_success_; std::vector> fields_; }; } // namespace mindrecord diff --git a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc index 1c14d30f30..c0108241a1 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include #include "mindrecord/include/shard_index_generator.h" #include "common/utils.h" @@ -26,7 +27,13 @@ using mindspore::MsLogLevel::INFO; namespace mindspore { namespace mindrecord { ShardIndexGenerator::ShardIndexGenerator(const std::string &file_path, bool append) - : file_path_(file_path), append_(append), page_size_(0), header_size_(0), schema_count_(0) {} + : file_path_(file_path), + append_(append), + page_size_(0), + header_size_(0), + schema_count_(0), + task_(0), + write_success_(true) {} MSRStatus ShardIndexGenerator::Build() { ShardHeader header = ShardHeader(); @@ -284,7 +291,7 @@ std::pair ShardIndexGenerator::GenerateRawSQL( return {SUCCESS, sql}; } -MSRStatus ShardIndexGenerator::BindParamaterExecuteSQL( +MSRStatus ShardIndexGenerator::BindParameterExecuteSQL( sqlite3 *db, const std::string &sql, const std::vector>> &data) { sqlite3_stmt *stmt = nullptr; @@ -471,9 +478,9 @@ INDEX_FIELDS ShardIndexGenerator::GenerateIndexFields(const std::vector &s return {SUCCESS, std::move(fields)}; } -MSRStatus ShardIndexGenerator::ExcuteTransaction(const int &shard_no, const std::pair &db, - const std::vector &raw_page_ids, - const std::map &blob_id_to_page_id) { +MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, const std::pair &db, + const std::vector &raw_page_ids, + const std::map &blob_id_to_page_id) { // Add index data to database std::string shard_address = shard_header_.get_shard_address_by_id(shard_no); if (shard_address.empty()) { @@ -493,7 +500,7 @@ MSRStatus ShardIndexGenerator::ExcuteTransaction(const int &shard_no, const std: if (data.first != SUCCESS) { return FAILED; } - if (BindParamaterExecuteSQL(db.second, sql.second, data.second) == FAILED) { + if (BindParameterExecuteSQL(db.second, sql.second, data.second) == FAILED) { return FAILED; } MS_LOG(INFO) << "Insert " << data.second.size() << " rows to index db."; @@ -514,37 +521,62 @@ MSRStatus ShardIndexGenerator::WriteToDatabase() { page_size_ = 
shard_header_.get_page_size(); header_size_ = shard_header_.get_header_size(); schema_count_ = shard_header_.get_schema_count(); - if (shard_header_.get_shard_count() <= kMaxShardCount) { - // Create one database per shard - for (int shard_no = 0; shard_no < shard_header_.get_shard_count(); ++shard_no) { - // Create database - auto db = CreateDatabase(shard_no); - if (db.first != SUCCESS || db.second == nullptr) { - return FAILED; - } - MS_LOG(INFO) << "Init index db for shard: " << shard_no << " successfully."; - - // Pre-processing page information - auto total_pages = shard_header_.GetLastPageId(shard_no) + 1; - - std::map blob_id_to_page_id; - std::vector raw_page_ids; - for (uint64_t i = 0; i < total_pages; ++i) { - std::shared_ptr cur_page = shard_header_.GetPage(shard_no, i).first; - if (cur_page->get_page_type() == "RAW_DATA") { - raw_page_ids.push_back(i); - } else if (cur_page->get_page_type() == "BLOB_DATA") { - blob_id_to_page_id[cur_page->get_page_type_id()] = i; - } - } + if (shard_header_.get_shard_count() > kMaxShardCount) { + MS_LOG(ERROR) << "num shards: " << shard_header_.get_shard_count() << " exceeds max count:" << kMaxSchemaCount; + return FAILED; + } + task_ = 0; // set two atomic vars to initial value + write_success_ = true; - if (ExcuteTransaction(shard_no, db, raw_page_ids, blob_id_to_page_id) != SUCCESS) { - return FAILED; + // spawn half the physical threads or total number of shards whichever is smaller + const unsigned int num_workers = + std::min(std::thread::hardware_concurrency() / 2 + 1, static_cast(shard_header_.get_shard_count())); + + std::vector threads; + threads.reserve(num_workers); + + for (size_t t = 0; t < threads.capacity(); t++) { + threads.emplace_back(std::thread(&ShardIndexGenerator::DatabaseWriter, this)); + } + + for (size_t t = 0; t < threads.capacity(); t++) { + threads[t].join(); + } + return write_success_ ? 
SUCCESS : FAILED; +} + +void ShardIndexGenerator::DatabaseWriter() { + int shard_no = task_++; + while (shard_no < shard_header_.get_shard_count()) { + auto db = CreateDatabase(shard_no); + if (db.first != SUCCESS || db.second == nullptr || write_success_ == false) { + write_success_ = false; + return; + } + + MS_LOG(INFO) << "Init index db for shard: " << shard_no << " successfully."; + + // Pre-processing page information + auto total_pages = shard_header_.GetLastPageId(shard_no) + 1; + + std::map blob_id_to_page_id; + std::vector raw_page_ids; + for (uint64_t i = 0; i < total_pages; ++i) { + std::shared_ptr cur_page = shard_header_.GetPage(shard_no, i).first; + if (cur_page->get_page_type() == "RAW_DATA") { + raw_page_ids.push_back(i); + } else if (cur_page->get_page_type() == "BLOB_DATA") { + blob_id_to_page_id[cur_page->get_page_type_id()] = i; } - MS_LOG(INFO) << "Generate index db for shard: " << shard_no << " successfully."; } + + if (ExecuteTransaction(shard_no, db, raw_page_ids, blob_id_to_page_id) != SUCCESS) { + write_success_ = false; + return; + } + MS_LOG(INFO) << "Generate index db for shard: " << shard_no << " successfully."; + shard_no = task_++; } - return SUCCESS; } } // namespace mindrecord } // namespace mindspore From 6d1ea7af8e13c10795ddfe8bdd59db4767bf48bf Mon Sep 17 00:00:00 2001 From: Alexey Shevlyakov Date: Tue, 7 Apr 2020 13:32:52 -0400 Subject: [PATCH 38/58] remove make_unique.h --- mindspore/ccsrc/dataset/api/de_pipeline.cc | 7 ++- .../ccsrc/dataset/core/global_context.cc | 6 +-- mindspore/ccsrc/dataset/core/tensor.cc | 5 +-- mindspore/ccsrc/dataset/engine/data_buffer.cc | 2 +- mindspore/ccsrc/dataset/engine/data_schema.cc | 8 ++-- .../dataset/engine/datasetops/batch_op.cc | 16 +++---- .../dataset/engine/datasetops/dataset_op.cc | 10 ++--- .../engine/datasetops/device_queue_op.cc | 2 +- .../ccsrc/dataset/engine/datasetops/map_op.cc | 2 +- .../dataset/engine/datasetops/parallel_op.cc | 2 +- .../dataset/engine/datasetops/project_op.cc | 
2 +- .../dataset/engine/datasetops/rename_op.cc | 4 +- .../dataset/engine/datasetops/shuffle_op.cc | 12 +++--- .../engine/datasetops/source/celeba_op.cc | 24 +++++------ .../engine/datasetops/source/cifar_op.cc | 24 +++++------ .../engine/datasetops/source/generator_op.cc | 8 ++-- .../datasetops/source/image_folder_op.cc | 26 +++++------ .../engine/datasetops/source/manifest_op.cc | 22 +++++----- .../engine/datasetops/source/mindrecord_op.cc | 43 ++++++++++--------- .../engine/datasetops/source/mnist_op.cc | 26 +++++------ .../source/sampler/distributed_sampler.cc | 6 +-- .../datasetops/source/sampler/pk_sampler.cc | 6 +-- .../source/sampler/random_sampler.cc | 8 ++-- .../datasetops/source/sampler/sampler.cc | 2 +- .../datasetops/source/sampler/sampler.h | 1 - .../source/sampler/sequential_sampler.cc | 6 +-- .../source/sampler/subset_random_sampler.cc | 6 +-- .../source/sampler/weighted_random_sampler.cc | 10 ++--- .../datasetops/source/storage_client.cc | 9 ++-- .../engine/datasetops/source/storage_op.cc | 8 ++-- .../engine/datasetops/source/tf_buffer.cc | 7 ++- .../engine/datasetops/source/tf_reader_op.cc | 37 ++++++++-------- .../engine/datasetops/source/voc_op.cc | 22 +++++----- .../ccsrc/dataset/engine/datasetops/zip_op.cc | 14 +++--- mindspore/ccsrc/dataset/engine/db_connector.h | 2 +- .../ccsrc/dataset/engine/execution_tree.cc | 2 +- .../dataset/kernels/image/image_utils.cc | 3 +- mindspore/ccsrc/dataset/kernels/py_func_op.cc | 1 - mindspore/ccsrc/dataset/util/arena.cc | 1 - mindspore/ccsrc/dataset/util/circular_pool.cc | 4 +- mindspore/ccsrc/dataset/util/de_error.h | 7 +++ mindspore/ccsrc/dataset/util/list.h | 3 +- mindspore/ccsrc/dataset/util/lock.cc | 1 + mindspore/ccsrc/dataset/util/lock.h | 1 - mindspore/ccsrc/dataset/util/make_unique.h | 37 ---------------- mindspore/ccsrc/dataset/util/queue.h | 2 +- mindspore/ccsrc/dataset/util/task.h | 1 - mindspore/ccsrc/device/gpu/blocking_queue.cc | 3 +- .../kernel/gpu/math/bias_add_gpu_kernel.h | 5 +-- 
.../kernel/gpu/nn/bias_add_grad_gpu_kenel.h | 5 +-- .../ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h | 5 +-- .../kernel/gpu/nn/lstm_grad_data_gpu_kernel.h | 7 ++- .../gpu/nn/lstm_grad_weight_gpu_kernel.h | 5 +-- tests/ut/cpp/dataset/celeba_op_test.cc | 2 +- tests/ut/cpp/dataset/cifar_op_test.cc | 2 +- tests/ut/cpp/dataset/image_folder_op_test.cc | 14 +++--- tests/ut/cpp/dataset/manifest_op_test.cc | 2 +- tests/ut/cpp/dataset/project_op_test.cc | 2 +- .../cpp/dataset/stand_alone_samplers_test.cc | 2 +- tests/ut/cpp/dataset/tfReader_op_test.cc | 20 ++++----- 60 files changed, 241 insertions(+), 291 deletions(-) delete mode 100644 mindspore/ccsrc/dataset/util/make_unique.h diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index 65ec8d30f2..1812c0421a 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -23,7 +23,6 @@ #include "dataset/engine/datasetops/source/image_folder_op.h" #include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/util/make_unique.h" #include "dataset/core/tensor.h" #include "dataset/engine/dataset_iterator.h" #include "dataset/engine/datasetops/source/manifest_op.h" @@ -119,7 +118,7 @@ Status DEPipeline::AssignRootNode(const DsOpPtr &dataset_op) { return (tree_->As Status DEPipeline::LaunchTreeExec() { RETURN_IF_NOT_OK(tree_->Prepare()); RETURN_IF_NOT_OK(tree_->Launch()); - iterator_ = make_unique(tree_); + iterator_ = std::make_unique(tree_); if (iterator_ == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create an Iterator."); return Status::OK(); } @@ -307,7 +306,7 @@ Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptrSetSchemaFile(ToString(args["schema"])); } else if (!args["schema_json_string"].is_none()) { - std::unique_ptr schema = make_unique(); + std::unique_ptr schema = std::make_unique(); std::string s = ToString(args["schema_json_string"]); 
RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector())); (void)builder->SetNumRows(schema->num_rows()); @@ -683,7 +682,7 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr schema = make_unique(); + std::unique_ptr schema = std::make_unique(); if (args.contains("schema_file_path")) { RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load)); } else { diff --git a/mindspore/ccsrc/dataset/core/global_context.cc b/mindspore/ccsrc/dataset/core/global_context.cc index 7e361a1f2c..3de8e0fcd8 100644 --- a/mindspore/ccsrc/dataset/core/global_context.cc +++ b/mindspore/ccsrc/dataset/core/global_context.cc @@ -55,9 +55,9 @@ Status GlobalContext::Init() { // For testing we can use Dummy pool instead // Create some tensor allocators for the different types and hook them into the pool. - tensor_allocator_ = mindspore::make_unique>(mem_pool_); - cv_tensor_allocator_ = mindspore::make_unique>(mem_pool_); - int_allocator_ = mindspore::make_unique(mem_pool_); + tensor_allocator_ = std::make_unique>(mem_pool_); + cv_tensor_allocator_ = std::make_unique>(mem_pool_); + int_allocator_ = std::make_unique(mem_pool_); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/dataset/core/tensor.cc index 6aa34fa342..8f0eae459a 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ b/mindspore/ccsrc/dataset/core/tensor.cc @@ -28,7 +28,6 @@ #include "dataset/core/global_context.h" #include "dataset/core/pybind_support.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" namespace py = pybind11; namespace mindspore { @@ -53,7 +52,7 @@ namespace dataset { Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape), type_(type), data_(nullptr) { // grab the mem pool from global context and create the allocator for char data area std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); - data_allocator_ = 
mindspore::make_unique>(global_pool); + data_allocator_ = std::make_unique>(global_pool); } Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) { @@ -137,7 +136,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, py::array arr) { if ((*ptr)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type."); std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); - (*ptr)->data_allocator_ = mindspore::make_unique>(global_pool); + (*ptr)->data_allocator_ = std::make_unique>(global_pool); static_cast((*ptr)->StartAddr()); int64_t byte_size = (*ptr)->SizeInBytes(); unsigned char *data = static_cast(arr.request().ptr); diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.cc b/mindspore/ccsrc/dataset/engine/data_buffer.cc index a0f47512ec..4aed994d3c 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.cc +++ b/mindspore/ccsrc/dataset/engine/data_buffer.cc @@ -40,7 +40,7 @@ Status DataBuffer::CreateDataBuffer( case DatasetType::kTf: { // This type of buffer is for TF record data. // Allocate derived class version for a TF buffers - new_data_buffer = mindspore::make_unique(id, kDeBFlagNone, storage_client); + new_data_buffer = std::make_unique(id, kDeBFlagNone, storage_client); break; } default: { diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/dataset/engine/data_schema.cc index 68666796be..4fe5d665c6 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/dataset/engine/data_schema.cc @@ -26,8 +26,8 @@ #include "common/utils.h" #include "dataset/util/status.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" #include "utils/log_adapter.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { @@ -58,7 +58,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten // our shape. Otherwise, set our shape to be empty. 
if (in_shape != nullptr) { // Create a shape and copy construct it into our column's shape. - tensor_shape_ = mindspore::make_unique(*in_shape); + tensor_shape_ = std::make_unique(*in_shape); } else { tensor_shape_ = nullptr; } @@ -75,7 +75,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; } // Assignment overload @@ -86,7 +86,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) { tensor_impl_ = in_cd.tensor_impl_; col_name_ = in_cd.col_name_; // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? 
std::make_unique(in_cd.shape()) : nullptr; } return *this; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc index 7c5d4bd4c8..8778fe1b45 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc @@ -59,8 +59,8 @@ Status BatchOp::operator()() { TaskManager::FindMe()->Post(); int32_t epoch_num = 0, batch_num = 0, cnt = 0; TensorRow new_row; - std::unique_ptr table = make_unique(); - child_iterator_ = mindspore::make_unique(this, 0, 0); + std::unique_ptr table = std::make_unique(); + child_iterator_ = std::make_unique(this, 0, 0); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); column_name_map_ = child_iterator_->col_name_id_map(); int32_t cur_batch_size = 0; @@ -72,7 +72,7 @@ Status BatchOp::operator()() { if (table->size() == static_cast(cur_batch_size)) { RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); - table = make_unique(); + table = std::make_unique(); RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(epoch_num, batch_num, cnt - epoch_num))); } RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); @@ -82,7 +82,7 @@ Status BatchOp::operator()() { RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); } - table = make_unique(); // this drops when drop == true + table = std::make_unique(); // this drops when drop == true // end of the current epoch, batch_num should start from 0 again batch_num = 0; epoch_num++; @@ -153,9 +153,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair)); while (table_pair.second.ctrl_ != batchCtrl::kQuit) { if (table_pair.second.ctrl_ == batchCtrl::kEOE) { - 
RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique(0, DataBuffer::kDeBFlagEOE))); } else if (table_pair.second.ctrl_ == batchCtrl::kEOF) { - RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) { std::unique_ptr db = nullptr; RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &db)); @@ -170,8 +170,8 @@ Status BatchOp::MakeBatchedBuffer(std::pair, CBatc std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); if (!input_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc - (*db) = make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); - std::unique_ptr dest_table = make_unique(); + (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); + std::unique_ptr dest_table = std::make_unique(); RETURN_IF_NOT_OK(BatchRows(&table_pair.first, &dest_table, table_pair.first->size())); (*db)->set_tensor_table(std::move(dest_table)); (*db)->set_column_name_map(column_name_map_); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc index d3b85b84fb..f51c2a1539 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc @@ -80,9 +80,9 @@ void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { MS_LOG(INFO) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers << ". 
Consumer: " << num_consumers << "."; if (oc_queue_size_ > 0) { - out_connector_ = mindspore::make_unique(num_producers, // The number of producers - num_consumers, // Only one consumer (the training App) - oc_queue_size_); + out_connector_ = std::make_unique(num_producers, // The number of producers + num_consumers, // Only one consumer (the training App) + oc_queue_size_); } else { // Some op's may choose not to have an output connector MS_LOG(INFO) << "Bypassed connector creation for tree operator: " << operator_id_ << "."; @@ -149,7 +149,7 @@ Status DatasetOp::GetNextInput(std::unique_ptr *p_buffer, int32_t wo // The base class implementation simply flows the eoe message to output. Derived classes // may override if they need to perform special eoe handling. Status DatasetOp::EoeReceived(int32_t worker_id) { - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); return (out_connector_->Add(static_cast(worker_id), std::move(eoe_buffer))); } @@ -157,7 +157,7 @@ Status DatasetOp::EoeReceived(int32_t worker_id) { // The base class implementation simply flows the eof message to output. Derived classes // may override if they need to perform special eof handling. 
Status DatasetOp::EofReceived(int32_t worker_id) { - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); return (out_connector_->Add(static_cast(worker_id), std::move(eof_buffer))); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc index 3c2eea16ee..71e4ce64a4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc @@ -225,7 +225,7 @@ Status DeviceQueueOp::SendDataToCPU() { MS_LOG(INFO) << "Device queue, sending data to CPU."; int64_t total_batch = 0; - std::unique_ptr child_iterator = mindspore::make_unique(this, 0, 0); + std::unique_ptr child_iterator = std::make_unique(this, 0, 0); while (!(child_iterator->eof_handled())) { TensorRow curr_row; RETURN_IF_NOT_OK(child_iterator->FetchNextTensorRow(&curr_row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc index c9f1f98ae0..b6d603bac9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc @@ -179,7 +179,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(WorkerEntryInit(in_buffer.get(), &keep_input_columns, &to_process_indices, &final_col_name_id_map, &input_columns, &output_columns)); - std::unique_ptr new_tensor_table(mindspore::make_unique()); + std::unique_ptr new_tensor_table(std::make_unique()); // Perform the compute function of TensorOp(s) and store the result in new_tensor_table. 
RETURN_IF_NOT_OK(WorkerCompute(in_buffer.get(), to_process_indices, new_tensor_table.get(), keep_input_columns, &input_columns, &output_columns)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc index d9792312a3..4b2af2250a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc @@ -48,7 +48,7 @@ Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) { // Instantiate the worker connector. This is the internal connector, not the operators // output connector. It has single master consuming from it (num producers is 1), and the number // of workers is the defined count from the op. - worker_connector_ = mindspore::make_unique(num_workers_, num_producers_, worker_connector_size); + worker_connector_ = std::make_unique(num_workers_, num_producers_, worker_connector_size); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc index 11296f84f4..b87967dde8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc @@ -79,7 +79,7 @@ Status ProjectOp::Project(std::unique_ptr *data_buffer) { new_column_name_mapping[current_column] = i; projected_column_indices.push_back(column_name_mapping[current_column]); } - std::unique_ptr new_tensor_table = mindspore::make_unique(); + std::unique_ptr new_tensor_table = std::make_unique(); while ((*data_buffer)->NumRows() > 0) { TensorRow current_row; RETURN_IF_NOT_OK((*data_buffer)->PopRow(¤t_row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc index c09f56141e..725476bf91 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc @@ -84,13 +84,13 @@ 
Status RenameOp::operator()() { // we got eoe, now try again until we get eof MS_LOG(INFO) << "Rename operator EOE Received."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); MS_LOG(DEBUG) << "Rename operator fetching buffer after EOE."; RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); } // end of while eof loop MS_LOG(INFO) << "Rename opeerator EOF Received."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc index 5dae48ad73..2afafe2128 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc @@ -70,7 +70,7 @@ ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_con rng_(shuffle_seed), buffer_counter_(0), rows_per_buffer_(rows_per_buffer), - shuffle_buffer_(mindspore::make_unique()), + shuffle_buffer_(std::make_unique()), shuffle_last_row_idx_(0), shuffle_buffer_state_(kShuffleStateInit) {} @@ -90,7 +90,7 @@ Status ShuffleOp::SelfReset() { shuffle_seed_ = distribution(random_device); rng_ = std::mt19937_64(shuffle_seed_); } - shuffle_buffer_ = mindspore::make_unique(); + shuffle_buffer_ = std::make_unique(); buffer_counter_ = 0; shuffle_last_row_idx_ = 0; shuffle_buffer_state_ = kShuffleStateInit; @@ -142,7 +142,7 @@ Status ShuffleOp::operator()() { // Create the child iterator to fetch our data from. 
int32_t worker_id = 0; int32_t child_idx = 0; - child_iterator_ = mindspore::make_unique(this, worker_id, child_idx); + child_iterator_ = std::make_unique(this, worker_id, child_idx); // Main operator loop while (true) { @@ -161,7 +161,7 @@ Status ShuffleOp::operator()() { // Step 1) // Create an output tensor table if one is not created yet. if (!new_buffer_table) { - new_buffer_table = mindspore::make_unique(); + new_buffer_table = std::make_unique(); } // Step 2) @@ -176,7 +176,7 @@ Status ShuffleOp::operator()() { // and send this buffer on it's way up the pipeline. Special case is if this is the // last row then we also send it. if (new_buffer_table->size() == rows_per_buffer_ || shuffle_last_row_idx_ == 0) { - auto new_buffer = mindspore::make_unique(buffer_counter_, DataBuffer::kDeBFlagNone); + auto new_buffer = std::make_unique(buffer_counter_, DataBuffer::kDeBFlagNone); new_buffer->set_tensor_table(std::move(new_buffer_table)); new_buffer->set_column_name_map(column_name_map_); buffer_counter_++; @@ -218,7 +218,7 @@ Status ShuffleOp::operator()() { // Since we overloaded eoeReceived function, we are responsible to flow the EOE up the // pipepline manually now that we are done draining the shuffle buffer MS_LOG(INFO) << "Shuffle operator sending EOE."; - auto eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); // Do not wait for any reset to be flown down from operators above us. 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc index 570fc9f454..0c2e20729e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc @@ -40,7 +40,7 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); // label is like this:0 1 0 0 1...... @@ -83,7 +83,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri col_name_map_[data_schema_->column(index).name()] = index; } - attr_info_queue_ = make_unique>>(queue_size); + attr_info_queue_ = std::make_unique>>(queue_size); io_block_queues_.Init(num_workers_, queue_size); } @@ -311,7 +311,7 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { row_count++; if (row_count % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buff_count++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -320,21 +320,21 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { if (!keys.empty()) { RETURN_IF_NOT_OK(io_block_queues_[(buff_count++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % 
num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(std::move(make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(data_buffer)); @@ -349,17 +349,17 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty()) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); 
buffer_id += num_workers_; @@ -370,7 +370,7 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { } Status CelebAOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow row; RETURN_IF_NOT_OK(LoadTensorRow(image_labels_vec_[key], &row)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc index 260a4a4dc5..3e64c8a3e6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc @@ -47,7 +47,7 @@ Status CifarOp::Builder::Build(std::shared_ptr *ptr) { if (sampler_ == nullptr) { sampler_ = std::make_shared(); } - schema_ = make_unique(); + schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); if (cifar_type_ == kCifar10) { @@ -91,7 +91,7 @@ CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const col_name_map_[data_schema_->column(i).name()] = i; } constexpr uint64_t kUtilQueueSize = 512; - cifar_raw_data_block_ = make_unique>>(kUtilQueueSize); + cifar_raw_data_block_ = std::make_unique>>(kUtilQueueSize); io_block_queues_.Init(num_workers_, queue_size); } @@ -114,7 +114,7 @@ Status CifarOp::operator()() { if (row_cnt_ >= num_samples_) break; // enough row read, break for loop if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -122,21 +122,21 @@ Status CifarOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, 
IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -169,17 +169,17 @@ Status CifarOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } 
else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -213,7 +213,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer Status CifarOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const int64_t &key : keys) { TensorRow trow; RETURN_IF_NOT_OK(LoadTensorRow(key, &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc index ceb88ceb0e..37a74f019a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc @@ -173,9 +173,9 @@ Status GeneratorOp::operator()() { bool eof = false; while (!eof) { // Create new buffer each iteration - fetched_buffer = mindspore::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + fetched_buffer = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); fetched_buffer->set_column_name_map(column_names_map_); - std::unique_ptr fetched_table = mindspore::make_unique(); + std::unique_ptr fetched_table = std::make_unique(); bool eoe = false; { py::gil_scoped_acquire gil_acquire; @@ -201,12 +201,12 @@ Status GeneratorOp::operator()() { if (eoe) { // Push out EOE upon StopIteration exception from generator MS_LOG(INFO) << "Generator operator sends out EOE."; - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = 
std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { // If last repeat or not repeated, push out EOF and exit master loop MS_LOG(INFO) << "Generator operator sends out EOF."; - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); MS_LOG(INFO) << "Generator operator main execution loop complete."; eof = true; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc index b8044fb38a..f6cf377666 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc @@ -39,7 +39,7 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); @@ -82,8 +82,8 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { col_name_map_[data_schema_->column(i).name()] = i; } - folder_name_queue_ = make_unique>(num_wkrs * queue_size); - image_name_queue_ = make_unique>(num_wkrs * queue_size); + folder_name_queue_ = std::make_unique>(num_wkrs * queue_size); + image_name_queue_ = std::make_unique>(num_wkrs * queue_size); io_block_queues_.Init(num_workers_, queue_size); } @@ -143,7 +143,7 @@ Status ImageFolderOp::operator()() { row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { 
RETURN_IF_NOT_OK( - io_block_queues_[buf_cnt_++ % num_workers_]->Add(make_unique(keys, IOBlock::kDeIoBlockNone))); + io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique(keys, IOBlock::kDeIoBlockNone))); keys.clear(); } } @@ -151,21 +151,21 @@ Status ImageFolderOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(keys, IOBlock::kDeIoBlockNone))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(keys, IOBlock::kDeIoBlockNone))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { - std::unique_ptr eoe_block = make_unique(IOBlock::kDeIoBlockFlagEoe); - std::unique_ptr eof_block = make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eoe_block = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eof_block = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); for (int32_t i = 0; i < num_workers_; ++i) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { // not the last repeat. 
Sleep master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -182,15 +182,15 @@ Status ImageFolderOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -231,7 +231,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { // Looping over LoadTensorRow to make 1 DataBuffer. 
1 function call produces 1 buffer Status ImageFolderOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc index 52db199e5b..6907647952 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc @@ -40,7 +40,7 @@ Status ManifestOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( @@ -105,7 +105,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } @@ -113,21 +113,21 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - 
io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(sampler_buffer)); @@ -160,17 +160,17 @@ Status ManifestOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty()) { return Status::OK(); // empty key is a quit signal for workers } - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -227,7 +227,7 @@ Status ManifestOp::LoadTensorRow(const std::pair 
&keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow trow; RETURN_IF_NOT_OK(LoadTensorRow(image_labelname_[static_cast(key)], &trow)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index cb0f135a0d..fbb772af59 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -28,7 +28,6 @@ #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/execution_tree.h" -#include "dataset/util/make_unique.h" #include "utils/log_adapter.h" namespace mindspore { @@ -94,19 +93,19 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf io_blk_queues_.Init(num_workers_, op_connector_queue_size); if (!block_reader_) return; for (int32_t i = 0; i < num_workers_; ++i) { - block_buffer_.emplace_back(make_unique>(std::vector{})); + block_buffer_.emplace_back(std::make_unique>(std::vector{})); } } // Private helper method to encapsulate some common construction/reset tasks Status MindRecordOp::Init() { - shard_reader_ = mindspore::make_unique(); + shard_reader_ = std::make_unique(); auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_); CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, "MindRecordOp init failed. 
Error message: " + ErrnoToMessage(rc)); - data_schema_ = mindspore::make_unique(); + data_schema_ = std::make_unique(); std::vector> schema_vec = shard_reader_->get_shard_header()->get_schemas(); // check whether schema exists, if so use the first one @@ -143,7 +142,7 @@ Status MindRecordOp::Init() { } if (!load_all_cols) { - std::unique_ptr tmp_schema = make_unique(); + std::unique_ptr tmp_schema = std::make_unique(); for (std::string colname : columns_to_load_) { CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), colname + ": doesn't exist"); RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); @@ -297,7 +296,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr *arr RETURN_IF_NOT_OK(GetFloat(&value, columns_json[column_name], use_double)); *new_shape = TensorShape::CreateScalar(); - *array_data = mindspore::make_unique(1); + *array_data = std::make_unique(1); (*array_data)[0] = value; } else { if (column.hasShape()) { @@ -308,7 +307,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr *arr } int idx = 0; - *array_data = mindspore::make_unique(new_shape->NumOfElements()); + *array_data = std::make_unique(new_shape->NumOfElements()); for (auto &element : columns_json[column_name]) { T value = 0; RETURN_IF_NOT_OK(GetFloat(&value, element, use_double)); @@ -349,7 +348,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr *array RETURN_IF_NOT_OK(GetInt(&value, columns_json[column_name])); *new_shape = TensorShape::CreateScalar(); - *array_data = mindspore::make_unique(1); + *array_data = std::make_unique(1); (*array_data)[0] = value; } else { if (column.hasShape()) { @@ -360,7 +359,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr *array } int idx = 0; - *array_data = mindspore::make_unique(new_shape->NumOfElements()); + *array_data = std::make_unique(new_shape->NumOfElements()); for (auto &element : 
columns_json[column_name]) { T value = 0; RETURN_IF_NOT_OK(GetInt(&value, element)); @@ -430,12 +429,14 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK( + out_connector_->Add(worker_id, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); continue; } if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK( + out_connector_->Add(worker_id, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); continue; } @@ -485,9 +486,9 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { Status MindRecordOp::GetBufferFromReader(std::unique_ptr *fetched_buffer, int64_t buffer_id, int32_t worker_id) { - *fetched_buffer = mindspore::make_unique(buffer_id, DataBuffer::kDeBFlagNone); + *fetched_buffer = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); (*fetched_buffer)->set_column_name_map(column_name_mapping_); - std::unique_ptr tensor_table = mindspore::make_unique(); + std::unique_ptr tensor_table = std::make_unique(); for (int32_t i = 0; i < rows_per_buffer_; ++i) { ShardTuple tupled_buffer; if (block_reader_) { @@ -596,22 +597,22 @@ Status MindRecordOp::operator()() { for (int32_t i = 0; i < buffers_needed_; ++i) { if (block_reader_) RETURN_IF_NOT_OK(FetchBlockBuffer(i)); std::vector keys(1, i); - RETURN_IF_NOT_OK( - io_blk_queues_[buf_cnt_++ % num_workers_]->Add(make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + RETURN_IF_NOT_OK(io_blk_queues_[buf_cnt_++ % num_workers_]->Add( + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, 
kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; i++) { - RETURN_IF_NOT_OK( - io_blk_queues_[i]->Add(std::move(make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); + RETURN_IF_NOT_OK(io_blk_queues_[i]->Add( + std::move(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone)))); } return Status::OK(); } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset RETURN_IF_NOT_OK( - io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); // reset our buffer count and go to loop again. 
RETURN_IF_NOT_OK(shard_reader_wait_post_.Wait()); @@ -655,7 +656,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() { } Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *count) { - std::unique_ptr shard_reader = mindspore::make_unique(); + std::unique_ptr shard_reader = std::make_unique(); MSRStatus rc = shard_reader->CountTotalRows(dataset_path, count); if (rc == MSRStatus::FAILED) { RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed."); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc index f76fb9314d..3431e58aea 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc @@ -43,7 +43,7 @@ Status MnistOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); TensorShape scalar = TensorShape::CreateScalar(); @@ -89,7 +89,7 @@ Status MnistOp::TraversalSampleIds(const std::shared_ptr &sample_ids, st row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); keys->clear(); } } @@ -115,21 +115,21 @@ Status MnistOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % 
num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEof))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); for (int32_t i = 0; i < num_workers_; ++i) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -145,15 +145,15 @@ Status MnistOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock)); while (iOBlock != nullptr) { if (iOBlock->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (iOBlock->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOF))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); } else { std::vector keys; RETURN_IF_NOT_OK(iOBlock->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); 
RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; @@ -178,7 +178,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow) // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer Status MnistOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); @@ -309,8 +309,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "num_images != num_labels"); // The image size of the Mnist dataset is fixed at [28,28] int64_t size = kMnistImageRows * kMnistImageCols; - auto images_buf = mindspore::make_unique(size * num_images); - auto labels_buf = mindspore::make_unique(num_images); + auto images_buf = std::make_unique(size * num_images); + auto labels_buf = std::make_unique(num_images); if (images_buf == nullptr || labels_buf == nullptr) { std::string err_msg = "Fail to allocate memory for MNIST Buffer."; MS_LOG(ERROR) << err_msg.c_str(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index 51ad71e8cf..28a5705648 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -52,9 +52,9 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer if (cnt_ > samples_per_buffer_) { RETURN_STATUS_UNEXPECTED("Distributed Sampler Error"); } else if (cnt_ == samples_per_buffer_) { - (*out_buffer) = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, 
DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = mindspore::make_unique(cnt_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(cnt_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); int64_t *id_ptr = reinterpret_cast(sample_ids->StartAddr()); @@ -63,7 +63,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast(next_id)] : next_id; } TensorRow row(1, sample_ids); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc index 04a6ad17a2..8c8c12fce2 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -53,9 +53,9 @@ Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { if (next_id_ > num_pk_samples_ || num_pk_samples_ == 0) { RETURN_STATUS_UNEXPECTED("Index out of bound in PKSampler"); } else if (next_id_ == num_pk_samples_) { - (*out_buffer) = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = mindspore::make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; int64_t last_id = (samples_per_buffer_ + next_id_ > num_pk_samples_) ? 
num_pk_samples_ : samples_per_buffer_ + next_id_; @@ -68,7 +68,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { *(id_ptr++) = samples[rnd_ind]; } TensorRow row(1, sample_ids); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc index de4d89d950..216f322052 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -32,9 +32,9 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED("RandomSampler Internal Error"); } else if (next_id_ == num_samples_) { - (*out_buffer) = make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? 
num_samples_ : samples_per_buffer_ + next_id_; RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); @@ -44,7 +44,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { } next_id_ = last_id; TensorRow row(1, sampleIds); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } @@ -61,7 +61,7 @@ Status RandomSampler::Init(const RandomAccessOp *op) { } std::shuffle(shuffled_ids_.begin(), shuffled_ids_.end(), rnd_); } else { - dist = make_unique>(0, num_rows_ - 1); + dist = std::make_unique>(0, num_rows_ - 1); } rnd_.seed(seed_++); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc index 9818cd8a17..aa3838f8b5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc @@ -35,7 +35,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr *sample_ids, int64_t } if (col_desc_ == nullptr) { // a ColDescriptor for Tensor that holds SampleIds - col_desc_ = make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); + col_desc_ = std::make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); } TensorShape shape(std::vector(1, num_elements)); RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h index d9a20f9170..801565508b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h @@ -27,7 +27,6 @@ #include "dataset/engine/data_buffer.h" #include "dataset/engine/data_schema.h" #include 
"dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 71c3dd07c4..72131a6de1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -25,9 +25,9 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED("Sequential Sampler Internal Error"); } else if (next_id_ == num_samples_) { - (*out_buffer) = make_unique(0, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? 
num_samples_ : samples_per_buffer_ + next_id_; RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); @@ -36,7 +36,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) *(idPtr++) = next_id_++; } TensorRow row(1, sampleIds); - (*out_buffer)->set_tensor_table(make_unique(1, row)); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index 4f727fcd04..16603939b3 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -64,9 +64,9 @@ Status SubsetRandomSampler::Reset() { Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { // All samples have been drawn if (sample_id_ == indices_.size()) { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); std::shared_ptr outputIds; int64_t last_id = sample_id_ + samples_per_buffer_; @@ -92,7 +92,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffe } // Create a TensorTable from that single tensor and push into DataBuffer - (*out_buffer)->set_tensor_table(make_unique(1, TensorRow(1, outputIds))); + (*out_buffer)->set_tensor_table(std::make_unique(1, TensorRow(1, outputIds))); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index f305474182..f2957e74be 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -46,10 +46,10 @@ Status WeightedRandomSampler::Init(const RandomAccessOp *op) { CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && samples_per_buffer_ > 0, "Fail to init WeightedRandomSampler"); if (!replacement_) { - exp_dist_ = mindspore::make_unique>(1); + exp_dist_ = std::make_unique>(1); InitOnePassSampling(); } else { - discrete_dist_ = mindspore::make_unique>(weights_.begin(), weights_.end()); + discrete_dist_ = std::make_unique>(weights_.begin(), weights_.end()); } return Status::OK(); @@ -96,9 +96,9 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buf } if (sample_id_ == num_samples_) { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { - (*out_buffer) = make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); std::shared_ptr outputIds; int64_t last_id = sample_id_ + samples_per_buffer_; @@ -132,7 +132,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buf } // Create a TensorTable from that single tensor and push into DataBuffer - (*out_buffer)->set_tensor_table(make_unique(1, TensorRow(1, outputIds))); + (*out_buffer)->set_tensor_table(std::make_unique(1, TensorRow(1, outputIds))); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc index 95720a97be..862edcf63a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc @@ -24,7 +24,6 @@ #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/engine/datasetops/source/storage_op.h" #include 
"dataset/engine/datasetops/source/tf_client.h" -#include "dataset/util/make_unique.h" #include "dataset/util/status.h" namespace mindspore { @@ -57,7 +56,7 @@ static Status CreateStorageClientSwitch( case DatasetType::kTf: { // Construct the derived class TFClient, stored as base class StorageClient store_op->set_rows_per_buffer(32); - *out_client = mindspore::make_unique(std::move(schema), store_op); + *out_client = std::make_unique(std::move(schema), store_op); break; } case DatasetType::kUnknown: @@ -83,7 +82,7 @@ Status StorageClient::CreateStorageClient( std::shared_ptr *out_client) { // Out: the created storage client // Make a new schema first. This only assigns the dataset type. It does not // create the columns yet. - auto new_schema = mindspore::make_unique(); + auto new_schema = std::make_unique(); RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path)); RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); return Status::OK(); @@ -99,7 +98,7 @@ Status StorageClient::CreateStorageClient( std::shared_ptr *out_client) { // Out: the created storage client // The dataset type is passed in by the user. Create an empty schema with only // only the dataset type filled in and then create the client with it. - auto new_schema = mindspore::make_unique(); + auto new_schema = std::make_unique(); new_schema->set_dataset_type(in_type); RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); return Status::OK(); @@ -147,7 +146,7 @@ Status StorageClient::AssignDatasetLayout(uint32_t num_rows, // In: Th // The current schema was just an empty one with only the dataset field populated. // Let's copy construct a new one that will be a copy of the input schema (releasing the old // one) and then set the number of rows that the user requested. 
- data_schema_ = mindspore::make_unique(schema); + data_schema_ = std::make_unique(schema); CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647"); num_rows_in_dataset_ = num_rows; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc index 9da27eac04..2ca957ae6d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc @@ -303,7 +303,7 @@ Status StorageOp::init() { // For simplicity, we'll make both of them 3 so they are the same size. int32_t action_queue_size = (buffers_needed / num_workers_) + 1; for (int32_t i = 0; i < num_workers_; ++i) { - auto new_queue = mindspore::make_unique>(action_queue_size); + auto new_queue = std::make_unique>(action_queue_size); action_queue_.push_back(std::move(new_queue)); } } @@ -483,10 +483,10 @@ Status StorageOp::operator()() { // Post the control message to tell the workers to stop waiting on action queue // because we are done! RETURN_IF_NOT_OK(this->PostEndOfData()); - std::unique_ptr eoeBuffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoeBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoeBuffer))); MS_LOG(INFO) << "StorageOp master: Flow end-of-data eof message."; - std::unique_ptr eofBuffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eofBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eofBuffer))); MS_LOG(INFO) << "StorageOp master: Main execution loop complete."; done = true; // while loop exit @@ -496,7 +496,7 @@ Status StorageOp::operator()() { // RepeatOp above us somewhere in the tree will re-init us with the data to fetch again // once it gets the end-of-epoch message. 
MS_LOG(INFO) << "StorageOp master: Flow end-of-epoch eoe message."; - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); // reset our buffer count and go to loop again. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc index 766c2149c4..372dcd2c1c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc @@ -27,7 +27,6 @@ #include "dataset/core/data_type.h" #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/engine/data_schema.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { @@ -72,7 +71,7 @@ Status TFBuffer::Load() { } // Construct the Tensor table for this buffer. - tensor_table_ = mindspore::make_unique(); + tensor_table_ = std::make_unique(); // At each position in the tensor table, instantiate the shared pointer to it's Tensor. 
uint32_t row = 0; @@ -272,7 +271,7 @@ Status TFBuffer::LoadFloatList(const ColDescriptor ¤t_col, const dataengin // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = float_list.value_size(); - *float_array = mindspore::make_unique(*num_elements); + *float_array = std::make_unique(*num_elements); for (int i = 0; i < float_list.value_size(); i++) { (*float_array)[i] = float_list.value(i); } @@ -294,7 +293,7 @@ Status TFBuffer::LoadIntList(const ColDescriptor ¤t_col, const dataengine: // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = int64_list.value_size(); - *int_array = mindspore::make_unique(*num_elements); + *int_array = std::make_unique(*num_elements); for (int i = 0; i < int64_list.value_size(); i++) { (*int_array)[i] = int64_list.value(i); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc index c872c02015..0764d7e0ad 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc @@ -36,7 +36,6 @@ #include "dataset/engine/db_connector.h" #include "dataset/engine/execution_tree.h" #include "dataset/engine/jagged_connector.h" -#include "dataset/util/make_unique.h" #include "dataset/util/path.h" #include "dataset/util/queue.h" #include "dataset/util/random.h" @@ -54,7 +53,7 @@ TFReaderOp::Builder::Builder() builder_op_connector_size_ = config_manager->op_connector_size(); builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_shuffle_files_ = false; - builder_data_schema_ = make_unique(); + builder_data_schema_ = std::make_unique(); } Status TFReaderOp::Builder::ValidateInputs() const { @@ -103,7 +102,7 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64 finished_reading_dataset_(false), 
shuffle_files_(shuffle_files), data_schema_(std::move(data_schema)), - filename_index_(make_unique()), + filename_index_(std::make_unique()), load_io_block_queue_(true), load_jagged_connector_(true), num_rows_(0), @@ -129,7 +128,7 @@ Status TFReaderOp::Init() { // parallel op base. RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_connector_size_)); - jagged_buffer_connector_ = mindspore::make_unique(num_workers_, 1, worker_connector_size_); + jagged_buffer_connector_ = std::make_unique(num_workers_, 1, worker_connector_size_); // temporary: make size large enough to hold all files + EOE to avoid hangs int32_t safe_queue_size = static_cast(std::ceil(dataset_files_list_.size() / num_workers_)) + 1; @@ -229,7 +228,7 @@ Status TFReaderOp::operator()() { } // all workers finished reading for this epoch, and we have read all the data from all workers - std::unique_ptr eoe_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { @@ -241,7 +240,7 @@ Status TFReaderOp::operator()() { } } - std::unique_ptr eof_buffer = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); RETURN_IF_NOT_OK(PostEndOfData()); @@ -274,7 +273,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { MS_LOG(INFO) << "TFReader operator worker " << worker_id << " loaded file " << filename << "."; } } else { - std::unique_ptr eoe_buffer = mindspore::make_unique(1, DataBuffer::kDeBFlagEOE); + std::unique_ptr eoe_buffer = std::make_unique(1, DataBuffer::kDeBFlagEOE); RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(eoe_buffer))); } @@ -288,7 +287,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { // 
When the worker pops this control indicator, it will shut itself down gracefully. Status TFReaderOp::PostEndOfData() { for (int i = 0; i < num_workers_; ++i) { - std::unique_ptr eof = mindspore::make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eof = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(PushIoBlockQueue(i, std::move(eof))); } @@ -299,7 +298,7 @@ Status TFReaderOp::PostEndOfData() { // pops this control indicator, it will wait until the next epoch starts and then resume execution. Status TFReaderOp::PostEndOfEpoch(int32_t queue_index) { for (int i = 0; i < num_workers_; ++i) { - std::unique_ptr eoe = mindspore::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eoe = std::make_unique(IOBlock::kDeIoBlockFlagEoe); RETURN_IF_NOT_OK(PushIoBlockQueue((queue_index + i) % num_workers_, std::move(eoe))); } @@ -358,7 +357,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector &i_keys) { } if (!equal_rows_per_shard_) { if (key_index++ % num_devices_ == device_id_) { - auto ioBlock = make_unique(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } @@ -367,7 +366,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector &i_keys) { auto file_it = filename_index_->Search(*it); std::string file_name = file_it.value(); if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { - auto ioBlock = make_unique(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); MS_LOG(DEBUG) << "File name " << *it << " start offset " << start_offset << " end_offset " << end_offset; queue_index = (queue_index + 1) % num_workers_; @@ -404,14 
+403,15 @@ Status TFReaderOp::FillIOBlockNoShuffle() { } if (!equal_rows_per_shard_) { if (key_index++ % num_devices_ == device_id_) { - auto ioBlock = make_unique(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); + auto ioBlock = + std::make_unique(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } } else { std::string file_name = it.value(); if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { - auto ioBlock = make_unique(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); + auto ioBlock = std::make_unique(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); queue_index = (queue_index + 1) % num_workers_; } @@ -490,14 +490,13 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off int64_t rows_read = 0; int64_t rows_total = 0; - std::unique_ptr current_buffer = - mindspore::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + std::unique_ptr current_buffer = std::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); std::unordered_map column_name_map; for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { column_name_map[data_schema_->column(i).name()] = i; } current_buffer->set_column_name_map(column_name_map); - std::unique_ptr new_tensor_table = make_unique(); + std::unique_ptr new_tensor_table = std::make_unique(); while (reader.peek() != EOF) { if (!load_jagged_connector_) { @@ -532,9 +531,9 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off current_buffer->set_tensor_table(std::move(new_tensor_table)); RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(current_buffer))); - current_buffer = make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + current_buffer = std::make_unique(0, 
DataBuffer::BufferFlags::kDeBFlagNone); current_buffer->set_column_name_map(column_name_map); - new_tensor_table = make_unique(); + new_tensor_table = std::make_unique(); rows_read = 0; } } @@ -742,7 +741,7 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng // Identify how many values we have and then create a local array of these // to deserialize into *num_elements = float_list.value_size(); - *float_array = mindspore::make_unique(*num_elements); + *float_array = std::make_unique(*num_elements); for (int i = 0; i < float_list.value_size(); ++i) { (*float_array)[i] = float_list.value(i); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc index d3785f4660..71b4c47cf5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc @@ -38,7 +38,7 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { if (builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); } - builder_schema_ = make_unique(); + builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( @@ -85,7 +85,7 @@ Status VOCOp::TraverseSampleIds(const std::shared_ptr &sample_ids, std:: row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( - make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); keys->clear(); } } @@ -110,21 +110,21 @@ Status VOCOp::operator()() { } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( - make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); } if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, 
kDeOpLastRepeat)) { - std::unique_ptr eoe_block = make_unique(IOBlock::kDeIoBlockFlagEoe); - std::unique_ptr eof_block = make_unique(IOBlock::kDeIoBlockFlagEof); + std::unique_ptr eoe_block = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eof_block = std::make_unique(IOBlock::kDeIoBlockFlagEof); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); for (int32_t i = 0; i < num_workers_; i++) { RETURN_IF_NOT_OK( - io_block_queues_[i]->Add(make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); } return Status::OK(); } else { RETURN_IF_NOT_OK( - io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique(IOBlock::kDeIoBlockFlagEoe))); + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); wp_.Clear(); RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); @@ -164,7 +164,7 @@ Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { } Status VOCOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { - std::unique_ptr deq = make_unique(); + std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const uint64_t &key : keys) { RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); @@ -182,15 +182,15 @@ Status VOCOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); while (io_block != nullptr) { if (io_block->eoe() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); buffer_id = worker_id; } else if (io_block->eof() == true) { - RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (make_unique(0, DataBuffer::kDeBFlagEOF)))); + 
RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique(0, DataBuffer::kDeBFlagEOF)))); } else { std::vector keys; RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); if (keys.empty() == true) return Status::OK(); - std::unique_ptr db = make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); buffer_id += num_workers_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc index 716c853488..ec771740c1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc @@ -65,13 +65,13 @@ Status ZipOp::operator()() { // initialize the iterators for (int32_t i = 0; i < children_num_; ++i) { // magic number 0 since Zip is not a parallel Op - child_iterators_.push_back(mindspore::make_unique(this, 0, i)); + child_iterators_.push_back(std::make_unique(this, 0, i)); } // Loop until eof is true while (!eof_) { // Create tensor table and prepare it by fetching and packing the first zipped row into it. - std::unique_ptr curr_table = mindspore::make_unique(); + std::unique_ptr curr_table = std::make_unique(); RETURN_IF_NOT_OK(prepare(curr_table.get())); // If an eof got picked up during the above prepare, then we're done @@ -81,7 +81,7 @@ Status ZipOp::operator()() { while (!draining_) { // 1. If a previous loop iteration sent the current table out, then create a new one. if (curr_table == nullptr) { - curr_table = mindspore::make_unique(); + curr_table = std::make_unique(); } // 2 fill the table. 
Note: draining mode might get turned on if any of the child inputs were done @@ -89,8 +89,7 @@ Status ZipOp::operator()() { // 3 create and update buffer and send it to the out connector if (!curr_table->empty()) { - std::unique_ptr curr_buffer = - mindspore::make_unique(buffer_id_, DataBuffer::kDeBFlagNone); + std::unique_ptr curr_buffer = std::make_unique(buffer_id_, DataBuffer::kDeBFlagNone); curr_buffer->set_tensor_table(std::move(curr_table)); curr_buffer->set_column_name_map(col_name_id_map_); MS_LOG(DEBUG) << "Zip operator finished one buffer, pushing, rows " << curr_buffer->NumRows() << ", cols " @@ -105,15 +104,14 @@ Status ZipOp::operator()() { MS_LOG(DEBUG) << "Zip operator is now draining child inputs."; RETURN_IF_NOT_OK(drainPipeline()); // Now that we have drained child inputs, send the eoe up. - RETURN_IF_NOT_OK( - out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); } } // 5 handle eof // propagate eof here. MS_LOG(INFO) << "Zip operator got EOF, propagating."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique(0, DataBuffer::kDeBFlagEOF)))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/db_connector.h b/mindspore/ccsrc/dataset/engine/db_connector.h index 243e77e6ac..7ea9837c44 100644 --- a/mindspore/ccsrc/dataset/engine/db_connector.h +++ b/mindspore/ccsrc/dataset/engine/db_connector.h @@ -65,7 +65,7 @@ class DbConnector : public Connector> { RETURN_IF_NOT_OK(cv_.Wait(&lk, [this, worker_id]() { return expect_consumer_ == worker_id; })); // Once an EOF message is encountered this flag will be set and we can return early. 
if (end_of_file_) { - *result = mindspore::make_unique(0, DataBuffer::kDeBFlagEOF); + *result = std::make_unique(0, DataBuffer::kDeBFlagEOF); } else { RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); if (*result == nullptr) { diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/dataset/engine/execution_tree.cc index 3dbeaa5ed1..20fcb836c5 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/dataset/engine/execution_tree.cc @@ -24,7 +24,7 @@ namespace mindspore { namespace dataset { // Constructor ExecutionTree::ExecutionTree() : id_count_(0) { - tg_ = mindspore::make_unique(); + tg_ = std::make_unique(); tree_state_ = kDeTStateInit; prepare_flags_ = kDePrepNone; } diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc index 0412f07636..8735cf7a05 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc @@ -24,7 +24,6 @@ #include "dataset/core/cv_tensor.h" #include "dataset/core/tensor.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/make_unique.h" #include "dataset/util/random.h" #define MAX_INT_PRECISION 16777216 // float int precision is 16777216 @@ -376,7 +375,7 @@ Status HwcToChw(std::shared_ptr input, std::shared_ptr *output) int width = input_cv->shape()[1]; int num_channels = input_cv->shape()[2]; - auto output_cv = mindspore::make_unique(TensorShape{num_channels, height, width}, input_cv->type()); + auto output_cv = std::make_unique(TensorShape{num_channels, height, width}, input_cv->type()); for (int i = 0; i < num_channels; ++i) { cv::Mat mat; RETURN_IF_NOT_OK(output_cv->Mat({i}, &mat)); diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/dataset/kernels/py_func_op.cc index 69bd3443c4..c9e5d5b169 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/dataset/kernels/py_func_op.cc @@ -20,7 
+20,6 @@ #include "dataset/core/tensor.h" #include "dataset/kernels/tensor_op.h" -#include "dataset/util/make_unique.h" #include "dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/arena.cc b/mindspore/ccsrc/dataset/util/arena.cc index 856f7fef24..68673529ff 100644 --- a/mindspore/ccsrc/dataset/util/arena.cc +++ b/mindspore/ccsrc/dataset/util/arena.cc @@ -16,7 +16,6 @@ #include "dataset/util/arena.h" #include #include -#include "dataset/util/make_unique.h" #include "dataset/util/system_pool.h" #include "dataset/util/de_error.h" #include "./securec.h" diff --git a/mindspore/ccsrc/dataset/util/circular_pool.cc b/mindspore/ccsrc/dataset/util/circular_pool.cc index f6e43f35bf..92b169c94a 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.cc +++ b/mindspore/ccsrc/dataset/util/circular_pool.cc @@ -18,10 +18,8 @@ #include #include #include - #include "./securec.h" - -#include "dataset/util/make_unique.h" +#include "dataset/util/de_error.h" #include "dataset/util/system_pool.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/de_error.h b/mindspore/ccsrc/dataset/util/de_error.h index 07d7c125f7..d4988c58db 100644 --- a/mindspore/ccsrc/dataset/util/de_error.h +++ b/mindspore/ccsrc/dataset/util/de_error.h @@ -16,6 +16,13 @@ #ifndef DATASET_UTIL_DE_ERROR_H_ #define DATASET_UTIL_DE_ERROR_H_ +#ifdef DEBUG +#include +#define DS_ASSERT(f) assert(f) +#else +#define DS_ASSERT(f) ((void)0) +#endif + #include #include "utils/error_code.h" diff --git a/mindspore/ccsrc/dataset/util/list.h b/mindspore/ccsrc/dataset/util/list.h index f01201e34d..5a08f4514e 100644 --- a/mindspore/ccsrc/dataset/util/list.h +++ b/mindspore/ccsrc/dataset/util/list.h @@ -18,8 +18,7 @@ #include #include - -#include "dataset/util/make_unique.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/lock.cc b/mindspore/ccsrc/dataset/util/lock.cc index 7e92a1e53f..13a43e3e84 100644 --- 
a/mindspore/ccsrc/dataset/util/lock.cc +++ b/mindspore/ccsrc/dataset/util/lock.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "dataset/util/lock.h" +#include "dataset/util/de_error.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/lock.h b/mindspore/ccsrc/dataset/util/lock.h index 8fef6a143a..9492d34bdf 100644 --- a/mindspore/ccsrc/dataset/util/lock.h +++ b/mindspore/ccsrc/dataset/util/lock.h @@ -19,7 +19,6 @@ #include #include #include -#include "dataset/util/make_unique.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/make_unique.h b/mindspore/ccsrc/dataset/util/make_unique.h deleted file mode 100644 index 2fe0bf4550..0000000000 --- a/mindspore/ccsrc/dataset/util/make_unique.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef DATASET_UTIL_MAKE_UNIQUE_H_ -#define DATASET_UTIL_MAKE_UNIQUE_H_ - -#ifdef DEBUG -#include -#define DS_ASSERT(f) assert(f) -#else -#define DS_ASSERT(f) ((void)0) -#endif - -#include -#include -#include -#include "dataset/util/de_error.h" -#include "utils/log_adapter.h" - -namespace mindspore { -using std::make_unique; -} // namespace mindspore - -#endif // DATASET_UTIL_MAKE_UNIQUE_H_ diff --git a/mindspore/ccsrc/dataset/util/queue.h b/mindspore/ccsrc/dataset/util/queue.h index 4048deb86b..f0b087cf6d 100644 --- a/mindspore/ccsrc/dataset/util/queue.h +++ b/mindspore/ccsrc/dataset/util/queue.h @@ -212,7 +212,7 @@ class QueueList { void Init(int num_queues, int capacity) { queue_list_.reserve(num_queues); for (int i = 0; i < num_queues; i++) { - queue_list_.emplace_back(mindspore::make_unique>(capacity)); + queue_list_.emplace_back(std::make_unique>(capacity)); } } diff --git a/mindspore/ccsrc/dataset/util/task.h b/mindspore/ccsrc/dataset/util/task.h index aaf2f80a3d..d6149caec8 100644 --- a/mindspore/ccsrc/dataset/util/task.h +++ b/mindspore/ccsrc/dataset/util/task.h @@ -27,7 +27,6 @@ #include #include #include "dataset/util/de_error.h" -#include "dataset/util/make_unique.h" #include "dataset/util/intrp_resource.h" #include "dataset/util/list.h" #include "dataset/util/memory_pool.h" diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.cc b/mindspore/ccsrc/device/gpu/blocking_queue.cc index 7417115ae9..c36b1cdbf5 100644 --- a/mindspore/ccsrc/device/gpu/blocking_queue.cc +++ b/mindspore/ccsrc/device/gpu/blocking_queue.cc @@ -17,7 +17,6 @@ #include "device/gpu/blocking_queue.h" #include #include "device/gpu/gpu_common.h" -#include "dataset/util/make_unique.h" #include "common/utils.h" namespace mindspore { @@ -32,7 +31,7 @@ GpuQueue::GpuQueue(void *addr, size_t feature_size, size_t label_size, size_t ca stream_(0), node_info_(nullptr) { CHECK_CUDA_RET_WITH_ERROR(cudaStreamCreate(&stream_), "Cuda Create Stream Failed"); - node_info_ = 
mindspore::make_unique(capacity); + node_info_ = std::make_unique(capacity); } GpuQueue::~GpuQueue() { buffer_ = nullptr; } diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h index 265180afe6..90609c3be5 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h @@ -23,7 +23,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { @@ -74,8 +73,8 @@ class BiasAddGpuKernel : public GpuKernel { // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. auto cudnn_dims = std::max(num_dims, 4UL); - std::unique_ptr x_dims = mindspore::make_unique(cudnn_dims); - std::unique_ptr b_dims = mindspore::make_unique(cudnn_dims); + std::unique_ptr x_dims = std::make_unique(cudnn_dims); + std::unique_ptr b_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { x_dims[i] = (i < num_dims) ? SizeToInt(x_shape[i]) : 1; b_dims[i] = (i == pos) ? SizeToInt(x_shape[i]) : 1; diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h index b0e8102ee3..fd73f378d8 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h @@ -26,7 +26,6 @@ #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace kernel { @@ -84,8 +83,8 @@ class BiasAddGradGpuKernel : public GpuKernel { // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. 
auto cudnn_dims = std::max(num_dims, 4UL); - std::unique_ptr dy_dims = mindspore::make_unique(cudnn_dims); - std::unique_ptr db_dims = mindspore::make_unique(cudnn_dims); + std::unique_ptr dy_dims = std::make_unique(cudnn_dims); + std::unique_ptr db_dims = std::make_unique(cudnn_dims); for (size_t i = 0; i < cudnn_dims; i++) { dy_dims[i] = (i < num_dims) ? SizeToInt(dy_shape[i]) : 1; db_dims[i] = (i == pos) ? SizeToInt(dy_shape[i]) : 1; diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h index 51a2da8574..c3e839b9c5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h @@ -22,7 +22,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { @@ -144,8 +143,8 @@ class LstmGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - x_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); + x_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h index a60ab78f7d..b12fa3bea5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -23,7 +23,6 @@ #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" -#include "dataset/util/make_unique.h" namespace mindspore { namespace kernel { @@ -212,9 +211,9 @@ class LstmGradDataGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - dx_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); - dy_desc_ = mindspore::make_unique(seq_len_); + dx_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); + dy_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&dx_desc_[i]), "create x_desc failed"); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h index b28736cc96..e081b9d070 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -22,7 +22,6 @@ #include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "dataset/util/make_unique.h" #include "kernel/gpu/kernel_constants.h" namespace mindspore { namespace kernel { @@ -169,8 +168,8 @@ class LstmGradWeightGpuKernel : public GpuKernel { int x_dims[3]{batch_size_, input_size_, 1}; int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 
2 : 1), 1}; - x_desc_ = mindspore::make_unique(seq_len_); - y_desc_ = mindspore::make_unique(seq_len_); + x_desc_ = std::make_unique(seq_len_); + y_desc_ = std::make_unique(seq_len_); for (size_t i = 0; i < IntToSize(seq_len_); ++i) { CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); diff --git a/tests/ut/cpp/dataset/celeba_op_test.cc b/tests/ut/cpp/dataset/celeba_op_test.cc index 69314771a3..35be4d7378 100644 --- a/tests/ut/cpp/dataset/celeba_op_test.cc +++ b/tests/ut/cpp/dataset/celeba_op_test.cc @@ -116,7 +116,7 @@ TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) { TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) { std::vector indices({1}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); uint32_t expect_labels[1][40] = {{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}}; std::string dir = datasets_root_path_ + "/testCelebAData/"; uint32_t count = 0; diff --git a/tests/ut/cpp/dataset/cifar_op_test.cc b/tests/ut/cpp/dataset/cifar_op_test.cc index 0cd1db65b5..dcbea83df4 100644 --- a/tests/ut/cpp/dataset/cifar_op_test.cc +++ b/tests/ut/cpp/dataset/cifar_op_test.cc @@ -92,7 +92,7 @@ TEST_F(MindDataTestCifarOp, TestSequentialSamplerCifar10) { TEST_F(MindDataTestCifarOp, TestRandomSamplerCifar10) { uint32_t original_seed = GlobalContext::config_manager()->seed(); GlobalContext::config_manager()->set_seed(0); - std::unique_ptr sampler = mindspore::make_unique(true, 12); + std::unique_ptr sampler = std::make_unique(true, 12); std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; auto tree = Build({Cifarop(16, 2, 32, folder_path, std::move(sampler), 100)}); tree->Prepare(); diff --git a/tests/ut/cpp/dataset/image_folder_op_test.cc b/tests/ut/cpp/dataset/image_folder_op_test.cc index 5b118a629a..e149e687c6 100644 --- a/tests/ut/cpp/dataset/image_folder_op_test.cc +++ 
b/tests/ut/cpp/dataset/image_folder_op_test.cc @@ -138,7 +138,7 @@ TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) { TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) { int32_t original_seed = GlobalContext::config_manager()->seed(); GlobalContext::config_manager()->set_seed(0); - std::unique_ptr sampler = mindspore::make_unique(true, 12); + std::unique_ptr sampler = std::make_unique(true, 12); int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -200,7 +200,7 @@ TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) { // id range 0 - 10 is label 0, and id range 11 - 21 is label 1 std::vector indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); std::string folder_path = datasets_root_path_ + "/testPK/data"; // Expect 6 samples for label 0 and 1 int res[2] = {6, 6}; @@ -238,7 +238,7 @@ TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) { // create sampler with replacement = replacement std::unique_ptr sampler = - mindspore::make_unique(weights, num_samples, true, samples_per_buffer); + std::make_unique(weights, num_samples, true, samples_per_buffer); std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -295,7 +295,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) { } TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { - std::unique_ptr sampler = mindspore::make_unique(11, 10, false); + std::unique_ptr sampler = std::make_unique(11, 10, false); std::string folder_path = datasets_root_path_ + 
"/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler)), Repeat(4)}); tree->Prepare(); @@ -322,7 +322,7 @@ TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { } TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) { - std::unique_ptr sampler = mindspore::make_unique(3, false, 4); + std::unique_ptr sampler = std::make_unique(3, false, 4); int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; // ground truth label std::string folder_path = datasets_root_path_ + "/testPK/data"; auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); @@ -431,7 +431,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderDatasetSize) { } TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { - std::unique_ptr sampler = mindspore::make_unique(4, 0, false); + std::unique_ptr sampler = std::make_unique(4, 0, false); std::string folder_path = datasets_root_path_ + "/testPK/data"; // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {}, 5)}); @@ -460,7 +460,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { } TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) { - std::unique_ptr sampler = mindspore::make_unique(4, 3, false); + std::unique_ptr sampler = std::make_unique(4, 3, false); std::string folder_path = datasets_root_path_ + "/testPK/data"; // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {}, 12)}); diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc index 9e36f8c747..f662f98fc8 100644 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ b/tests/ut/cpp/dataset/manifest_op_test.cc @@ -86,7 +86,7 @@ TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) { TEST_F(MindDataTestManifest, 
TestSubsetRandomSamplerManifest) { std::vector indices({1}); - std::unique_ptr sampler = mindspore::make_unique(indices); + std::unique_ptr sampler = std::make_unique(indices); std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; // Expect 6 samples for label 0 and 1 auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))}); diff --git a/tests/ut/cpp/dataset/project_op_test.cc b/tests/ut/cpp/dataset/project_op_test.cc index 1df2ce05bb..484396321c 100644 --- a/tests/ut/cpp/dataset/project_op_test.cc +++ b/tests/ut/cpp/dataset/project_op_test.cc @@ -45,7 +45,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) { .SetRowsPerBuffer(16) .SetWorkerConnectorSize(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); ASSERT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc index c686a9486b..48cc811615 100644 --- a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc +++ b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc @@ -74,7 +74,7 @@ TEST_F(MindDataTestStandAloneSampler, TestDistributedSampler) { std::unique_ptr db; std::shared_ptr tensor; for (int i = 0; i < 6; i++) { - std::unique_ptr sampler = mindspore::make_unique(3, i % 3, (i < 3 ? false : true)); + std::unique_ptr sampler = std::make_unique(3, i % 3, (i < 3 ? 
false : true)); sampler->Init(&mock); sampler->GetNextBuffer(&db); db->GetTensor(&tensor, 0, 0); diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index c70d5fb6ee..5fb1f4e909 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -48,7 +48,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -102,7 +102,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(500) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -156,7 +156,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(1) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -211,7 +211,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) { .SetWorkerConnectorSize(1) .SetRowsPerBuffer(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); 
builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -265,7 +265,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) { builder.SetDatasetFilesList({dataset_path}) .SetRowsPerBuffer(16) .SetNumWorkers(1); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -321,7 +321,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) { .SetRowsPerBuffer(16) .SetWorkerConnectorSize(16) .SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc= builder.Build(&my_tfreader_op); @@ -379,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) { std::string dataset_path; dataset_path = datasets_root_path_ + "/testTFTestAllTypes"; - std::unique_ptr data_schema = mindspore::make_unique(); + std::unique_ptr data_schema = std::make_unique(); std::vector columns_to_load; columns_to_load.push_back("col_sint32"); columns_to_load.push_back("col_binary"); @@ -445,7 +445,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); builder.SetDataSchema(std::move(schema)); @@ -503,7 +503,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + 
"/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema5Rows.json", {}); builder.SetDataSchema(std::move(schema)); @@ -561,7 +561,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16); - std::unique_ptr schema = mindspore::make_unique(); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json", {}); builder.SetDataSchema(std::move(schema)); From d3757240551c610545b63efbc1f9c63ecafddd9e Mon Sep 17 00:00:00 2001 From: mxm <83028974@qq.com> Date: Tue, 31 Mar 2020 12:08:02 +0800 Subject: [PATCH 39/58] fixed: PrimitiveToInferImplMap map is global, and key of the map PrimitivePtr also a global variable. If key is initialized later than the map initialized during compilation, will cause the primitive map initialize failed. Variable initialization order is not guaranteed during compilation. 
--- .../ccsrc/pipeline/static_analysis/prim.cc | 248 +++++++++--------- .../static_analysis/static_analysis.cc | 49 ++-- 2 files changed, 148 insertions(+), 149 deletions(-) diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc index d71e098009..403bbdf433 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc @@ -42,92 +42,95 @@ namespace mindspore { namespace abstract { -PrimitiveEvalImplMap PrimitiveToInferImplMap = { - // Statements - {prim::kPrimReturn, {InferImplReturn, true}}, - {prim::kPrimTypeOf, {InferImplTypeof, false}}, - {prim::kPrimHasType, {InferImplHasType, false}}, - {prim::kPrimDot, {InferImplDot, true}}, - {prim::kPrimSwitch, {InferImplSwitch, true}}, - {prim::kPrimIs_, {InferImplIs_, true}}, - {prim::kPrimIsNot, {InferImplIsNot, true}}, - // Maths - {prim::kPrimMaximumGrad, {InferImplMinOrMaxGrad, true}}, - {prim::kPrimMinimumGrad, {InferImplMinOrMaxGrad, true}}, - // Array - {prim::kPrimScalarToArray, {InferImplScalarToArray, true}}, - {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}}, - {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}}, - {prim::kPrimShape, {InferImplShape, true}}, - {prim::kPrimPack, {InferImplPack, true}}, - // Structure - {prim::kPrimMakeTuple, {InferImplMakeTuple, true}}, - {prim::kPrimMakeList, {InferImplMakeList, true}}, - {prim::kPrimMakeDict, {InferImplMakeDict, true}}, - {prim::kPrimMakeSlice, {InferImplMakeSlice, true}}, - {prim::kPrimMakeKeywordArg, {InferImplMakeKwarg, true}}, - {prim::kPrimExtractKeywordArg, {InferImplExtractKwarg, true}}, - {prim::kPrimMakeRecord, {InferImplMakeRecord, false}}, - {prim::kPrimTupleGetItem, {InferImplTupleGetItem, true}}, - {prim::kPrimListGetItem, {InferImplListGetItem, true}}, - {prim::kPrimTupleSetItem, {InferImplTupleSetItem, true}}, - {prim::kPrimListSetItem, {InferImplListSetItem, true}}, - {prim::kPrimDictGetItem, {InferImplDictGetItem, 
true}}, - {prim::kPrimDictSetItem, {InferImplDictSetItem, true}}, - {prim::kPrimListAppend, {InferImplListAppend, true}}, - {prim::kPrimTupleLen, {InferImplTupleLen, true}}, - {prim::kPrimListLen, {InferImplListLen, true}}, - {prim::kPrimArrayLen, {InferImplArrayLen, true}}, - {prim::kPrimListMap, {InferImplListMap, false}}, - {prim::kPrimListReduce, {InferImplListReduce, false}}, - {prim::kPrimTupleReversed, {InferImplTupleReversed, false}}, - {prim::kPrimReducedShape, {InferImplReduceShape, false}}, - {prim::kPrimTupleDiv, {InferImplTupleDiv, false}}, - {prim::kPrimTupleToArray, {InferImplTuple2Array, false}}, - {prim::kPrimShapeMul, {InferImplShapeMul, false}}, - {prim::kPrimTupleEqual, {InferImplTupleEqual, false}}, - {prim::kPrimListEqual, {InferImplListEqual, false}}, - {prim::kPrimMakeRange, {InferImplMakeRange, false}}, - {prim::kPrimStopGradient, {InferImplStopGradient, false}}, - {prim::kPrimStringEqual, {InferImplStringEqual, false}}, - {prim::kPrimDictLen, {InferImplDictLen, false}}, - // NN - {prim::kPrimPooling, {InferImplPooling, true}}, - {prim::kPrimPoolingGrad, {InferImplPoolingGrad, true}}, - {prim::kPrimFusedBatchNorm, {InferImplFusedBatchNorm, true}}, - {prim::kPrimFusedBatchNormGrad, {InferImplFusedBatchNormGrad, true}}, - {prim::kPrimReluGrad, {InferImplReluGrad, true}}, - {prim::kPrimConv2DBackpropInput, {InferImplConv2DBackpropInput, true}}, - {prim::kPrimConv2DBackpropFilter, {InferImplConv2DBackpropFilter, true}}, - {prim::kPrimBiasAddGrad, {InferImplBiasAddGrad, true}}, - {prim::kPrimRelu, {InferImplRelu, true}}, - {prim::kPrimZerosLikeTensor, {InferImplZerosLikeTensor, true}}, - {prim::kPrimFakeBprop, {InferImplFakeBprop, false}}, - {prim::kPrimLayerNorm, {InferImplLayerNorm, true}}, - {prim::kPrimLayerNormGrad, {InferImplLayerNormGrad, true}}, - {prim::kPrimDropoutGenMask, {InferImplDropoutGenMask, true}}, - // Others - {prim::kPrimIdentity, {InferImplIdentity, true}}, - // Set impl to null as it will use PartialEvaluator; - 
{prim::kPrimPartial, {nullptr, true}}, - {prim::kPrimJ, {InferImplJ, false}}, - {prim::kPrimEnvGetItem, {InferImplEnvGetItem, true}}, - {prim::kPrimEnvSetItem, {InferImplEnvSetItem, true}}, - {prim::kPrimEnvAdd, {InferImplEnvAdd, true}}, - {prim::kPrimMakeRefKey, {InferImplMakeRefKey, true}}, - {prim::kPrimMakeRef, {InferImplMakeRef, true}}, - {prim::kPrimGetRefKey, {InferImplGetRefKey, true}}, - {prim::kPrimGetRefValue, {InferImplGetRefValue, true}}, - {prim::kPrimGetRefOrigin, {InferImplGetRefOrigin, true}}, - {prim::kPrimStateSetItem, {InferImplStateSetItem, true}}, - {prim::kPrimDepend, {InferImplDepend, true}}, - {prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}}, - {prim::kPrimControlDepend, {InferImplControlDepend, true}}, - // Debug - {prim::kPrimScalarSummary, {InferImplScalarSummary, true}}, - {prim::kPrimImageSummary, {InferImplTensorSummary, true}}, - {prim::kPrimTensorSummary, {InferImplTensorSummary, true}}, -}; +PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { + static PrimitiveEvalImplMap prim_eval_implement_map = { + // Statements + {prim::kPrimReturn, {InferImplReturn, true}}, + {prim::kPrimTypeOf, {InferImplTypeof, false}}, + {prim::kPrimHasType, {InferImplHasType, false}}, + {prim::kPrimDot, {InferImplDot, true}}, + {prim::kPrimSwitch, {InferImplSwitch, true}}, + {prim::kPrimIs_, {InferImplIs_, true}}, + {prim::kPrimIsNot, {InferImplIsNot, true}}, + // Maths + {prim::kPrimMaximumGrad, {InferImplMinOrMaxGrad, true}}, + {prim::kPrimMinimumGrad, {InferImplMinOrMaxGrad, true}}, + // Array + {prim::kPrimScalarToArray, {InferImplScalarToArray, true}}, + {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}}, + {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}}, + {prim::kPrimShape, {InferImplShape, true}}, + {prim::kPrimPack, {InferImplPack, true}}, + // Structure + {prim::kPrimMakeTuple, {InferImplMakeTuple, true}}, + {prim::kPrimMakeList, {InferImplMakeList, true}}, + {prim::kPrimMakeDict, 
{InferImplMakeDict, true}}, + {prim::kPrimMakeSlice, {InferImplMakeSlice, true}}, + {prim::kPrimMakeKeywordArg, {InferImplMakeKwarg, true}}, + {prim::kPrimExtractKeywordArg, {InferImplExtractKwarg, true}}, + {prim::kPrimMakeRecord, {InferImplMakeRecord, false}}, + {prim::kPrimTupleGetItem, {InferImplTupleGetItem, true}}, + {prim::kPrimListGetItem, {InferImplListGetItem, true}}, + {prim::kPrimTupleSetItem, {InferImplTupleSetItem, true}}, + {prim::kPrimListSetItem, {InferImplListSetItem, true}}, + {prim::kPrimDictGetItem, {InferImplDictGetItem, true}}, + {prim::kPrimDictSetItem, {InferImplDictSetItem, true}}, + {prim::kPrimListAppend, {InferImplListAppend, true}}, + {prim::kPrimTupleLen, {InferImplTupleLen, true}}, + {prim::kPrimListLen, {InferImplListLen, true}}, + {prim::kPrimArrayLen, {InferImplArrayLen, true}}, + {prim::kPrimListMap, {InferImplListMap, false}}, + {prim::kPrimListReduce, {InferImplListReduce, false}}, + {prim::kPrimTupleReversed, {InferImplTupleReversed, false}}, + {prim::kPrimReducedShape, {InferImplReduceShape, false}}, + {prim::kPrimTupleDiv, {InferImplTupleDiv, false}}, + {prim::kPrimTupleToArray, {InferImplTuple2Array, false}}, + {prim::kPrimShapeMul, {InferImplShapeMul, false}}, + {prim::kPrimTupleEqual, {InferImplTupleEqual, false}}, + {prim::kPrimListEqual, {InferImplListEqual, false}}, + {prim::kPrimMakeRange, {InferImplMakeRange, false}}, + {prim::kPrimStopGradient, {InferImplStopGradient, false}}, + {prim::kPrimStringEqual, {InferImplStringEqual, false}}, + {prim::kPrimDictLen, {InferImplDictLen, false}}, + // NN + {prim::kPrimPooling, {InferImplPooling, true}}, + {prim::kPrimPoolingGrad, {InferImplPoolingGrad, true}}, + {prim::kPrimFusedBatchNorm, {InferImplFusedBatchNorm, true}}, + {prim::kPrimFusedBatchNormGrad, {InferImplFusedBatchNormGrad, true}}, + {prim::kPrimReluGrad, {InferImplReluGrad, true}}, + {prim::kPrimConv2DBackpropInput, {InferImplConv2DBackpropInput, true}}, + {prim::kPrimConv2DBackpropFilter, 
{InferImplConv2DBackpropFilter, true}}, + {prim::kPrimBiasAddGrad, {InferImplBiasAddGrad, true}}, + {prim::kPrimRelu, {InferImplRelu, true}}, + {prim::kPrimZerosLikeTensor, {InferImplZerosLikeTensor, true}}, + {prim::kPrimFakeBprop, {InferImplFakeBprop, false}}, + {prim::kPrimLayerNorm, {InferImplLayerNorm, true}}, + {prim::kPrimLayerNormGrad, {InferImplLayerNormGrad, true}}, + {prim::kPrimDropoutGenMask, {InferImplDropoutGenMask, true}}, + // Others + {prim::kPrimIdentity, {InferImplIdentity, true}}, + // Set impl to null as it will use PartialEvaluator; + {prim::kPrimPartial, {nullptr, true}}, + {prim::kPrimJ, {InferImplJ, false}}, + {prim::kPrimEnvGetItem, {InferImplEnvGetItem, true}}, + {prim::kPrimEnvSetItem, {InferImplEnvSetItem, true}}, + {prim::kPrimEnvAdd, {InferImplEnvAdd, true}}, + {prim::kPrimMakeRefKey, {InferImplMakeRefKey, true}}, + {prim::kPrimMakeRef, {InferImplMakeRef, true}}, + {prim::kPrimGetRefKey, {InferImplGetRefKey, true}}, + {prim::kPrimGetRefValue, {InferImplGetRefValue, true}}, + {prim::kPrimGetRefOrigin, {InferImplGetRefOrigin, true}}, + {prim::kPrimStateSetItem, {InferImplStateSetItem, true}}, + {prim::kPrimDepend, {InferImplDepend, true}}, + {prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}}, + {prim::kPrimControlDepend, {InferImplControlDepend, true}}, + // Debug + {prim::kPrimScalarSummary, {InferImplScalarSummary, true}}, + {prim::kPrimImageSummary, {InferImplTensorSummary, true}}, + {prim::kPrimTensorSummary, {InferImplTensorSummary, true}}, + }; + return prim_eval_implement_map; +} using mindspore::parse::PyObjectWrapper; @@ -961,10 +964,7 @@ class PartialEvaluator : public Evaluator { new_nodes_inputs[1] = NewValueNode(new_signature_value); FuncGraphPtr func_graph = cnode->func_graph(); - ScopePtr scope = kDefaultScope; - if (out_conf != nullptr) { - scope = out_conf->node()->scope(); - } + ScopePtr scope = out_conf->node()->scope(); ScopeGuard scope_guard(scope); CNodePtr new_cnode = 
func_graph->NewCNode(new_nodes_inputs); @@ -981,39 +981,41 @@ struct PrimitiveImplInferValue { }; using PrimitiveToImplMap = std::unordered_map; - -PrimitiveToImplMap UniformPrimitiveToImplMapValue = { - {prim::kPrimScalarAdd, {prim::ScalarAdd, true, nullptr, true}}, - {prim::kPrimScalarSub, {prim::ScalarSub, true, nullptr, true}}, - {prim::kPrimScalarMul, {prim::ScalarMul, true, nullptr, true}}, - {prim::kPrimScalarDiv, {prim::ScalarDiv, true, nullptr, true}}, - {prim::kPrimScalarMod, {prim::ScalarMod, true, nullptr, true}}, - {prim::kPrimScalarUadd, {prim::ScalarUAdd, true, nullptr, true}}, - {prim::kPrimScalarUsub, {prim::ScalarUSub, true, nullptr, true}}, - {prim::kPrimScalarLog, {prim::ScalarLog, true, nullptr, true}}, - {prim::kPrimScalarEq, {prim::ScalarEq, true, std::make_shared(), true}}, - {prim::kPrimScalarLt, {prim::ScalarLt, true, std::make_shared(), true}}, - {prim::kPrimScalarGt, {prim::ScalarGt, true, std::make_shared(), true}}, - {prim::kPrimScalarNe, {prim::ScalarNe, true, std::make_shared(), true}}, - {prim::kPrimScalarLe, {prim::ScalarLe, true, std::make_shared(), true}}, - {prim::kPrimScalarGe, {prim::ScalarGe, true, std::make_shared(), true}}, - {prim::kPrimBoolNot, {prim::BoolNot, true, std::make_shared(), true}}, - {prim::kPrimBoolAnd, {prim::BoolAnd, true, std::make_shared(), true}}, - {prim::kPrimBoolEq, {prim::BoolEq, true, std::make_shared(), true}}, - {prim::kPrimBoolOr, {prim::BoolOr, true, std::make_shared(), true}}, -}; +PrimitiveToImplMap &GetUniformPrimitiveToImplMap() { + static PrimitiveToImplMap uniform_prim_implement_map = { + {prim::kPrimScalarAdd, {prim::ScalarAdd, true, nullptr, true}}, + {prim::kPrimScalarSub, {prim::ScalarSub, true, nullptr, true}}, + {prim::kPrimScalarMul, {prim::ScalarMul, true, nullptr, true}}, + {prim::kPrimScalarDiv, {prim::ScalarDiv, true, nullptr, true}}, + {prim::kPrimScalarMod, {prim::ScalarMod, true, nullptr, true}}, + {prim::kPrimScalarUadd, {prim::ScalarUAdd, true, nullptr, true}}, + 
{prim::kPrimScalarUsub, {prim::ScalarUSub, true, nullptr, true}}, + {prim::kPrimScalarLog, {prim::ScalarLog, true, nullptr, true}}, + {prim::kPrimScalarEq, {prim::ScalarEq, true, std::make_shared(), true}}, + {prim::kPrimScalarLt, {prim::ScalarLt, true, std::make_shared(), true}}, + {prim::kPrimScalarGt, {prim::ScalarGt, true, std::make_shared(), true}}, + {prim::kPrimScalarNe, {prim::ScalarNe, true, std::make_shared(), true}}, + {prim::kPrimScalarLe, {prim::ScalarLe, true, std::make_shared(), true}}, + {prim::kPrimScalarGe, {prim::ScalarGe, true, std::make_shared(), true}}, + {prim::kPrimBoolNot, {prim::BoolNot, true, std::make_shared(), true}}, + {prim::kPrimBoolAnd, {prim::BoolAnd, true, std::make_shared(), true}}, + {prim::kPrimBoolEq, {prim::BoolEq, true, std::make_shared(), true}}, + {prim::kPrimBoolOr, {prim::BoolOr, true, std::make_shared(), true}}, + }; + return uniform_prim_implement_map; +} PrimEvaluatorMap PrimEvaluatorConstructors = PrimEvaluatorMap(); std::mutex PrimEvaluatorConstructorMutex; -void InitPrimEvaluatorConstructors(const PrimitiveEvalImplMap &prim_eval_impl_map) { +void InitPrimEvaluatorConstructors() { PrimEvaluatorMap &constructor = PrimEvaluatorConstructors; - for (const auto &iter : prim_eval_impl_map) { + for (const auto &iter : GetPrimitiveToEvalImplMap()) { constructor[iter.first] = InitStandardPrimEvaluator(iter.first, iter.second.impl_); } - for (const auto &iter : UniformPrimitiveToImplMapValue) { + for (const auto &iter : GetUniformPrimitiveToImplMap()) { constructor[iter.first] = InitUniformPrimEvaluator(iter.first, iter.second.impl_, iter.second.eval_value_, iter.second.specify_out_type_); } @@ -1028,20 +1030,20 @@ void InitPrimEvaluatorConstructors(const PrimitiveEvalImplMap &prim_eval_impl_ma void ClearPrimEvaluatorMap() { PrimEvaluatorConstructors.clear(); - PrimitiveToInferImplMap.clear(); - UniformPrimitiveToImplMapValue.clear(); + GetPrimitiveToEvalImplMap().clear(); + GetUniformPrimitiveToImplMap().clear(); } bool 
IsInWhiteList(const PrimitivePtr primitive) { MS_EXCEPTION_IF_NULL(primitive); - auto iter = PrimitiveToInferImplMap.find(primitive); - if (iter != PrimitiveToInferImplMap.end()) { + auto iter = GetPrimitiveToEvalImplMap().find(primitive); + if (iter != GetPrimitiveToEvalImplMap().end()) { return iter->second.in_white_list_; } - auto uni_iter = UniformPrimitiveToImplMapValue.find(primitive); - if (uni_iter != UniformPrimitiveToImplMapValue.end()) { + auto uni_iter = GetUniformPrimitiveToImplMap().find(primitive); + if (uni_iter != GetUniformPrimitiveToImplMap().end()) { return uni_iter->second.in_white_list_; } @@ -1050,8 +1052,8 @@ bool IsInWhiteList(const PrimitivePtr primitive) { StandardPrimitiveEvalImpl GetPrimitiveInferImpl(const PrimitivePtr &primitive) { MS_EXCEPTION_IF_NULL(primitive); - auto iter = PrimitiveToInferImplMap.find(primitive); - if (iter == PrimitiveToInferImplMap.end()) { + auto iter = GetPrimitiveToEvalImplMap().find(primitive); + if (iter == GetPrimitiveToEvalImplMap().end()) { return nullptr; } return iter->second.impl_; @@ -1064,7 +1066,7 @@ PrimEvaluatorMap &GetPrimEvaluatorConstructors() { } std::lock_guard initLock(PrimEvaluatorConstructorMutex); if (constructor.empty()) { - InitPrimEvaluatorConstructors(PrimitiveToInferImplMap); + InitPrimEvaluatorConstructors(); } return constructor; diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc index 4ab7b9d20b..6230df44a5 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc @@ -296,38 +296,35 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr if (prim->HasPyEvaluator()) { auto prim_py = dyn_cast(prim); if (prim_py != nullptr) { - evaluator = std::make_shared(prim_py); - } else { - MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; + return 
std::make_shared(prim_py); + } + MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; + } + + if (prim->isa() || prim->HasAttr()) { + if (engine == nullptr) { + (void)GetPrimEvaluatorConstructors(); } - } else if (prim->isa() || prim->HasAttr()) { // If a primitive may have attr, try to create a new evaluator. StandardPrimitiveEvalImpl eval_impl = GetPrimitiveInferImpl(prim); if (eval_impl != nullptr) { - std::shared_ptr standard_evaluator = - std::make_shared(prim, eval_impl); - evaluator = standard_evaluator; + return std::make_shared(prim, eval_impl); } } - if (evaluator == nullptr) { - if (engine == nullptr) { - // If engine is nullptr, get constructor from default. - const PrimEvaluatorMap &prim_evaluator_map = GetPrimEvaluatorConstructors(); - auto iter = prim_evaluator_map.find(prim); - if (iter == prim_evaluator_map.end()) { - evaluator = nullptr; - } else { - evaluator = iter->second; - } - } else { - // If engine is given, get constructor from engine resource. - const PrimEvaluatorMap &prim_evaluator_map = engine->PrimConstructors(); - auto iter = prim_evaluator_map.find(prim); - if (iter == prim_evaluator_map.end()) { - evaluator = nullptr; - } else { - evaluator = iter->second; - } + + if (engine == nullptr) { + // If engine is nullptr, get constructor from default. + const PrimEvaluatorMap &prim_evaluator_map = GetPrimEvaluatorConstructors(); + auto iter = prim_evaluator_map.find(prim); + if (iter != prim_evaluator_map.end()) { + evaluator = iter->second; + } + } else { + // If engine is given, get constructor from engine resource. 
+ const PrimEvaluatorMap &prim_evaluator_map = engine->PrimConstructors(); + auto iter = prim_evaluator_map.find(prim); + if (iter != prim_evaluator_map.end()) { + evaluator = iter->second; } } if (evaluator == nullptr) { From d84cfb01080b650c16fed895f7717ba667b60ca3 Mon Sep 17 00:00:00 2001 From: kswang Date: Wed, 8 Apr 2020 17:05:17 +0800 Subject: [PATCH 40/58] add mem manager --- mindspore/ccsrc/CMakeLists.txt | 1 + .../device/ascend/ascend_kernel_runtime.cc | 57 +---- .../device/ascend/ascend_kernel_runtime.h | 5 +- .../device/ascend/ascend_memory_manager.cc | 65 +++++ .../device/ascend/ascend_memory_manager.h | 35 +++ mindspore/ccsrc/device/device_address.h | 5 + .../ccsrc/device/gpu/gpu_kernel_runtime.cc | 108 ++------ .../ccsrc/device/gpu/gpu_kernel_runtime.h | 9 +- .../ccsrc/device/gpu/gpu_memory_manager.cc | 88 +++++++ .../ccsrc/device/gpu/gpu_memory_manager.h | 40 +++ mindspore/ccsrc/device/kernel_runtime.cc | 231 +++--------------- mindspore/ccsrc/device/kernel_runtime.h | 38 +-- mindspore/ccsrc/device/memory_manager.cc | 170 +++++++++++++ mindspore/ccsrc/device/memory_manager.h | 71 ++++++ .../ccsrc/session/anf_runtime_algorithm.cc | 10 + .../ccsrc/session/anf_runtime_algorithm.h | 1 + mindspore/ccsrc/session/gpu_session.cc | 4 - tests/ut/cpp/CMakeLists.txt | 2 + 18 files changed, 562 insertions(+), 378 deletions(-) create mode 100644 mindspore/ccsrc/device/ascend/ascend_memory_manager.cc create mode 100644 mindspore/ccsrc/device/ascend/ascend_memory_manager.h create mode 100644 mindspore/ccsrc/device/gpu/gpu_memory_manager.cc create mode 100644 mindspore/ccsrc/device/gpu/gpu_memory_manager.h create mode 100644 mindspore/ccsrc/device/memory_manager.cc create mode 100644 mindspore/ccsrc/device/memory_manager.h diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 9f559a51eb..1d104148c3 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -132,6 +132,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST 
RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/kash/*.cc" "device/kernel_info.cc" "device/kernel_runtime.cc" + "device/memory_manager.cc" "device/kernel_runtime_manager.cc" "device/convert_tensor_utils.cc" "pre_activate/common/*.cc" diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index dc7eb5449b..0c2a97a5a6 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -37,6 +37,7 @@ #include "kernel/tbe/tbe_utils.h" #include "kernel/tbe/tbe_python_funcs.h" #include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "device/ascend/ascend_memory_manager.h" using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; @@ -47,8 +48,6 @@ using std::vector; namespace mindspore { namespace device { namespace ascend { -static const uint64_t ASCEND_MEM_SIZE = 20; -static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); static const size_t PRAMATER_OUTPUT_INDEX = 0; AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } @@ -86,7 +85,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() { MS_EXCEPTION(DeviceProcessError) << "rtSetDevice, ret[" << static_cast(ret) << "]"; } - FreeDeviceMemory(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->FreeDeviceMemory(); (void)DestroyHccl(); (void)ResetDevice(); (void)ProfilingManager::GetInstance().StopProfiling(); @@ -109,11 +109,9 @@ bool AscendKernelRuntime::Init() { if (!ret) { return ret; } - - ret = MallocDeviceMemory(); - if (!ret) { - return ret; - } + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->MallocDeviceMemory(); ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { @@ -239,13 +237,6 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size return std::make_shared(device_ptr, device_size, format, type_id); } 
-void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { - auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size); - MS_EXCEPTION_IF_NULL(device_ptr); - address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; -} - bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -474,42 +465,6 @@ bool AscendKernelRuntime::DestroyHccl() { context_ptr->set_enable_hccl(false); return true; } - -bool AscendKernelRuntime::MallocDeviceMemory() { - device_mem_size_ = ASCEND_MEM_SIZE_BYTE; - static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); - auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; - } - device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); - ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); - return true; -} - -void AscendKernelRuntime::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - auto ret = rtFree(device_mem_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_base_ = nullptr; - } - if (device_mem_pool_base_ != nullptr) { - auto ret = rtFree(device_mem_pool_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << 
"]"; - } - device_mem_pool_base_ = nullptr; - } -} - -void AscendKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h index dbd1460d24..0eedad3d2b 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h @@ -39,13 +39,11 @@ class AscendKernelRuntime : public KernelRuntime { bool GenTask(const session::KernelGraph *graph) override; bool RunTask(const session::KernelGraph *graph) override; bool LoadTask(const session::KernelGraph *graph) override; - void FreeHostMemory() override; protected: DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) override; bool SyncStream() override; - void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; private: bool InitDevice(); @@ -53,8 +51,7 @@ class AscendKernelRuntime : public KernelRuntime { bool HcclInit(); bool NeedDestroyHccl(); bool DestroyHccl(); - bool MallocDeviceMemory(); - void FreeDeviceMemory(); + void ClearGraphModelMap(); void ReleaseDeviceRes() override; uint32_t GetGraphModelId(const session::KernelGraph *kernel_graph); diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc new file mode 100644 index 0000000000..f033d81d82 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -0,0 +1,65 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/ascend_memory_manager.h" +#include "device/ascend/ascend_memory_allocator.h" +#include "utils/context/ms_context.h" +#include "runtime/mem.h" +namespace mindspore { +namespace device { +namespace ascend { +static const uint64_t ASCEND_MEM_SIZE = 20; +static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); + +void AscendMemoryManager::MallocDeviceMemory() { + device_mem_size_ = ASCEND_MEM_SIZE_BYTE; + static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); + auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); + ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); +} + +void AscendMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + auto ret = rtFree(device_mem_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_base_ = 
nullptr; + } + if (device_mem_pool_base_ != nullptr) { + auto ret = rtFree(device_mem_pool_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_base_ = nullptr; + } +} + +void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) { + return AscendMemoryAllocator::GetInstance().AllocTensorMem(size); +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h new file mode 100644 index 0000000000..8639fb5c72 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -0,0 +1,35 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ +#include "device/memory_manager.h" +namespace mindspore { +namespace device { +namespace ascend { +class AscendMemoryManager : public MemoryManager { + public: + AscendMemoryManager() = default; + virtual ~AscendMemoryManager() = default; + + void MallocDeviceMemory() override; + void FreeDeviceMemory() override; + void *AllocTensorMemDynamic(size_t size) override; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index 1610d43372..cb022427e3 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -33,12 +33,14 @@ class CPUKernelRuntime; } // namespace cpu namespace ascend { class AscendKernelRuntime; +class AscendMemoryManager; namespace tasksink { class TaskGenerator; } // namespace tasksink } // namespace ascend namespace gpu { class GPUKernelRuntime; +class GPUMemoryManager; } // namespace gpu } // namespace device } // namespace mindspore @@ -70,12 +72,15 @@ class DeviceAddress { TypeId type_id_{kNumberTypeFloat16}; bool mem_dynamic_alloc_{false}; friend class KernelRuntime; + friend class MemoryManager; friend class mindspore::device::ascend::tasksink::TaskGenerator; friend class mindspore::device::cpu::CPUSimpleMemPlan; friend class mindspore::device::cpu::CPUResourceManager; friend class mindspore::device::cpu::CPUKernelRuntime; friend class mindspore::device::gpu::GPUKernelRuntime; + friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; + friend class mindspore::device::ascend::AscendMemoryManager; }; using DeviceAddressPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc 
b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 9eeb1062f7..597e188e9d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -26,6 +26,7 @@ #include "device/kernel_runtime_manager.h" #include "device/gpu/gpu_common.h" #include "common/utils.h" +#include "device/gpu/gpu_memory_manager.h" namespace mindspore { namespace device { @@ -36,26 +37,14 @@ bool GPUKernelRuntime::Init() { if (device_init_ == true) { return true; } - auto ret = InitDevice(); if (!ret) { MS_LOG(ERROR) << "InitDevice error."; return ret; } - - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - // If use the dynamic memory pool, then alloc the first memory block to init. - if (context_ptr->enable_dynamic_mem_pool()) { - auto device_addr = AllocTensorMemDynamic(1); - if (!device_addr) { - MS_LOG(ERROR) << "Dynamic memory pool init error."; - return false; - } - } else { - MallocDeviceMemory(); - } - + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->MallocDeviceMemory(); const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); bool collective_inited = CollectiveInitializer::instance().collective_inited(); if (collective_inited && collective_handle_ != nullptr) { @@ -101,16 +90,6 @@ bool GPUKernelRuntime::InitDevice() { return true; } -void GPUKernelRuntime::MallocDeviceMemory() { - // Need to reserve 20% space for dynamic memory - const float init_gpu_mem_ratio = 0.8; - size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); - auto alloc_size = - GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast(&device_mem_base_)); - device_mem_size_ = alloc_size; - static_mem_offset_ = device_mem_size_; -} - void GPUKernelRuntime::ReleaseDeviceRes() { // For dataset mode. 
if (GpuBufferMgr::GetInstance().IsInit()) { @@ -122,39 +101,22 @@ void GPUKernelRuntime::ReleaseDeviceRes() { CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue."); } GPUDeviceManager::GetInstance().ReleaseDevice(); - if (device_mem_base_ != nullptr) { - if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { - MS_LOG(EXCEPTION) << "Could not free gpu device memory."; - } - } - GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); -} - -void GPUKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } - -void *GPUKernelRuntime::AllocTensorMemDynamic(size_t size) { - return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); -} - -void GPUKernelRuntime::FreeTensorMemDynamic(void *device_ptr) { - GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->FreeDeviceMemory(); } void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->ResetDynamicMemory(); AssignStaticMemory(graph); - bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool(); if (is_enable_dynamic_mem) { // Use the dynamic memory pool. InitKernelRefCount(graph); InitKernelOutputAddress(graph); - } else if (is_enable_mem_reuse) { - // Use the memory reuse. - ReuseAssignDynamicMemory(graph); } else { - // Normal way. 
AssignDynamicMemory(graph); } } @@ -179,32 +141,6 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) { return ret; } -uint8_t *GPUKernelRuntime::MallocStaticMem(size_t size, bool) { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->enable_dynamic_mem_pool()) { - auto device_ptr = AllocTensorMemDynamic(size); - MS_EXCEPTION_IF_NULL(device_ptr); - return AddressOffset(device_ptr, 0); - } - - auto align_size = GetCommonAlignSize(size); - if (static_mem_offset_ < align_size) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - auto offset = static_mem_offset_ - align_size; - if (dynamic_mem_offset_ > offset) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_static_size_ += align_size; - static_mem_offset_ = offset; - return device_mem_base_ + offset; -} - void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); @@ -273,6 +209,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(kernel_inputs); MS_EXCEPTION_IF_NULL(kernel_workspaces); MS_EXCEPTION_IF_NULL(kernel_outputs); + MS_EXCEPTION_IF_NULL(mem_manager_); for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); @@ -290,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(device_address); auto device_ptr = device_address->ptr_; if (device_ptr == nullptr) { - device_ptr = AllocTensorMemDynamic(output_sizes[i]); + 
device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); device_address->ptr_ = device_ptr; } @@ -307,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod kernel_workspaces->emplace_back(nullptr); continue; } - auto device_ptr = AllocTensorMemDynamic(workspace_sizes[i]); + auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); @@ -333,6 +270,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The reference count of communication kernel input is not 0. if (communication_op_input_ref_count_ != 0) { MS_LOG(ERROR) << "The reference count of communication kernel input is not 0."; @@ -354,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN addr_size.emplace_back(device_address.get(), output_size); } - auto device_mem_ptr = AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -366,6 +304,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The reference count of communication kernel output is not 0. 
if (communication_op_output_ref_count_ != 0) { MS_LOG(ERROR) << "The reference count of communication kernel output is not 0."; @@ -389,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf addr_size.emplace_back(device_address.get(), output_sizes[i]); } - auto device_mem_ptr = AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -402,6 +341,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); auto cnode = kernel->cast(); MS_EXCEPTION_IF_NULL(cnode); // Free the input of kernel by reference count. @@ -421,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } } @@ -432,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto workspace = kernel_workspaces[i]; if (workspace != nullptr) { MS_EXCEPTION_IF_NULL(workspace->addr); - FreeTensorMemDynamic(workspace->addr); + mem_manager_->FreeTensorMemDynamic(workspace->addr); workspace->addr = nullptr; } } @@ -441,6 +381,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); // The inputs memory of communication 
kernel is one piece memory, need release together. if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) { communication_op_input_ref_count_--; @@ -448,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; @@ -470,19 +411,12 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeTensorMemDynamic(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; } } - -void GPUKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { - auto device_ptr = AllocTensorMemDynamic(size); - MS_EXCEPTION_IF_NULL(device_ptr); - address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; -} } // namespace gpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h index f3fdb5fa98..6f761342d3 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h @@ -33,7 +33,6 @@ class GPUKernelRuntime : public KernelRuntime { ~GPUKernelRuntime() override = default; bool Init() override; void ReleaseDeviceRes() override; - void FreeHostMemory() override; void AssignMemory(session::KernelGraph *graph) override; bool Run(session::KernelGraph *graph) override; @@ -41,18 +40,11 @@ class GPUKernelRuntime : public KernelRuntime { DeviceAddressPtr CreateDeviceAddress(void *device_ptr, 
size_t device_size, const string &format, TypeId type_id) override; bool SyncStream() override; - // Alloc memory use the dynamic memory pool. - void *AllocTensorMemDynamic(size_t size) override; - // Free memory use the dynamic memory pool. - void FreeTensorMemDynamic(void *device_ptr) override; - void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; - uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; private: GPUKernelRuntime(const GPUKernelRuntime &); GPUKernelRuntime &operator=(const GPUKernelRuntime &); bool InitDevice(); - void MallocDeviceMemory(); bool device_init_{false}; // The related functions and members for using dynamic memory pool. @@ -69,6 +61,7 @@ class GPUKernelRuntime : public KernelRuntime { void FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op); size_t communication_op_input_ref_count_{0}; size_t communication_op_output_ref_count_{0}; + MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; }; MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime); } // namespace gpu diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc new file mode 100644 index 0000000000..3944b504e4 --- /dev/null +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -0,0 +1,88 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "device/gpu/gpu_memory_manager.h" +#include "device/gpu/gpu_memory_allocator.h" +#include "utils/context/ms_context.h" +#include "utils/convert_utils.h" +namespace mindspore { +namespace device { +namespace gpu { +void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) { + return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); +} + +void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) { + GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); +} + +void GPUMemoryManager::MallocDeviceMemory() { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + // If use the dynamic memory pool, then alloc the first memory block to init. + if (context_ptr->enable_dynamic_mem_pool()) { + auto device_addr = AllocTensorMemDynamic(1); + if (!device_addr) { + MS_LOG(ERROR) << "Dynamic memory pool init error."; + } + } else { + // Need to reserve 20% space for dynamic memory + const float init_gpu_mem_ratio = 0.8; + size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); + auto alloc_size = + GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast(&device_mem_base_)); + device_mem_size_ = alloc_size; + static_mem_offset_ = device_mem_size_; + } +} + +void GPUMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { + MS_LOG(EXCEPTION) << "Could not free gpu device memory."; + } + } + GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); +} + +uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (context_ptr->enable_dynamic_mem_pool()) { + auto device_ptr = AllocTensorMemDynamic(size); + MS_EXCEPTION_IF_NULL(device_ptr); + return AddressOffset(device_ptr, 0); + } + + auto align_size = GetCommonAlignSize(size); + if (static_mem_offset_ < align_size) { + 
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + auto offset = static_mem_offset_ - align_size; + if (dynamic_mem_offset_ > offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_static_size_ += align_size; + static_mem_offset_ = offset; + return device_mem_base_ + offset; +} +} // namespace gpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h new file mode 100644 index 0000000000..a18226bdf3 --- /dev/null +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h @@ -0,0 +1,40 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ +#include "device/memory_manager.h" +namespace mindspore { +namespace device { +namespace gpu { +class GPUMemoryManager : public MemoryManager { + public: + GPUMemoryManager() = default; + virtual ~GPUMemoryManager() = default; + + void MallocDeviceMemory() override; + void FreeDeviceMemory() override; + + void *AllocTensorMemDynamic(size_t size) override; + void FreeTensorMemDynamic(void *device_ptr) override; + + protected: + uint8_t *MallocStaticMem(size_t size, bool communication_mem); +}; +} // namespace gpu +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 0a9be35fb5..16025ed8a4 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -31,18 +31,13 @@ #include "ir/value.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; -using mindspore::memreuse::BestFitMemReuse; -using mindspore::memreuse::MemReuseUtilPtr; namespace mindspore { namespace device { KernelRuntime::~KernelRuntime() { - device_mem_base_ = nullptr; - device_mem_pool_base_ = nullptr; #ifdef ENABLE_DUMP_E2E dump_conf_ptr_ = nullptr; #endif - mem_reuse_util_ptr_ = nullptr; } bool KernelRuntime::Run(session::KernelGraph *graph) { @@ -88,11 +83,6 @@ bool KernelRuntime::LoadTask(const session::KernelGraph *graph) { return false; } -void KernelRuntime::FreeHostMemory() { - dynamic_mem_offset_ = 0; - static_mem_offset_ = 0; -} - // for D to impl bool KernelRuntime::RunTask(const session::KernelGraph *graph) { if (graph != nullptr) { @@ -126,13 +116,11 @@ size_t KernelRuntime::CountNodeDeviceMemorySize(const mindspore::AnfNodePtr &nod void KernelRuntime::AssignMemory(session::KernelGraph *graph) { auto context_ptr = 
MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->ResetDynamicMemory(); AssignStaticMemory(graph); - bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); - if (is_enable_mem_reuse) { - ReuseAssignDynamicMemory(graph); - } else { - AssignDynamicMemory(graph); - } + AssignDynamicMemory(graph); + UpdateRefNodeOutputMem(graph); } @@ -159,6 +147,7 @@ void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { void KernelRuntime::RunOpAssignInputMemory(const std::vector &input_tensors, const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) { auto item = graph->inputs()[input_index]; MS_EXCEPTION_IF_NULL(item); @@ -180,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, tensor_size, kStaticMem); + mem_manager_->MallocOpMemory(device_address, tensor_size); AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -188,6 +177,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); @@ -208,13 +198,14 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, output_sizes[i], kDynamicMem); + 
mem_manager_->MallocOpMemory(device_address, output_sizes[i]); AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); if (kernel->isa()) { auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); @@ -222,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - MallocOpMemory(device_address, workspace_lists[i], kDynamicMem); + mem_manager_->MallocOpMemory(device_address, workspace_lists[i]); AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } @@ -230,6 +221,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (auto &item : graph->inputs()) { MS_EXCEPTION_IF_NULL(item); if (!item->isa()) { @@ -247,7 +239,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { output_type_id = AnfAlgo::GetOutputInferDataType(item, index); } auto tensor_size = CountNodeDeviceMemorySize(item, index); - auto ptr = MallocStaticMem(tensor_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); auto address = CreateDeviceAddress(ptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); AnfAlgo::SetOutputAddr(address, index, item.get()); } @@ -301,6 +293,7 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) { void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(node); 
MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); @@ -314,12 +307,12 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr std::vector align_size_list; for (uint64_t mem_size : output_sizes) { if (context_ptr->enable_hccl()) { - mem_size = GetCommonAlignSize(mem_size); + mem_size = mem_manager_->GetCommonAlignSize(mem_size); } total_size += mem_size; align_size_list.emplace_back(mem_size); } - uint8_t *output_ptr = CalDeviceMem(node, total_size, flag, 0); + uint8_t *output_ptr = mem_manager_->MallocOutputMem(node, 0, flag, total_size); for (size_t j = 0; j < align_size_list.size(); ++j) { std::string output_format = AnfAlgo::GetOutputFormat(node, j); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j); @@ -333,6 +326,7 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); size_t total_size = 0; std::vector> addr_size; for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(node); ++i) { @@ -340,12 +334,12 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(address); auto mem_size = address->size(); if (context_ptr->enable_hccl()) { - mem_size = GetCommonAlignSize(mem_size); + mem_size = mem_manager_->GetCommonAlignSize(mem_size); } total_size += mem_size; addr_size.emplace_back(address.get(), mem_size); } - uint8_t *input_ptr = CalDeviceMem(node, total_size, kDynamicMem, 0); + uint8_t *input_ptr = mem_manager_->MallocOutputMem(node, 0, kDynamicMem, total_size); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); iter.first->set_ptr(input_ptr); @@ -355,7 +349,8 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) { MS_EXCEPTION_IF_NULL(node); - if 
(IsCommunicationOp(node)) { + MS_EXCEPTION_IF_NULL(mem_manager_); + if (AnfAlgo::IsCommunicationOp(node)) { UpdateCommunicationOpInputMem(node); AssignCommunicationNodeOutputMem(flag, node); return; @@ -375,7 +370,7 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in MS_LOG(INFO) << "Already malloc index:" << i; continue; } - auto ptr = CalDeviceMem(node, output_sizes[i], flag, i); + auto ptr = mem_manager_->MallocOutputMem(node, i, flag, output_sizes[i]); if (ptr == nullptr) { // reused ptr, no need alloc, continue; continue; @@ -390,6 +385,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const size_t output_idx) { MS_EXCEPTION_IF_NULL(value_node); MS_EXCEPTION_IF_NULL(node_value); + MS_EXCEPTION_IF_NULL(mem_manager_); auto tensor = node_value->cast(); if (tensor == nullptr) { MS_LOG(WARNING) << "Tensor is null"; @@ -397,7 +393,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const } size_t tensor_size = tensor->data().nbytes(); auto node_size = CountNodeDeviceMemorySize(value_node, output_idx); - auto ptr = MallocStaticMem(node_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, node_size); TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx); if (output_type_id == kTypeUnknown) { output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); @@ -414,6 +410,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); for (auto &value_node : graph->graph_value_nodes()) { MS_EXCEPTION_IF_NULL(value_node); if (AnfAlgo::OutputAddrExist(value_node, 0)) { @@ -440,7 +437,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { } else if (node_value->isa()) { auto value = GetValue(node_value); size_t tensor_size = value.size(); - auto ptr 
= MallocStaticMem(tensor_size, false); + auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); auto address = CreateDeviceAddress(ptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8); MS_EXCEPTION_IF_NULL(address); AnfAlgo::SetOutputAddr(address, 0, value_node.get()); @@ -452,103 +449,37 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { } } -void KernelRuntime::AssignDynamicMemory(const session::KernelGraph *graph) { +void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - // reset dynamic mem offset - dynamic_mem_offset_ = 0; - auto &kernels = graph->execution_order(); - for (auto &kernel : kernels) { - AssignNodeOutputMem(kDynamicMem, kernel, kGetAllOuts); - AssignWorkSpaceMem(kernel); + MS_EXCEPTION_IF_NULL(mem_manager_); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); + auto mem_flag = kDynamicMem; + if (is_enable_mem_reuse) { + mem_manager_->InitReuseDynamicMemory(graph); + mem_flag = kReuseDynamicMem; } -} - -void KernelRuntime::ReuseAssignDynamicMemory(session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - dynamic_mem_offset_ = 0; - MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); - MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); - // set all infos - mem_reuse_util_ptr->SetAllInfo(graph); - auto bestfit_mem_reuse = std::make_shared(); - MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); - bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); - size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); - MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; - mem_reuse_util_ptr_ = mem_reuse_util_ptr; - auto base_ptr = MallocDynamicMem(total_allocated_size, false); - mem_reuse_util_ptr_->set_mem_base(base_ptr); auto &kernels = graph->execution_order(); for (auto &kernel : kernels) { - AssignNodeOutputMem(kReuseDynamicMem, kernel, kGetAllOuts); - 
AssignReuseWorkSpaceMem(kernel); + AssignNodeOutputMem(mem_flag, kernel, kGetAllOuts); + AssignWorkSpaceMem(mem_flag, kernel); } } -void KernelRuntime::AssignReuseWorkSpaceMem(const AnfNodePtr &node) { +void KernelRuntime::AssignWorkSpaceMem(int flag, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(mem_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(node); MS_EXCEPTION_IF_NULL(kernel_mod); size_t index = 0; for (auto &size : kernel_mod->GetWorkspaceSizeList()) { - auto wk_ptr = mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); - AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(wk_ptr, size, "", kTypeUnknown), index, node.get()); + auto ptr = mem_manager_->MallocWorkSpaceMem(node, flag, index, size); + AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); index++; } } -void KernelRuntime::AssignWorkSpaceMem(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - if (node->isa()) { - auto kernel_mod = AnfAlgo::GetKernelMod(node); - MS_EXCEPTION_IF_NULL(kernel_mod); - size_t index = 0; - for (auto &size : kernel_mod->GetWorkspaceSizeList()) { - auto ptr = MallocDynamicMem(size, false); - AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); - index++; - } - } -} - -bool KernelRuntime::IsCommunicationOp(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto kernel_name = AnfAlgo::GetCNodeName(node); - auto kernel_type = AnfAlgo::GetKernelType(node); - if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { - return true; - } - return false; -} - -uint8_t *KernelRuntime::CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index) { - MS_EXCEPTION_IF_NULL(node); - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - uint8_t *ptr = nullptr; - if (IsCommunicationOp(node)) { - bool communication_mem = false; - if (context_ptr->enable_hccl()) { - communication_mem = true; - } - if (flag == 
kStaticMem) { - ptr = MallocStaticMem(size, communication_mem); - } else { - ptr = MallocDynamicMem(size, communication_mem); - } - return ptr; - } - - if (flag == kStaticMem) { - ptr = MallocStaticMem(size, false); - } else if (flag == kDynamicMem) { - ptr = MallocDynamicMem(size, false); - } else if (flag == kReuseDynamicMem) { - ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); - } - return ptr; -} - void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs, AddressPtrList *const kernel_workspaces, AddressPtrList *kernel_outputs) { @@ -659,65 +590,6 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) { return true; } -size_t KernelRuntime::GetCommonAlignSize(size_t input_size) const { - return (input_size + mem_align_size_ + 31) / mem_align_size_ * mem_align_size_; -} - -size_t KernelRuntime::GetCommunicationAlignSize(size_t input_size) const { - return (input_size + mem_align_size_ - 1) / mem_align_size_ * mem_align_size_ + 2 * mem_align_size_; -} - -uint8_t *KernelRuntime::MallocStaticMem(size_t size, bool communication_mem) { - size_t align_size = 0; - if (communication_mem) { - align_size = GetCommunicationAlignSize(size); - } else { - align_size = GetCommonAlignSize(size); - } - if (static_mem_offset_ < align_size) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_static_size_ += align_size; - auto offset = static_mem_offset_ - align_size; - if (dynamic_mem_offset_ > offset) { - MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - static_mem_offset_ = offset; - if (communication_mem) { - return device_mem_base_ + offset + mem_align_size_; - } else { - return device_mem_base_ + offset; - } -} - -uint8_t *KernelRuntime::MallocDynamicMem(size_t size, bool communication_mem) { - size_t align_size = 0; - if (communication_mem) { - align_size = GetCommunicationAlignSize(size); - } else { - align_size = GetCommonAlignSize(size); - } - uint64_t offset = dynamic_mem_offset_; - auto new_offset = dynamic_mem_offset_ + align_size; - if (new_offset > static_mem_offset_) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_dynamic_size_ += align_size; - dynamic_mem_offset_ = new_offset; - - if (communication_mem) { - return device_mem_base_ + offset + mem_align_size_; - } else { - return device_mem_base_ + offset; - } -} - bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); if (!LaunchKernelMod(*graph)) { @@ -731,29 +603,6 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { return true; } -void KernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) { - if (flag == kStaticMem) { - address->ptr_ = MallocStaticMem(size, false); - } else if (flag == kDynamicMem) { - address->ptr_ = MallocDynamicMem(size, false); - } else { - MS_LOG(EXCEPTION) << "Unknown memory type!"; - } -} - -void *KernelRuntime::AllocTensorMemDynamic(size_t size) { - if (size == 0) { - MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; - } - return nullptr; -} - -void KernelRuntime::FreeTensorMemDynamic(void *device_ptr) { - if (device_ptr == nullptr) { - MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; - 
} -} - #ifdef ENABLE_DUMP_E2E bool KernelRuntime::SetDumpConf() { dump_conf_ptr_ = std::make_shared(); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index ac9a56ed4d..1224bf14eb 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -20,8 +20,7 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" + #include "device/device_address.h" #include "ir/meta_tensor.h" #include "predict/generator/utils/ir_model_util.h" @@ -32,21 +31,16 @@ #include "session/anf_runtime_algorithm.h" #include "kernel/kernel.h" #include "utils/context/ms_context.h" +#include "device/memory_manager.h" // using mindspore::session::KernelGraph; using mindspore::tensor::Tensor; using TensorPtr = std::shared_ptr; -using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; using mindspore::kernel::AddressPtr; using AddressPtrList = std::vector; namespace mindspore { namespace device { -const int kStaticMem = 0; -const int kDynamicMem = 1; -const int kReuseDynamicMem = 2; -const int kGetAllOuts = -1; - class KernelRuntime { public: KernelRuntime() = default; @@ -65,7 +59,6 @@ class KernelRuntime { DumpConfPtr GetDumpConf(); #endif virtual bool LoadTask(const session::KernelGraph *graph); - virtual void FreeHostMemory(); // for GPU and D to impl virtual void ReleaseDeviceRes() {} void set_device_id(uint32_t device_id) { device_id_ = device_id; } @@ -75,29 +68,17 @@ class KernelRuntime { TypeId type_id) = 0; virtual bool SyncStream() = 0; void AssignStaticMemory(session::KernelGraph *graph); - void AssignDynamicMemory(const session::KernelGraph *graph); + void AssignDynamicMemory(session::KernelGraph *graph); void ReuseAssignDynamicMemory(session::KernelGraph *graph); void AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index); - void AssignWorkSpaceMem(const AnfNodePtr &node); + void AssignWorkSpaceMem(int flag, 
const AnfNodePtr &node); void AssignReuseWorkSpaceMem(const AnfNodePtr &node); void AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node); void UpdateRefNodeOutputMem(const session::KernelGraph *graph); void UpdateCommunicationOpInputMem(const AnfNodePtr &node); - bool IsCommunicationOp(const AnfNodePtr &node); - size_t GetCommonAlignSize(size_t input_size) const; - size_t GetCommunicationAlignSize(size_t input_size) const; - - uint8_t *CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index); - virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); - uint8_t *MallocDynamicMem(size_t size, bool communication_mem); #ifdef ENABLE_DUMP_E2E bool SetDumpConf(); #endif - // Alloc memory use the dynamic memory pool. - virtual void *AllocTensorMemDynamic(size_t size); - // Free memory use the dynamic memory pool. - virtual void FreeTensorMemDynamic(void *device_ptr); - virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag); private: void AssignStaticMemoryOutput(const session::KernelGraph *graph); @@ -114,20 +95,11 @@ class KernelRuntime { protected: uint32_t device_id_{0}; - uint8_t *device_mem_base_{nullptr}; - uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_size_{0}; - uint64_t dynamic_mem_offset_{0}; - uint64_t static_mem_offset_{0}; - const uint64_t mem_align_size_ = 512; #ifdef ENABLE_DUMP_E2E DumpConfPtr dump_conf_ptr_; #endif void *stream_ = nullptr; - size_t total_static_size_ = 0; - size_t total_dynamic_size_ = 0; - MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; + std::shared_ptr mem_manager_{nullptr}; }; using KernelRuntimePtr = std::shared_ptr; } // namespace device diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc new file mode 100644 index 0000000000..3c1ddee6bc --- /dev/null +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -0,0 +1,170 @@ +/** + * Copyright 2019 Huawei Technologies Co., 
Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/memory_manager.h" +#include "session/anf_runtime_algorithm.h" +#include "utils/context/ms_context.h" +using mindspore::memreuse::BestFitMemReuse; +using mindspore::memreuse::MemReuseUtilPtr; +namespace mindspore { +namespace device { +MemoryManager::~MemoryManager() { + device_mem_base_ = nullptr; + device_mem_pool_base_ = nullptr; + mem_reuse_util_ptr_ = nullptr; +} + +size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { + return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; +} + +size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { + return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; +} + +void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); + // set all infos + mem_reuse_util_ptr->SetAllInfo(graph); + auto bestfit_mem_reuse = std::make_shared(); + MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); + bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); + size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); + MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; + mem_reuse_util_ptr_ = mem_reuse_util_ptr; + auto base_ptr = MallocDynamicMem(total_allocated_size, false); + 
mem_reuse_util_ptr_->set_mem_base(base_ptr); +} + +uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { + MS_EXCEPTION_IF_NULL(node); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + uint8_t *ptr = nullptr; + if (AnfAlgo::IsCommunicationOp(node)) { + bool communication_mem = false; + if (context_ptr->enable_hccl()) { + communication_mem = true; + } + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, communication_mem); + } else { + ptr = MallocDynamicMem(size, communication_mem); + } + return ptr; + } + + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, false); + } else if (flag == kDynamicMem) { + ptr = MallocDynamicMem(size, false); + } else if (flag == kReuseDynamicMem) { + ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); + } + return ptr; +} + +uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { + if (flag == kReuseDynamicMem) { + return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); + } + return MallocDynamicMem(size, false); +} + +uint8_t *MemoryManager::MallocMem(int flag, size_t size) { + uint8_t *ptr = nullptr; + if (flag == kStaticMem) { + ptr = MallocStaticMem(size, false); + } else if (flag == kDynamicMem) { + ptr = MallocDynamicMem(size, false); + } + return ptr; +} + +uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (static_mem_offset_ < align_size) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_static_size_ += align_size; + auto offset = static_mem_offset_ - align_size; + if (dynamic_mem_offset_ > offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + static_mem_offset_ = offset; + if (communication_mem) { + return device_mem_base_ + offset + kMemAlignSize; + } else { + return device_mem_base_ + offset; + } +} + +uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + uint64_t offset = dynamic_mem_offset_; + auto new_offset = dynamic_mem_offset_ + align_size; + if (new_offset > static_mem_offset_) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] failed!"; + } + total_dynamic_size_ += align_size; + dynamic_mem_offset_ = new_offset; + + if (communication_mem) { + return device_mem_base_ + offset + kMemAlignSize; + } else { + return device_mem_base_ + offset; + } +} + +void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) { + auto device_ptr = AllocTensorMemDynamic(size); + MS_EXCEPTION_IF_NULL(device_ptr); + address->ptr_ = device_ptr; + address->mem_dynamic_alloc_ = true; +} + +void *MemoryManager::AllocTensorMemDynamic(size_t size) { + if (size == 0) { + MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; + } + return nullptr; +} + +void MemoryManager::FreeTensorMemDynamic(void *device_ptr) { + if (device_ptr == nullptr) { + MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; + } +} +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h new file mode 100644 index 0000000000..2e47237def --- /dev/null +++ b/mindspore/ccsrc/device/memory_manager.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ +#include +#include "pre_activate/mem_reuse/mem_reuse.h" +#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +namespace mindspore { +namespace device { +const int kStaticMem = 0; +const int kDynamicMem = 1; +const int kReuseDynamicMem = 2; +const int kGetAllOuts = -1; +const uint64_t kMemAlignSize = 512; +using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; + +class MemoryManager { + public: + MemoryManager() = default; + virtual ~MemoryManager(); + + virtual void MallocDeviceMemory() = 0; + virtual void FreeDeviceMemory() = 0; + void ResetDynamicMemory() { + total_dynamic_size_ = 0; + dynamic_mem_offset_ = 0; + } + + void InitReuseDynamicMemory(session::KernelGraph *graph); + uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); + uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); + virtual uint8_t *MallocMem(int flag, size_t size); + + // Alloc memory use the dynamic memory pool. + virtual void *AllocTensorMemDynamic(size_t size); + // Free memory use the dynamic memory pool. 
+ virtual void FreeTensorMemDynamic(void *device_ptr); + virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size); + size_t GetCommonAlignSize(size_t input_size) const; + size_t GetCommunicationAlignSize(size_t input_size) const; + + protected: + virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); + virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); + uint8_t *device_mem_base_{nullptr}; + uint8_t *device_mem_pool_base_{nullptr}; + uint64_t device_mem_size_{0}; + uint64_t device_mem_pool_size_{0}; + uint64_t dynamic_mem_offset_{0}; + uint64_t static_mem_offset_{0}; + size_t total_static_size_ = 0; + size_t total_dynamic_size_ = 0; + MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; +}; +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index cc23dbbdd2..78922448af 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -857,5 +857,15 @@ void AnfRuntimeAlgorithm::SetNodeInput(const CNodePtr &node, const AnfNodePtr &i MS_EXCEPTION_IF_NULL(input_node); node->set_input(index + 1, input_node); } + +bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto kernel_name = AnfAlgo::GetCNodeName(node); + auto kernel_type = AnfAlgo::GetKernelType(node); + if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { + return true; + } + return false; +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h index 2de68f0098..55650ac31e 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h @@ -166,6 +166,7 @@ class AnfRuntimeAlgorithm { static bool IsFeatureMapInput(const 
AnfNodePtr &node, size_t input_index); // get real input index for some tbe ops which input order is different between me and tbe impl static size_t GetRealInputIndex(const AnfNodePtr &anf_node, const size_t cur_index); + static bool IsCommunicationOp(const AnfNodePtr &node); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 29330fb193..bbcf2228cc 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -102,10 +102,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList graph->set_execution_order(execution_order); // Alloc memory, including static memory and dynamic memory AllocateMemory(graph.get()); - // Reset memory resource - auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); - MS_EXCEPTION_IF_NULL(runtime_instance); - runtime_instance->FreeHostMemory(); return graph_id; } diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 8d3f8a8138..3c1351a857 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -85,6 +85,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/kernel/oplib/*.cc" "../../../mindspore/ccsrc/kernel/tbe/*.cc" "../../../mindspore/ccsrc/device/kernel_runtime.cc" + "../../../mindspore/ccsrc/device/memory_manager.cc" "../../../mindspore/ccsrc/device/kernel_runtime_manager.cc" "../../../mindspore/ccsrc/device/kernel_info.cc" "../../../mindspore/ccsrc/device/ascend/profiling/*.cc" @@ -92,6 +93,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/convert_tensor_utils.cc" "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc" "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" + 
"../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" From ed04b8e165e65645b74e7cb532151356ad14ad45 Mon Sep 17 00:00:00 2001 From: zhoufeng Date: Mon, 6 Apr 2020 11:22:47 +0800 Subject: [PATCH 41/58] default build command "-z" (minddata) and "-M on" (gpu) by default in build.sh --- build.sh | 24 +++++++++++++++++------- cmake/mind_expression.cmake | 8 ++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/build.sh b/build.sh index 9d812d6dcc..8bb5f3d95a 100755 --- a/build.sh +++ b/build.sh @@ -26,7 +26,7 @@ usage() echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-s] [-b ge|cpu] [-m infer|train] \\" echo " [-a on|off] [-g on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z] [-M on|off] [-V 9.2|10.1] [-I] [-K]" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]" echo "" echo "Options:" echo " -d Debug mode" @@ -50,8 +50,8 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -Q Enable dump end to end, default off" echo " -D Enable dumping of function graph ir, default on" - echo " -z Compile dataset & mindrecord, default off" - echo " -M Enable MPI and NCCL for GPU training, default off" + echo " -z Compile dataset & mindrecord, default on" + echo " -M Enable MPI and NCCL for GPU training, default on" echo " -V Specify the minimum required cuda version, default CUDA 9.2" echo " -I Compile predict, default off" echo " -K Compile with AKG, default off" @@ -88,8 +88,8 @@ checkopts() ENABLE_DUMP2PROTO="on" ENABLE_DUMPE2E="off" ENABLE_DUMP_IR="on" - COMPILE_MINDDATA="off" - ENABLE_MPI="off" + COMPILE_MINDDATA="on" + ENABLE_MPI="on" CUDA_VERSION="9.2" COMPILE_PREDICT="off" USE_GLOG="on" @@ -177,7 
+177,7 @@ checkopts() if [[ "X$OPTARG" == "Xgpu" ]]; then ENABLE_GPU="on" ENABLE_CPU="on" - elif [[ "X$OPTARG" == "Xd" ]]; then + elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then ENABLE_D="on" ENABLE_CPU="on" elif [[ "X$OPTARG" == "Xcpu" ]]; then @@ -216,7 +216,17 @@ checkopts() echo "enable dump function graph ir" ;; z) - COMPILE_MINDDATA="on" + eval ARG=\$\{$OPTIND\} + if [[ -n $ARG && $ARG != -* ]]; then + OPTARG=$ARG + check_on_off $OPTARG z + OPTIND=$((OPTIND + 1)) + else + OPTARG="" + fi + if [[ "X$OPTARG" == "Xoff" ]]; then + COMPILE_MINDDATA="off" + fi ;; I) COMPILE_PREDICT="on" diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 345fd4675e..af122d4117 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -29,11 +29,11 @@ if (ENABLE_GPU) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/dmlc_core.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/rang.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tvm_gpu.cmake) -endif() -if (ENABLE_MPI) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) + if (ENABLE_MPI) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) + endif() endif() if (ENABLE_GE) From 849543ac96c76c876195192bb94ff5a6008fbc6a Mon Sep 17 00:00:00 2001 From: zhoufeng Date: Mon, 6 Apr 2020 12:17:43 +0800 Subject: [PATCH 42/58] Distinguish package name according to hardware platform --- build.sh | 6 ++++-- package.sh | 17 ++++++++++++++++- setup_package.py | 2 +- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 8bb5f3d95a..6dc699000a 100755 --- a/build.sh +++ b/build.sh @@ -462,8 +462,10 @@ if [[ "X$INC_BUILD" = "Xoff" ]]; then bash "${PROJECT_PATH}/package.sh" ge elif [[ "X$ENABLE_GPU" = "Xon" ]]; then bash "${PROJECT_PATH}/package.sh" ms gpu - elif [[ "X$ENABLE_D" = "Xon" ]] || [[ "X$ENABLE_CPU" = 
"Xon" ]]; then - bash "${PROJECT_PATH}/package.sh" ms + elif [[ "X$ENABLE_D" = "Xon" ]]; then + bash "${PROJECT_PATH}/package.sh" ms ascend + elif [[ "X$ENABLE_CPU" = "Xon" ]]; then + bash "${PROJECT_PATH}/package.sh" ms cpu else bash "${PROJECT_PATH}/package.sh" debug fi diff --git a/package.sh b/package.sh index 0d4147c9f6..67f4761f37 100755 --- a/package.sh +++ b/package.sh @@ -96,14 +96,29 @@ if [ -n "$1" ];then else export BACKEND_POLICY="ms" fi + +# package name +if [[ "X$1" = "Xge" ]]; then + export MS_PACKAGE_NAME="mindspore" +elif [[ "X$1" = "Xms" && "X$2" = "Xgpu" ]]; then + export MS_PACKAGE_NAME="mindspore-gpu" +elif [[ "X$1" = "Xms" && "X$2" = "Xascend" ]]; then + export MS_PACKAGE_NAME="mindspore-ascend" +elif [[ "X$1" = "Xms" && "X$2" = "Xcpu" ]]; then + export MS_PACKAGE_NAME="mindspore" +else + export MS_PACKAGE_NAME="mindspore" +fi + ${PYTHON} "${BASEPATH}/setup_package.py" bdist_wheel chmod -R 700 ${PACKAGE_PATH}/mindspore/ -chmod -R 700 ${PACKAGE_PATH}/mindspore.egg-info/ +chmod -R 700 ${PACKAGE_PATH}/${MS_PACKAGE_NAME//-/_}.egg-info/ # rename package PACKAGE_FULL_NAME=$(find "${PACKAGE_PATH}" -iname "*.whl") PACKAGE_BASE_NAME=$(echo ${PACKAGE_FULL_NAME} | awk -F / '{print $NF}' | awk -F - '{print $1"-"$2}') +PACKAGE_BASE_NAME=${PACKAGE_BASE_NAME//_*-/-} PACKAGE_NEW_NAME="${PACKAGE_BASE_NAME}-${PY_TAGS}-${PLATFORM_TAG}.whl" cp -rf "${PACKAGE_PATH}/dist"/*.whl "${PACKAGE_PATH}/${PACKAGE_NEW_NAME}" diff --git a/setup_package.py b/setup_package.py index 8b6889cd34..87b5718de2 100644 --- a/setup_package.py +++ b/setup_package.py @@ -21,7 +21,6 @@ from setuptools import setup, find_packages from setuptools.command.egg_info import egg_info from setuptools.command.build_py import build_py -package_name = 'mindspore' version = '0.1.0' author = 'The MindSpore Authors' author_email = 'contact@mindspore.cn' @@ -29,6 +28,7 @@ home_page = 'https://www.mindspore.cn' backend_policy = os.getenv('BACKEND_POLICY') commit_id = os.getenv('COMMIT_ID').replace("\n", 
"") +package_name = os.getenv('MS_PACKAGE_NAME').replace("\n", "") pwd = os.path.dirname(os.path.realpath(__file__)) pkg_dir = os.path.join(pwd, 'build/package') From 513f384c43f5d850fabdfc9ca878ed7cd7f403a3 Mon Sep 17 00:00:00 2001 From: yao_yf Date: Wed, 8 Apr 2020 17:24:22 +0800 Subject: [PATCH 43/58] fix auto parallel prelu --- mindspore/ccsrc/parallel/ops_info/prelu_info.cc | 2 +- tests/ut/cpp/parallel/ops_info/prelu_test.cc | 6 ++---- tests/ut/python/parallel/test_prelu.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc index 9aa8513331..1a44501f42 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc @@ -52,7 +52,7 @@ Status PReLUInfo::CheckStrategy(const StrategyPtr& strategy) { } return FAILED; } - if ((stra[0][PRELU_CHANNEL_INDEX] != PRELU_CHANNEL_STRATEGY) || (stra[1][0] != PRELU_CHANNEL_STRATEGY)) { + if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0]) { if (is_auto_parallel_) { MS_LOG(DEBUG) << name_ << ": Invalid channel strategy."; } else { diff --git a/tests/ut/cpp/parallel/ops_info/prelu_test.cc b/tests/ut/cpp/parallel/ops_info/prelu_test.cc index 5ff261234f..d6db1b8460 100644 --- a/tests/ut/cpp/parallel/ops_info/prelu_test.cc +++ b/tests/ut/cpp/parallel/ops_info/prelu_test.cc @@ -146,11 +146,10 @@ TEST_F(TestPReLUInfo, CheckStrategy1) { } TEST_F(TestPReLUInfo, CheckStrategy2) { - // Success: {{2,1,8,16},{1}} std::vector inputs = {{2, 4, 8, 16}, {4}}; StrategyPtr strategy = NewStrategy(0, inputs); Status ret = prelu->Init(strategy); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, SUCCESS); } TEST_F(TestPReLUInfo, AutoStrategy1) { @@ -252,11 +251,10 @@ TEST_F(TestPReLUInfo, CheckStrategy_2d1) { } TEST_F(TestPReLUInfo, CheckStrategy_2d2) { - // Success: {{2,1,8,16},{1}} std::vector inputs = {{128, 4}, {4}}; StrategyPtr strategy = NewStrategy(0, inputs); Status ret = 
prelu_2d->Init(strategy); - ASSERT_EQ(ret, FAILED); + ASSERT_EQ(ret, SUCCESS); } TEST_F(TestPReLUInfo, AutoStrategy_2d1) { diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index c601045491..d3ad1cc710 100755 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -149,3 +149,20 @@ def test_prelu_parallel_success3(): w = Tensor(np.random.rand(16),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) _executor.compile(net, x, y, w) + +def test_prelu_parallel_success4(): + class Net(nn.Cell): + def __init__(self, strategy): + super().__init__() + self.prelu = P.PReLU().set_strategy(strategy) + def construct(self, x, y): + out = self.prelu(x, y) + return out + context.reset_auto_parallel_context() + context.set_auto_parallel_context(device_num=64, global_rank=0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + strategy = ((2, 4, 4, 2), (4, )) + x = Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32) + w = Tensor(np.random.rand(16),dtype=ms.float32) + net = GradWrap(NetWithLoss(Net(strategy))) + _executor.compile(net, x, w) From c0e2a63fdb989ff598869b38e184c5049cea1948 Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Wed, 8 Apr 2020 16:36:06 -0400 Subject: [PATCH 44/58] Correct dataset error checking --- mindspore/dataset/engine/datasets.py | 2 -- mindspore/dataset/engine/validators.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index ab2290c13c..2058bbf826 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -82,8 +82,6 @@ def zip(datasets): if len(datasets) <= 1: raise ValueError( "Can't zip empty or just one dataset!") - if not isinstance(datasets, tuple): - raise TypeError("The zip function %s type error!" 
% (datasets)) return ZipDataset(datasets) diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 26d6241945..4c84cfe354 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -105,13 +105,13 @@ def check(method): "The %s function %s exceeds the boundary!" % ( func_name, param_name)) if isinstance(arg, int) and param_name == "num_parallel_workers" and ( - arg <= 0 or arg > cpu_count()): + arg < 1 or arg > cpu_count()): raise ValueError( "The %s function %s exceeds the boundary(%s)!" % ( func_name, param_name, cpu_count())) if isinstance(arg, int) and param_name != "seed" \ and param_name != "count" and param_name != "prefetch_size" \ - and param_name != "num_parallel_workers" and (arg <= 0 or arg > 2147483647): + and param_name != "num_parallel_workers" and (arg < 1 or arg > 2147483647): raise ValueError( "The %s function %s exceeds the boundary!" % ( func_name, param_name)) @@ -271,8 +271,8 @@ def check_interval_closed(param, param_name, valid_range): def check_num_parallel_workers(value): check_type(value, 'num_parallel_workers', int) - if value <= 0 or value > cpu_count(): - raise ValueError("num_parallel_workers exceeds the boundary between 0 and {}!".format(cpu_count())) + if value < 1 or value > cpu_count(): + raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) def check_num_samples(value): From 0b1ae67418a588a0f58e3f7bdadfb42cf53e97ef Mon Sep 17 00:00:00 2001 From: jinyaohui Date: Thu, 9 Apr 2020 09:23:39 +0800 Subject: [PATCH 45/58] modify comment --- example/yolov3_coco2017/train.py | 2 +- mindspore/ccsrc/transform/convert.cc | 2 +- mindspore/nn/wrap/loss_scale.py | 2 +- tests/ut/python/utils/test_callback.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index 0a32a6d30d..121e2aa810 100644 --- a/example/yolov3_coco2017/train.py 
+++ b/example/yolov3_coco2017/train.py @@ -67,7 +67,7 @@ if __name__ == '__main__': parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") - parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or non-sink mode, default is sink") + parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink") parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 59985c8ae3..bebd000958 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -449,7 +449,7 @@ void DfGraphConvertor::InitLoopVar(std::vector *init_input) { if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) { value = ConfigManager::GetInstance().iter_num(); } else { - MS_LOG(INFO) << "Run with non-sink mode, the iterator number will always be 1"; + MS_LOG(INFO) << "Run with normal(non-sink) mode, the iterator number will always be 1"; value = 1; ConfigManager::GetInstance().set_iter_num(value); } diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index fd1c22be1f..c6d61e6983 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -51,7 +51,7 @@ class DynamicLossScaleUpdateCell(Cell): In every training step, the loss scaling value will be updated by loss scaling value/`scale_factor` when there is overflow. 
And it will be increased by loss scaling value * `scale_factor` if there is no overflow for a continuous `scale_window` steps. This cell is used for Graph mode training in which all - logic will be executed on device side(Another training mode is non-sink mode in which some logic will be + logic will be executed on device side(Another training mode is normal(non-sink) mode in which some logic will be executed on host). Args: diff --git a/tests/ut/python/utils/test_callback.py b/tests/ut/python/utils/test_callback.py index 7e7b893e0c..8c10c8886d 100644 --- a/tests/ut/python/utils/test_callback.py +++ b/tests/ut/python/utils/test_callback.py @@ -112,8 +112,8 @@ def test_save_checkpoint(): os.remove('./test_files/test_ckpt-model.pkl') -def test_loss_monitor_sink_model(): - """Test loss monitor sink model.""" +def test_loss_monitor_sink_mode(): + """Test loss monitor sink mode.""" cb_params = _InternalCallbackParam() cb_params.cur_epoch_num = 4 cb_params.cur_step_num = 2 @@ -131,8 +131,8 @@ def test_loss_monitor_sink_model(): callbacklist.end(run_context) -def test_loss_monitor_feed_model(): - """Test loss monitor non-sink mode.""" +def test_loss_monitor_normal_mode(): + """Test loss monitor normal(non-sink) mode.""" cb_params = _InternalCallbackParam() run_context = RunContext(cb_params) loss_cb = LossMonitor(1) From 25cdf4833679e14d1cda2ae4d523f6856af9c2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=87=E4=B8=87=E6=B2=A1=E6=83=B3=E5=88=B0?= Date: Tue, 7 Apr 2020 14:56:05 +0800 Subject: [PATCH 46/58] fix review opinions in doc/comments --- mindspore/common/initializer.py | 2 +- mindspore/ops/operations/array_ops.py | 86 ++++++++++----------- mindspore/ops/operations/math_ops.py | 105 +++++++++++++------------- mindspore/ops/operations/nn_ops.py | 44 +++++------ mindspore/train/serialization.py | 2 +- 5 files changed, 116 insertions(+), 123 deletions(-) diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index 4261621272..d55e03314d 100644 
--- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -276,7 +276,7 @@ def initializer(init, shape=None, dtype=mstype.float32): shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of output. Default: None. - dtype (:class:`mindspore.dtype`): The type of data in initialized tensor. Default: mstype.float32. + dtype (:class:`mindspore.dtype`): The type of data in initialized tensor. Default: mindspore.float32. Returns: Tensor, initialized tensor. diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 59d3083c5d..dda490566f 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -62,7 +62,7 @@ class ExpandDims(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> expand_dims = ExpandDims() + >>> expand_dims = P.ExpandDims() >>> output = expand_dims(input_tensor, 0) """ @@ -101,7 +101,7 @@ class DType(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> type = DType()(input_tensor) + >>> type = P.DType()(input_tensor) """ @prim_attr_register @@ -134,7 +134,7 @@ class SameTypeShape(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) >>> input_y = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> out = SameTypeShape()(input_x, input_y) + >>> out = P.SameTypeShape()(input_x, input_y) """ @prim_attr_register @@ -175,7 +175,7 @@ class Cast(PrimitiveWithInfer): >>> input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) >>> input_x = Tensor(input_np) >>> type_dst = mindspore.int32 - >>> cast = Cast() + >>> cast = P.Cast() >>> result = cast(input_x, type_dst) >>> expect = input_np.astype(type_dst) """ @@ -227,7 +227,7 @@ class IsSubClass(PrimitiveWithInfer): bool, the check result. 
Examples: - >>> result = IsSubClass()(mindspore.int32, mindspore.intc) + >>> result = P.IsSubClass()(mindspore.int32, mindspore.intc) """ @prim_attr_register @@ -262,7 +262,7 @@ class IsInstance(PrimitiveWithInfer): Examples: >>> a = 1 - >>> result = IsInstance()(a, mindspore.int32) + >>> result = P.IsInstance()(a, mindspore.int32) """ @prim_attr_register @@ -303,7 +303,7 @@ class Reshape(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> reshape = Reshape() + >>> reshape = P.Reshape() >>> output = reshape(input_tensor, (3, 2)) """ @@ -366,7 +366,7 @@ class Shape(Primitive): Examples: >>> input_tensor = Tensor(np.ones(shape=[3, 2, 1]), mindspore.float32) - >>> shape = Shape() + >>> shape = P.Shape() >>> output = shape(input_tensor) """ @@ -398,7 +398,7 @@ class Squeeze(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.ones(shape=[3, 2, 1]), mindspore.float32) - >>> squeeze = Squeeze(2) + >>> squeeze = P.Squeeze(2) >>> output = squeeze(input_tensor) """ @@ -450,7 +450,7 @@ class Transpose(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), mindspore.float32) >>> perm = (0, 2, 1) - >>> transpose = Transpose() + >>> transpose = P.Transpose() >>> output = transpose(input_tensor, perm) """ @@ -504,10 +504,10 @@ class GatherV2(PrimitiveWithInfer): Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. 
Examples: - >>> params = Tensor(np.array([[1, 2, 7, 42], [3, 4, 54, 22], [2, 2, 55, 3]]), mindspore.float32) - >>> indices = Tensor(np.array([1, 2]), mindspore.int32) + >>> input_params = Tensor(np.array([[1, 2, 7, 42], [3, 4, 54, 22], [2, 2, 55, 3]]), mindspore.float32) + >>> input_indices = Tensor(np.array([1, 2]), mindspore.int32) >>> axis = 1 - >>> out = GatherV2()(params, indices, axis) + >>> out = P.GatherV2()(input_params, input_indices, axis) """ @prim_attr_register @@ -556,7 +556,7 @@ class Split(PrimitiveWithInfer): :math:`(y_1, y_2, ..., y_S)`. Examples: - >>> split = Split(1, 2) + >>> split = P.Split(1, 2) >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) >>> output = split(x) """ @@ -606,7 +606,7 @@ class Rank(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> rank = Rank() + >>> rank = P.Rank() >>> rank(input_tensor) """ @@ -640,7 +640,7 @@ class TruncatedNormal(PrimitiveWithInfer): Examples: >>> input_shape = Tensor(np.array([1, 2, 3])) - >>> truncated_normal = TruncatedNormal() + >>> truncated_normal = P.TruncatedNormal() >>> output = truncated_normal(input_shape) """ @@ -681,7 +681,7 @@ class Size(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32) - >>> size = Size() + >>> size = P.Size() >>> output = size(input_tensor) """ @@ -826,7 +826,7 @@ class TupleToArray(PrimitiveWithInfer): Tensor, if the input tuple contain `N` numbers, then the output tensor shape is (N,). Examples: - >>> type = TupleToArray()((1,2,3)) + >>> type = P.TupleToArray()((1,2,3)) """ @prim_attr_register @@ -861,7 +861,7 @@ class ScalarToArray(PrimitiveWithInfer): Tensor. 0-D Tensor and the content is the input. Examples: - >>> op = ScalarToArray() + >>> op = P.ScalarToArray() >>> data = 1.0 >>> output = op(data) """ @@ -893,7 +893,7 @@ class ScalarToTensor(PrimitiveWithInfer): Tensor. 0-D Tensor and the content is the input. 
Examples: - >>> op = ScalarToTensor() + >>> op = P.ScalarToTensor() >>> data = 1 >>> output = op(data, mindspore.float32) """ @@ -934,7 +934,7 @@ class InvertPermutation(PrimitiveWithInfer): tuple[int]. the lenth is same as input. Examples: - >>> invert = InvertPermutation() + >>> invert = P.InvertPermutation() >>> input_data = (3, 4, 0, 2, 1) >>> output = invert(input_data) >>> output == (2, 4, 3, 0, 1) @@ -982,8 +982,8 @@ class Argmax(PrimitiveWithInfer): Tensor, indices of the max value of input tensor across the axis. Examples: - >>> input = Tensor(np.array([2.0, 3.1, 1.2])) - >>> index = Argmax()(input) + >>> input_x = Tensor(np.array([2.0, 3.1, 1.2])) + >>> index = P.Argmax()(input_x) >>> assert index == Tensor(1, mindspore.int64) """ @@ -1030,8 +1030,8 @@ class Argmin(PrimitiveWithInfer): Tensor, indices of the min value of input tensor across the axis. Examples: - >>> input = Tensor(np.array([2.0, 3.1, 1.2])) - >>> index = Argmin()(input) + >>> input_x = Tensor(np.array([2.0, 3.1, 1.2])) + >>> index = P.Argmin()(input_x) >>> assert index == Tensor(2, mindspore.int64) """ @@ -1082,8 +1082,8 @@ class ArgMaxWithValue(PrimitiveWithInfer): :math:`(x_1, x_2, ..., x_{axis-1}, x_{axis+1}, ..., x_N)`. Examples: - >>> input = Tensor(np.random.rand(5)) - >>> index, output = ArgMaxWithValue()(input) + >>> input_x = Tensor(np.random.rand(5)) + >>> index, output = P.ArgMaxWithValue()(input_x) """ @prim_attr_register @@ -1129,8 +1129,8 @@ class ArgMinWithValue(PrimitiveWithInfer): :math:`(x_1, x_2, ..., x_{axis-1}, x_{axis+1}, ..., x_N)`. 
Examples: - >>> input = Tensor(np.random.rand(5)) - >>> index, output = ArgMinWithValue()(input) + >>> input_x = Tensor(np.random.rand(5)) + >>> index, output = P.ArgMinWithValue()(input_x) """ @prim_attr_register def __init__(self, axis=0, keep_dims=False): @@ -1325,7 +1325,7 @@ class Concat(PrimitiveWithInfer): Examples: >>> data1 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)) >>> data2 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)) - >>> op = Concat() + >>> op = P.Concat() >>> output = op((data1, data2)) """ @@ -1607,7 +1607,7 @@ class Select(PrimitiveWithInfer): Tensor, has the same shape as input_y. The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. Examples: - >>> select = Select() + >>> select = P.Select() >>> input_x = Tensor([True, False]) >>> input_y = Tensor([2,3], mindspore.float32) >>> input_z = Tensor([1,2], mindspore.float32) @@ -1681,7 +1681,7 @@ class StridedSlice(PrimitiveWithInfer): Examples >>> input_x = Tensor([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], >>> [[5, 5, 5], [6, 6, 6]]], mindspore.float32) - >>> slice = StridedSlice() + >>> slice = P.StridedSlice() >>> output = slice(input_x, (1, 0, 0), (2, 1, 3), (1, 1, 1)) >>> output.shape() (1, 1, 3) @@ -1913,9 +1913,9 @@ class ScatterNd(PrimitiveWithInfer): Tensor, the new tensor, has the same type as `update` and the same shape as `shape`. 
Examples: - >>> op = ScatterNd() - >>> update = Tensor(np.array([3.2, 1.1]), mindspore.float32) + >>> op = P.ScatterNd() >>> indices = Tensor(np.array([[0, 1], [1, 1]]), mindspore.int32) + >>> update = Tensor(np.array([3.2, 1.1]), mindspore.float32) >>> shape = (3, 3) >>> output = op(indices, update, shape) """ @@ -1964,7 +1964,7 @@ class ResizeNearestNeighbor(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> resize = ResizeNearestNeighbor((2, 2)) + >>> resize = P.ResizeNearestNeighbor((2, 2)) >>> output = resize(input_tensor) """ @@ -1997,7 +1997,7 @@ class GatherNd(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) - >>> op = GatherNd() + >>> op = P.GatherNd() >>> output = op(input_x, indices) """ @@ -2039,7 +2039,7 @@ class ScatterNdUpdate(PrimitiveWithInfer): >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) >>> update = Tensor(np.array([1.0, 2.2]), mindspore.float32) - >>> op = ScatterNdUpdate() + >>> op = P.ScatterNdUpdate() >>> output = op(input_x, indices, update) """ @@ -2090,7 +2090,7 @@ class SpaceToDepth(PrimitiveWithInfer): Examples: >>> x = Tensor(np.random.rand(1,3,2,2), mindspore.float32) >>> block_size = 2 - >>> op = SpaceToDepth(block_size) + >>> op = P.SpaceToDepth(block_size) >>> output = op(x) >>> output.asnumpy().shape == (1,12,1,1) """ @@ -2148,7 +2148,7 @@ class DepthToSpace(PrimitiveWithInfer): Examples: >>> x = Tensor(np.random.rand(1,12,1,1), mindspore.float32) >>> block_size = 2 - >>> op = DepthToSpace(block_size) + >>> op = P.DepthToSpace(block_size) >>> output = op(x) >>> output.asnumpy().shape == (1,3,2,2) """ @@ -2212,8 +2212,8 @@ class SpaceToBatch(PrimitiveWithInfer): >>> block_size = 2 >>> paddings = [[0, 0], [0, 0]] 
>>> space_to_batch = P.SpaceToBatch(block_size, paddings) - >>> x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) - >>> space_to_batch(x) + >>> input_x = Tensor(np.array([[[[1, 2], [3, 4]]]]), mindspore.float32) + >>> space_to_batch(input_x) [[[[1.]]], [[[2.]]], [[[3.]]], [[[4.]]]] """ @@ -2280,8 +2280,8 @@ class BatchToSpace(PrimitiveWithInfer): >>> block_size = 2 >>> crops = [[0, 0], [0, 0]] >>> op = P.BatchToSpace(block_size, crops) - >>> x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) - >>> output = op(x) + >>> input_x = Tensor(np.array([[[[1]]], [[[2]]], [[[3]]], [[[4]]]]), mindspore.float32) + >>> output = op(input_x) [[[[1., 2.], [3., 4.]]]] """ diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index d003f6ee8b..1294a65d02 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -112,9 +112,9 @@ class TensorAdd(_MathBinaryOp): Examples: >>> add = P.TensorAdd() - >>> x = Tensor(np.array([1,2,3]).astype(np.float32)) - >>> y = Tensor(np.array([4,5,6]).astype(np.float32)) - >>> add(x, y) + >>> input_x = Tensor(np.array([1,2,3]).astype(np.float32)) + >>> input_y = Tensor(np.array([4,5,6]).astype(np.float32)) + >>> add(input_x, input_y) [5,7,9] """ @@ -124,23 +124,24 @@ class AssignAdd(PrimitiveWithInfer): Updates a `Parameter` by adding a value to it. Inputs: - - **input_x** (Parameter) - The `Parameter`. - - **input_y** (Union[scalar, Tensor]) - Has the same shape as `input_x`. + - **variable** (Parameter) - The `Parameter`. + - **value** (Union[numbers.Number, Tensor]) - The value to be added to the `variable`. + It should have the same shape as `variable` if it is a Tensor. 
Examples: >>> class Net(Cell): >>> def __init__(self): >>> super(Net, self).__init__() >>> self.AssignAdd = P.AssignAdd() - >>> self.inputdata = Parameter(initializer(1, [1], mindspore.int64), name="global_step") + >>> self.variable = Parameter(initializer(1, [1], mindspore.int64), name="global_step") >>> >>> def construct(self, x): - >>> self.AssignAdd(self.inputdata, x) - >>> return self.inputdata + >>> self.AssignAdd(self.variable, x) + >>> return self.variable >>> >>> net = Net() - >>> x = Tensor(np.ones([1]).astype(np.int64)*100) - >>> net(x) + >>> value = Tensor(np.ones([1]).astype(np.int64)*100) + >>> net(value) """ __mindspore_signature__ = ( ('variable', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD), @@ -166,22 +167,24 @@ class AssignSub(PrimitiveWithInfer): Updates a `Parameter` by subtracting a value from it. Inputs: - - **input_x** (Parameter) - The `Parameter`. - - **input_y** (Union[scalar, Tensor]) - Has the same shape as `input_x`. + - **variable** (Parameter) - The `Parameter`. + - **value** (Union[numbers.Number, Tensor]) - The value to be subtracted from the `variable`. + It should have the same shape as `variable` if it is a Tensor. Examples: >>> class Net(Cell): >>> def __init__(self): + >>> super(Net, self).__init__() >>> self.AssignSub = P.AssignSub() - >>> self.inputdata = Parameter(initializer(1, [1], mindspore.int64), name="global_step") + >>> self.variable = Parameter(initializer(1, [1], mindspore.int64), name="global_step") >>> >>> def construct(self, x): - >>> self.AssignSub(self.inputdata, x) - >>> return self.inputdata + >>> self.AssignSub(self.variable, x) + >>> return self.variable >>> >>> net = Net() - >>> x = Tensor(np.ones([1]).astype(np.int64)*100) - >>> net(x) + >>> value = Tensor(np.ones([1]).astype(np.int64)*100) + >>> net(value) """ __mindspore_signature__ = ( @@ -263,9 +266,9 @@ class ReduceMean(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. 
Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMean(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -295,9 +298,9 @@ class ReduceSum(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceSum(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -328,9 +331,9 @@ class ReduceAll(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.array([[True, False], [True, True]])) + >>> input_x = Tensor(np.array([[True, False], [True, True]])) >>> op = P.ReduceAll(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ def __infer__(self, input_x, axis): @@ -364,9 +367,9 @@ class ReduceMax(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMax(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -397,9 +400,9 @@ class ReduceMin(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceMin(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -429,9 +432,9 @@ class ReduceProd(_Reduce): the shape of output is :math:`(x_1, x_4, ..., x_R)`. 
Examples: - >>> data = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) >>> op = P.ReduceProd(keep_dims=True) - >>> output = op(data, 1) + >>> output = op(input_x, 1) """ @@ -451,15 +454,15 @@ class CumProd(PrimitiveWithInfer): Tensor, has the same shape and dtype as the 'input_x'. Examples: - >>> data = Tensor(np.array([a, b, c]).astype(np.float32)) + >>> input_x = Tensor(np.array([a, b, c]).astype(np.float32)) >>> op0 = P.CumProd() - >>> output = op0(data, 0) # output=[a, a * b, a * b * c] + >>> output = op0(input_x, 0) # output=[a, a * b, a * b * c] >>> op1 = P.CumProd(exclusive=True) - >>> output = op1(data, 0) # output=[1, a, a * b] + >>> output = op1(input_x, 0) # output=[1, a, a * b] >>> op2 = P.CumProd(reverse=True) - >>> output = op2(data, 0) # output=[a * b * c, b * c, c] + >>> output = op2(input_x, 0) # output=[a * b * c, b * c, c] >>> op3 = P.CumProd(exclusive=True, reverse=True) - >>> output = op3(data, 0) # output=[b * c, c, 1] + >>> output = op3(input_x, 0) # output=[b * c, c, 1] """ @prim_attr_register def __init__(self, exclusive=False, reverse=False): @@ -1190,7 +1193,7 @@ class FloorMod(_MathBinaryOp): Examples: >>> input_x = Tensor(np.array([2, 4, -1]), mindspore.int32) >>> input_y = Tensor(np.array([3, 3, 3]), mindspore.int32) - >>> floor_mod = FloorMod() + >>> floor_mod = P.FloorMod() >>> floor_mod(input_x, input_y) [2, 1, 2] """ @@ -1207,9 +1210,9 @@ class Acosh(PrimitiveWithInfer): Tensor, has the same shape as `input_x`. Examples: - >>> acosh = Acosh() - >>> X = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) - >>> output = acosh(X) + >>> acosh = P.Acosh() + >>> input_x = Tensor(np.array([1.0, 1.5, 3.0, 100.0]), mindspore.float32) + >>> output = acosh(input_x) """ @prim_attr_register @@ -1286,7 +1289,7 @@ class EqualCount(PrimitiveWithInfer): - **input_y** (Tensor) - The second input tensor. Outputs: - Tensor, has the same shape as the `input_x`. 
+ Tensor, with the type as `mindspore.int32` and size as (1,). Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1324,7 +1327,7 @@ class NotEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number, bool]) - The first input is a tensor whose data type is number or bool, or a number or a bool object. - - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as `input_x` or a number or a bool object. Outputs: @@ -1359,11 +1362,11 @@ class Greater(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, and the data type is bool. Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1386,11 +1389,11 @@ class GreaterEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool'. + Tensor, the shape is same as the shape after broadcasting, and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1413,7 +1416,7 @@ class Less(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: @@ -1440,7 +1443,7 @@ class LessEqual(_LogicBinaryOp): Inputs: - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or + - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or a number. Outputs: @@ -1752,8 +1755,8 @@ class Cos(PrimitiveWithInfer): Examples: >>> cos = P.Cos() - >>> X = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) - >>> output = cos(X) + >>> input_x = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) + >>> output = cos(input_x) """ @prim_attr_register @@ -1780,8 +1783,8 @@ class ACos(PrimitiveWithInfer): Examples: >>> acos = P.ACos() - >>> X = Tensor(np.array([0.74, 0.04, 0.30, 0.56]), mindspore.float32) - >>> output = acos(X) + >>> input_x = Tensor(np.array([0.74, 0.04, 0.30, 0.56]), mindspore.float32) + >>> output = acos(input_x) """ @prim_attr_register @@ -1993,7 +1996,7 @@ class Atan2(_MathBinaryOp): - **input_y** (Tensor) - The input tensor. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, and the data type is same as `input_x`. 
Examples: >>> input_x = Tensor(np.array([[0, 1]]), mindspore.float32) diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index c7c823aeb6..3cc6718484 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -41,7 +41,7 @@ class Flatten(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32) - >>> flatten = Flatten() + >>> flatten = P.Flatten() >>> output = flatten(input_tensor) >>> assert output.shape() == (1, 24) """ @@ -155,7 +155,7 @@ class ReLU(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> relu = ReLU() + >>> relu = P.ReLU() >>> result = relu(input_x) [[0, 4.0, 0.0], [2.0, 0.0, 9.0]] """ @@ -188,7 +188,7 @@ class ReLU6(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> relu6 = ReLU6() + >>> relu6 = P.ReLU6() >>> result = relu6(input_x) """ @@ -222,10 +222,10 @@ class Elu(PrimitiveWithInfer): Examples: >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], np.float32)) - >>> elu = Elu() + >>> elu = P.Elu() >>> result = elu(input_x) Tensor([[-0.632 4.0 -0.999] - [2.0 -0.993 9.0 ]], shape=(2, 3), dtype=ms.float32) + [2.0 -0.993 9.0 ]], shape=(2, 3), dtype=mindspore.float32) """ @prim_attr_register @@ -1082,7 +1082,7 @@ class TopK(PrimitiveWithInfer): Examples: >>> topk = P.TopK(sorted=True) - >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16)) + >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16) >>> k = 3 >>> values, indices = topk(input_x, k) >>> assert values == Tensor(np.array([5, 4, 3])) @@ -1223,8 +1223,8 @@ class ApplyMomentum(PrimitiveWithInfer): Examples: >>> net = ResNet50() - >>> loss = SoftmaxCrossEntropyWithLogits() - >>> opt = ApplyMomentum(Tensor(np.array([0.001])), Tensor(np.array([0.9])), + >>> loss = nn.SoftmaxCrossEntropyWithLogits() + >>> opt = 
P.ApplyMomentum(Tensor(np.array([0.001])), Tensor(np.array([0.9])), filter(lambda x: x.requires_grad, net.get_parameters())) >>> model = Model(net, loss, opt) """ @@ -1351,6 +1351,7 @@ class SGD(PrimitiveWithInfer): class ApplyRMSProp(PrimitiveWithInfer): """ Optimizer that implements the Root Mean Square prop(RMSProp) algorithm. + Please refer to the usage in source code of `nn.RMSProp`. Note: Update `var` according to the RMSProp algorithm. @@ -1386,12 +1387,6 @@ class ApplyRMSProp(PrimitiveWithInfer): Outputs: Tensor, parameters to be update. - - Examples: - >>> net = Net() - >>> loss = nn.SoftmaxCrossEntropyWithLogits() - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=learning_rate) - >>> model = Model(net, loss, opt) """ @prim_attr_register @@ -1424,6 +1419,7 @@ class ApplyRMSProp(PrimitiveWithInfer): class ApplyCenteredRMSProp(PrimitiveWithInfer): """ Optimizer that implements the centered RMSProp algorithm. + Please refer to the usage in source code of `nn.RMSProp`. Note: Update `var` according to the centered RMSProp algorithm. @@ -1464,12 +1460,6 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer): Outputs: Tensor, parameters to be update. - - Examples: - >>> net = Net() - >>> loss = nn.SoftmaxCrossEntropyWithLogits() - >>> opt = RMSProp(params=net.trainable_params(), learning_rate=learning_rate, centered=True) - >>> model = Model(net, loss, opt) """ @prim_attr_register @@ -1596,7 +1586,7 @@ class DropoutGenMask(Primitive): Tensor, the value of generated mask for input shape. 
Examples: - >>> dropout_gen_mask = DropoutGenMask() + >>> dropout_gen_mask = P.DropoutGenMask() >>> shape = (20, 16, 50) >>> keep_prob = Tensor(0.5, mindspore.float32) >>> mask = dropout_gen_mask(shape, keep_prob) @@ -1631,8 +1621,8 @@ class DropoutDoMask(PrimitiveWithInfer): >>> x = Tensor(np.ones([20, 16, 50]), mindspore.float32) >>> shape = (20, 16, 50) >>> keep_prob = Tensor(0.5, mindspore.float32) - >>> dropout_gen_mask = DropoutGenMask() - >>> dropout_do_mask = DropoutDoMask() + >>> dropout_gen_mask = P.DropoutGenMask() + >>> dropout_do_mask = P.DropoutDoMask() >>> mask = dropout_gen_mask(shape, keep_prob) >>> output = dropout_do_mask(x, mask, keep_prob) >>> assert output.shape() == (20, 16, 50) @@ -1737,7 +1727,7 @@ class OneHot(PrimitiveWithInfer): Examples: >>> indices = Tensor(np.array([0, 1, 2]), mindspore.int32) >>> depth, on_value, off_value = 3, Tensor(1.0, mindspore.float32), Tensor(0.0, mindspore.float32) - >>> onehot = OneHot() + >>> onehot = P.OneHot() >>> result = onehot(indices, depth, on_value, off_value) [[1, 0, 0], [0, 1, 0], [0, 0, 1]] """ @@ -1793,7 +1783,7 @@ class Gelu(PrimitiveWithInfer): Examples: >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) - >>> gelu = Gelu() + >>> gelu = P.Gelu() >>> result = gelu(tensor) """ @@ -1834,7 +1824,7 @@ class GetNext(PrimitiveWithInfer): and the type is described is `types`. Examples: - >>> get_next = GetNext([mindspore.float32, mindspore.int32], [[32, 1, 28, 28], [10]], 'shared_name') + >>> get_next = P.GetNext([mindspore.float32, mindspore.int32], [[32, 1, 28, 28], [10]], 'shared_name') >>> feature, label = get_next() """ @@ -2015,7 +2005,7 @@ class Pad(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) - >>> pad_op = Pad(((1, 2), (2, 1))) + >>> pad_op = P.Pad(((1, 2), (2, 1))) >>> output_tensor = pad_op(input_tensor) >>> assert output_tensor == Tensor(np.array([[ 0. , 0. , 0. , 0. , 0. , 0. ], >>> [ 0. , 0. 
, -0.1, 0.3, 3.6, 0. ], diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index b334c3e9d8..90d8816094 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -406,7 +406,7 @@ def export(net, *inputs, file_name, file_format='GEIR'): file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'LITE' format for exported model. - GEIR: Graph Engine Intermidiate Representation. An intermidiate representation format of - Ascend model. + Ascend model. - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. - LITE: Huawei model format for mobile. """ From a6747c522fdc03da9ea9b24b1f9086308120efdf Mon Sep 17 00:00:00 2001 From: kswang Date: Thu, 9 Apr 2020 15:08:47 +0800 Subject: [PATCH 47/58] add ascend mem pool --- .../device/ascend/ascend_device_address.cc | 4 +-- .../device/ascend/ascend_device_address.h | 2 +- .../device/ascend/ascend_kernel_runtime.cc | 2 +- .../device/ascend/ascend_memory_manager.cc | 22 +++++++------ .../device/ascend/ascend_memory_manager.h | 6 +++- ...ory_allocator.cc => ascend_memory_pool.cc} | 31 +++++++------------ ...emory_allocator.h => ascend_memory_pool.h} | 31 ++++++++++--------- mindspore/ccsrc/device/device_address.h | 2 +- .../ccsrc/device/gpu/gpu_device_address.cc | 2 +- .../ccsrc/device/gpu/gpu_kernel_runtime.cc | 16 +++++----- .../ccsrc/device/gpu/gpu_memory_manager.cc | 8 ++--- .../ccsrc/device/gpu/gpu_memory_manager.h | 6 ++-- mindspore/ccsrc/device/kernel_runtime.cc | 8 ++--- mindspore/ccsrc/device/kernel_runtime.h | 1 - mindspore/ccsrc/device/memory_manager.cc | 22 +++++-------- mindspore/ccsrc/device/memory_manager.h | 15 ++++----- tests/ut/cpp/CMakeLists.txt | 2 +- 17 files changed, 84 insertions(+), 96 deletions(-) rename mindspore/ccsrc/device/ascend/{ascend_memory_allocator.cc => ascend_memory_pool.cc} (62%) rename mindspore/ccsrc/device/ascend/{ascend_memory_allocator.h => ascend_memory_pool.h} (67%) diff --git 
a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index b8b7f452e3..93f039af0e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -262,8 +262,8 @@ AscendDeviceAddress::~AscendDeviceAddress() { if (ptr_ == nullptr) { return; } - if (mem_dynamic_alloc_) { - AscendMemoryAllocator::GetInstance().FreeTensorMem(ptr_); + if (from_mem_pool_) { + AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } } diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/device/ascend/ascend_device_address.h index 60cc64cca7..93746082c1 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.h @@ -21,7 +21,7 @@ #include #include #include "device/device_address.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "ir/dtype.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 0c2a97a5a6..0c6861e21f 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -29,7 +29,7 @@ #include "hccl/hcom.h" #include "runtime/context.h" #include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "framework/ge_runtime/model_runner.h" #include "device/ascend/tasksink/task_generator.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc index f033d81d82..42830f54fa 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -15,29 +15,31 @@ */ 
#include "device/ascend/ascend_memory_manager.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "utils/context/ms_context.h" #include "runtime/mem.h" namespace mindspore { namespace device { namespace ascend { -static const uint64_t ASCEND_MEM_SIZE = 20; -static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); +const uint64_t kAscendDeviceMemGB = 20; +const uint64_t kAscendMemPoolGB = 5; +const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); +const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); void AscendMemoryManager::MallocDeviceMemory() { - device_mem_size_ = ASCEND_MEM_SIZE_BYTE; - static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); + device_mem_size_ = kAscendDeviceMemSize; + static_mem_offset_ = device_mem_size_; auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; } - device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); + device_mem_pool_size_ = kAscendMemPoolSize; ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; } - AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); } void AscendMemoryManager::FreeDeviceMemory() { @@ -57,8 +59,8 @@ void AscendMemoryManager::FreeDeviceMemory() { } } -void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) { - 
return AscendMemoryAllocator::GetInstance().AllocTensorMem(size); +void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { + return AscendMemoryPool::GetInstance().AllocTensorMem(size); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h index 8639fb5c72..dea88ac10a 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -27,7 +27,11 @@ class AscendMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; + void *MallocMemFromMemPool(size_t size) override; + + private: + uint8_t *device_mem_pool_base_{nullptr}; + uint64_t device_mem_pool_size_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc similarity index 62% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.cc index 08a30a28b7..2c38e4290d 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc @@ -14,24 +14,15 @@ * limitations under the License. 
*/ -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "device/ascend/ascend_kernel_runtime.h" #include "utils/log_adapter.h" namespace mindspore { namespace device { namespace ascend { -const uint64_t MEM_SIZE = 20; -const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30); - -AscendMemoryAllocator::AscendMemoryAllocator() { - hasMalloc_ = false; - free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); - total_mem_size_ = free_mem_size_; -} - -size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { - if (hasMalloc_) { +size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { + if (has_malloc_) { MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; } if (size == 0 || size > free_mem_size_) { @@ -41,35 +32,35 @@ size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { if (*addr == nullptr) { MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; } - hasMalloc_ = true; + has_malloc_ = true; free_mem_size_ -= size; return size; } -bool AscendMemoryAllocator::FreeDeviceMem(const DeviceMemPtr& addr) { +bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr& addr) { MS_EXCEPTION_IF_NULL(addr); - hasMalloc_ = false; + has_malloc_ = false; free_mem_size_ = total_mem_size_; return true; } -size_t AscendMemoryAllocator::AlignMemorySize(size_t size) const { +size_t AscendMemoryPool::AlignMemorySize(size_t size) const { if (size == 0) { return DYNAMIC_MEM_ALIGN_SIZE; } return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; } -size_t AscendMemoryAllocator::mem_alloc_unit_size() const { return free_mem_size_ - 512; } +size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } -void AscendMemoryAllocator::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { +void AscendMemoryPool::set_device_mem_pool_base(uint8_t* 
device_mem_pool_base) { MS_EXCEPTION_IF_NULL(device_mem_pool_base); device_mem_pool_base_ = device_mem_pool_base; } -size_t AscendMemoryAllocator::free_mem_size() { return free_mem_size_; } +size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } -size_t AscendMemoryAllocator::total_mem_size() { return total_mem_size_; } +size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h similarity index 67% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.h rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.h index 8b0f89a9b8..c2a29725f4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ -#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ +#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ #include #include "pre_activate/mem_reuse/mem_dynamic_allocator.h" @@ -23,22 +23,23 @@ namespace mindspore { namespace device { namespace ascend { -// The fraction of total ascend memory used to compute the graph. 
-static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8; - -class AscendMemoryAllocator : public DynamicMemPoolBestFit { +class AscendMemoryPool : public DynamicMemPoolBestFit { public: - ~AscendMemoryAllocator() override = default; + ~AscendMemoryPool() override = default; size_t AllocDeviceMem(size_t size, DeviceMemPtr* addr) override; bool FreeDeviceMem(const DeviceMemPtr& addr) override; void set_device_mem_pool_base(uint8_t* device_mem_pool_base); - void set_device_mem_pool_size(uint64_t device_mem_pool_size) { device_mem_pool_size_ = device_mem_pool_size; } + void set_device_mem_pool_size(uint64_t device_mem_pool_size) { + device_mem_pool_size_ = device_mem_pool_size; + free_mem_size_ = device_mem_pool_size_; + total_mem_size_ = free_mem_size_; + } size_t free_mem_size() override; size_t total_mem_size() override; - static AscendMemoryAllocator& GetInstance() { - static AscendMemoryAllocator instance; + static AscendMemoryPool& GetInstance() { + static AscendMemoryPool instance; return instance; } @@ -49,10 +50,10 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { size_t mem_alloc_unit_size() const override; private: - AscendMemoryAllocator(); - AscendMemoryAllocator(const AscendMemoryAllocator&) = delete; - AscendMemoryAllocator& operator=(const AscendMemoryAllocator&) = delete; - bool hasMalloc_; + AscendMemoryPool() = default; + AscendMemoryPool(const AscendMemoryPool&) = delete; + AscendMemoryPool& operator=(const AscendMemoryPool&) = delete; + bool has_malloc_{false}; uint8_t* device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; size_t free_mem_size_; @@ -62,4 +63,4 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { } // namespace device } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index cb022427e3..2d43963934 
100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -70,7 +70,7 @@ class DeviceAddress { size_t ref_count_{0}; string format_{"DefaultFormat"}; TypeId type_id_{kNumberTypeFloat16}; - bool mem_dynamic_alloc_{false}; + bool from_mem_pool_{false}; friend class KernelRuntime; friend class MemoryManager; friend class mindspore::device::ascend::tasksink::TaskGenerator; diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/device/gpu/gpu_device_address.cc index 36391d27db..c27a1aa65b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/device/gpu/gpu_device_address.cc @@ -46,7 +46,7 @@ GPUDeviceAddress::~GPUDeviceAddress() { } auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (mem_dynamic_alloc_) { + if (from_mem_pool_) { GPUMemoryAllocator::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 597e188e9d..2ec1a5df29 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -227,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(device_address); auto device_ptr = device_address->ptr_; if (device_ptr == nullptr) { - device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]); + device_ptr = mem_manager_->MallocMemFromMemPool(output_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); device_address->ptr_ = device_ptr; } @@ -244,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod kernel_workspaces->emplace_back(nullptr); continue; } - auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]); + auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); kernel::AddressPtr workspace = 
std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); @@ -292,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN addr_size.emplace_back(device_address.get(), output_size); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -328,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf addr_size.emplace_back(device_address.get(), output_sizes[i]); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -361,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } } @@ -372,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto workspace = kernel_workspaces[i]; if (workspace != nullptr) { MS_EXCEPTION_IF_NULL(workspace->addr); - mem_manager_->FreeTensorMemDynamic(workspace->addr); + mem_manager_->FreeMemFromMemPool(workspace->addr); workspace->addr = nullptr; } } @@ -389,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); 
device_address->ptr_ = nullptr; } *is_communication_op = true; @@ -411,7 +411,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc index 3944b504e4..7d042264b6 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -21,11 +21,11 @@ namespace mindspore { namespace device { namespace gpu { -void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) { +void *GPUMemoryManager::MallocMemFromMemPool(size_t size) { return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); } -void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void GPUMemoryManager::FreeMemFromMemPool(void *device_ptr) { GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); } @@ -34,7 +34,7 @@ void GPUMemoryManager::MallocDeviceMemory() { MS_EXCEPTION_IF_NULL(context_ptr); // If use the dynamic memory pool, then alloc the first memory block to init. 
if (context_ptr->enable_dynamic_mem_pool()) { - auto device_addr = AllocTensorMemDynamic(1); + auto device_addr = MallocMemFromMemPool(1); if (!device_addr) { MS_LOG(ERROR) << "Dynamic memory pool init error."; } @@ -62,7 +62,7 @@ uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); if (context_ptr->enable_dynamic_mem_pool()) { - auto device_ptr = AllocTensorMemDynamic(size); + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); return AddressOffset(device_ptr, 0); } diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h index a18226bdf3..cc5dac2a5e 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h @@ -28,11 +28,11 @@ class GPUMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; - void FreeTensorMemDynamic(void *device_ptr) override; + void *MallocMemFromMemPool(size_t size) override; + void FreeMemFromMemPool(void *device_ptr) override; protected: - uint8_t *MallocStaticMem(size_t size, bool communication_mem); + uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 16025ed8a4..eebc650347 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -169,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, tensor_size); + mem_manager_->MallocMemFromMemPool(device_address, tensor_size); 
AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -198,7 +198,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, output_sizes[i]); + mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } @@ -213,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, workspace_lists[i]); + mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } @@ -457,7 +457,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); auto mem_flag = kDynamicMem; if (is_enable_mem_reuse) { - mem_manager_->InitReuseDynamicMemory(graph); + mem_manager_->MallocReusedDynamicMem(graph); mem_flag = kReuseDynamicMem; } auto &kernels = graph->execution_order(); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index 1224bf14eb..61b43fd5c0 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -33,7 +33,6 @@ #include "utils/context/ms_context.h" #include "device/memory_manager.h" -// using mindspore::session::KernelGraph; using mindspore::tensor::Tensor; using TensorPtr = std::shared_ptr; using mindspore::kernel::AddressPtr; diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index 3c1ddee6bc..6977628eb1 
100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -21,12 +21,6 @@ using mindspore::memreuse::BestFitMemReuse; using mindspore::memreuse::MemReuseUtilPtr; namespace mindspore { namespace device { -MemoryManager::~MemoryManager() { - device_mem_base_ = nullptr; - device_mem_pool_base_ = nullptr; - mem_reuse_util_ptr_ = nullptr; -} - size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; } @@ -35,7 +29,7 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; } -void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) { +void MemoryManager::MallocReusedDynamicMem(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); @@ -147,23 +141,23 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } } -void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) { - auto device_ptr = AllocTensorMemDynamic(size); +void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; + address->from_mem_pool_ = true; } -void *MemoryManager::AllocTensorMemDynamic(size_t size) { +void *MemoryManager::MallocMemFromMemPool(size_t size) { if (size == 0) { - MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; + MS_LOG(ERROR) << "MallocMemFromMemPool size is 0."; } return nullptr; } -void MemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void MemoryManager::FreeMemFromMemPool(void *device_ptr) { if (device_ptr == nullptr) { - MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; + 
MS_LOG(ERROR) << "FreeMemFromMemPool device_ptr is null."; } } } // namespace device diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h index 2e47237def..82c22f4548 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/device/memory_manager.h @@ -31,7 +31,7 @@ using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; class MemoryManager { public: MemoryManager() = default; - virtual ~MemoryManager(); + virtual ~MemoryManager() = default; virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; @@ -40,16 +40,15 @@ class MemoryManager { dynamic_mem_offset_ = 0; } - void InitReuseDynamicMemory(session::KernelGraph *graph); + void MallocReusedDynamicMem(session::KernelGraph *graph); uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); virtual uint8_t *MallocMem(int flag, size_t size); - // Alloc memory use the dynamic memory pool. - virtual void *AllocTensorMemDynamic(size_t size); - // Free memory use the dynamic memory pool. 
- virtual void FreeTensorMemDynamic(void *device_ptr); - virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size); + virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); + virtual void *MallocMemFromMemPool(size_t size); + virtual void FreeMemFromMemPool(void *device_ptr); + size_t GetCommonAlignSize(size_t input_size) const; size_t GetCommunicationAlignSize(size_t input_size) const; @@ -57,9 +56,7 @@ class MemoryManager { virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); uint8_t *device_mem_base_{nullptr}; - uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_size_{0}; uint64_t dynamic_mem_offset_{0}; uint64_t static_mem_offset_{0}; size_t total_static_size_ = 0; diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 3c1351a857..f5bc07ff69 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -95,7 +95,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc" + "../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" "../../../mindspore/ccsrc/predict/predict.cc" "../../../mindspore/ccsrc/predict/converter/*.cc" From 735923c0ce8765d885ece6530fa35ae03cf8cb48 Mon Sep 17 00:00:00 2001 From: zjun Date: Tue, 7 Apr 2020 21:52:28 +0800 Subject: [PATCH 48/58] add aicpu opinfo register --- .../ccsrc/kernel/aicpu/aicpu_kernel_build.cc | 116 +++++---- .../kernel/aicpu/aicpu_kernel_metadata.cc | 78 ++---- mindspore/ccsrc/kernel/aicpu/aicpu_util.h | 3 +- mindspore/ccsrc/kernel/common_utils.cc | 9 + 
mindspore/ccsrc/kernel/oplib/opinfo.h | 2 +- mindspore/ccsrc/kernel/oplib/oplib.cc | 21 +- mindspore/ops/__init__.py | 4 +- mindspore/ops/_op_impl/__init__.py | 1 + mindspore/ops/_op_impl/aicpu/__init__.py | 19 ++ .../ops/_op_impl/aicpu/dropout_genmask.py | 32 +++ mindspore/ops/_op_impl/aicpu/get_next.py | 39 +++ .../ops/_op_impl/aicpu/init_data_set_queue.py | 27 +++ mindspore/ops/_op_impl/aicpu/print_tensor.py | 39 +++ mindspore/ops/op_info_register.py | 225 +++++++++++------- 14 files changed, 409 insertions(+), 206 deletions(-) create mode 100644 mindspore/ops/_op_impl/aicpu/__init__.py create mode 100644 mindspore/ops/_op_impl/aicpu/dropout_genmask.py create mode 100644 mindspore/ops/_op_impl/aicpu/get_next.py create mode 100644 mindspore/ops/_op_impl/aicpu/init_data_set_queue.py create mode 100644 mindspore/ops/_op_impl/aicpu/print_tensor.py diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc index c89e27c8ce..cf23779415 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc @@ -39,45 +39,7 @@ namespace mindspore { namespace kernel { using FNodeAttrHandle = std::function &anf_node, mindspore::NodeDef *proto)>; -const std::vector local_framework_op_vec = {kInitDataSetQueue, kGetNext, kDropoutGenMask, kPrint}; - -void InitDataSetQueueAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - std::string channel_name = AnfAlgo::GetNodeAttr(anf_node, kQueueName); - (*node_attr)[kChannelName].set_s(channel_name); -} - -void GetNextAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> 
*node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - std::string shared_name = AnfAlgo::GetNodeAttr(anf_node, kSharedName); - (*node_attr)[kChannelName].set_s(shared_name); -} - -void DropoutGenMaskAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(proto); - - ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); - MS_EXCEPTION_IF_NULL(node_attr); - int seed = AnfAlgo::GetNodeAttr(anf_node, kSeed); - int seed2 = AnfAlgo::GetNodeAttr(anf_node, kSeed2); - (*node_attr)["seed"].set_i(seed); - (*node_attr)["seed2"].set_i(seed2); -} - -void CreateAttrFuncMap(std::map *mOpAttrFuncMap) { - (void)mOpAttrFuncMap->emplace(std::pair(kInitDataSetQueue, InitDataSetQueueAttr)); - (void)mOpAttrFuncMap->emplace(std::pair(kGetNext, GetNextAttr)); - (void)mOpAttrFuncMap->emplace(std::pair(kDropoutGenMask, DropoutGenMaskAttr)); -} +const std::vector local_framework_op_vec = {kInitData, kGetNext, kDropoutGenMask, kPrint}; bool SetIOIputSize(const std::shared_ptr &anf_node, const size_t &input_num, std::vector *input_size_list) { @@ -147,24 +109,74 @@ bool SetIOSize(const std::shared_ptr &anf_node, const std::shared_ptr *node_attr) { + MS_EXCEPTION_IF_NULL(node_attr); + if (type == "int") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_i(attr_value); + } else if (type == "str") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_s(attr_value); + } else if (type == "bool") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_b(attr_value); + } else if (type == "float") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_f(attr_value); + } else if (type == "listInt") { + std::vector attr_value; + auto value_type = value->type(); + MS_EXCEPTION_IF_NULL(value_type); + auto value_type_str = value_type->ToString(); + if (value_type_str == "Int32") { + int data = GetValue(value); + 
attr_value.push_back(data); + } else { + attr_value = GetValue>(value); + } + mindspore::AttrValue input_shape_attr; + mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array(); + MS_EXCEPTION_IF_NULL(input_shape_attr_list); + for (const auto shape : attr_value) { + input_shape_attr_list->add_i(shape); + } + (*node_attr)[attr_name] = input_shape_attr; + } else { + MS_LOG(EXCEPTION) << "type: " << type << "not support"; + } +} + void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { std::string op_name = AnfAlgo::GetCNodeName(anf_node); - if (op_name == "InitDataSetQueue") { - op_name = "InitData"; + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } - if (op_name == "Print") { + if (op_name == kPrint) { return; } - std::map mOpAttrFuncMap; - CreateAttrFuncMap(&mOpAttrFuncMap); - FNodeAttrHandle func_ptr = nullptr; - auto iter = mOpAttrFuncMap.find(op_name); - if (iter != mOpAttrFuncMap.end()) { - func_ptr = iter->second; - MS_EXCEPTION_IF_NULL(func_ptr); - func_ptr(anf_node, proto); - } else { - MS_LOG(ERROR) << "Don't support node [" << op_name << "] to set nodedef of attr"; + + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); + MS_EXCEPTION_IF_NULL(op_info_ptr); + auto attrs_ptr = op_info_ptr->attrs_ptr(); + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); + for (const auto &attr_ptr : attrs_ptr) { + std::string attr_name = attr_ptr->name(); + std::string real_name; + auto value = primitive->GetAttr(attr_name); + if (value != nullptr) { + if (attr_name == kQueueName || attr_name == kSharedName) { + real_name = kChannelName; + } else if (attr_name == kSeed) { + real_name = "seed"; + } else if (attr_name == kSeed2) { + real_name = "seed2"; + } + std::string type = attr_ptr->type(); + ParseAttrValue(type, real_name, value, 
node_attr); + } } MS_LOG(INFO) << "Set node attr end!"; } diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc index ac0b0d9f7a..6675051069 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc @@ -17,68 +17,27 @@ #include "kernel/aicpu/aicpu_kernel_metadata.h" #include #include +#include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" +#include "kernel/aicpu/aicpu_util.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { -constexpr auto kInitDataSetQueueOpName = "InitDataSetQueue"; -constexpr auto kGetNext = "GetNext"; -constexpr auto kDropoutGenMask = "DropoutGenMask"; -constexpr auto kPrint = "Print"; -const std::vector AICPU_OPS = {kInitDataSetQueueOpName, kGetNext, kDropoutGenMask, kPrint}; - -std::shared_ptr CreateKernelInfo(const std::vector &inputs_format, - const std::vector &inputs_device_type, - const std::vector &outputs_format, - const std::vector &outputs_device_type) { - auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); - builder.SetInputsFormat(inputs_format); - builder.SetInputsDeviceType(inputs_device_type); - builder.SetOutputsFormat(outputs_format); - builder.SetOutputsDeviceType(outputs_device_type); - builder.SetProcessor(AICPU); - builder.SetKernelType(AICPU_KERNEL); - builder.SetFusionType(OPAQUE); - return builder.Build(); -} - -bool CheckIfExistAicpuMeta(const std::string &op_name) { - if (std::find(AICPU_OPS.begin(), AICPU_OPS.end(), op_name) != AICPU_OPS.end()) { - return false; - } - return true; -} - void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { MS_LOG(INFO) << "AicpuMetadataInfo."; MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(kernel_info_list); std::string op_name = AnfAlgo::GetCNodeName(kernel_node); - if (CheckIfExistAicpuMeta(op_name)) { - MS_LOG(DEBUG) << "Aicpu doesn't have metadata of op 
[" << op_name << "]."; - return; - } - - if (op_name == kInitDataSetQueueOpName) { - kernel_info_list->push_back(CreateKernelInfo({}, {}, {}, {})); + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } - - if (op_name == kGetNext) { - std::vector outputs_format; - std::vector outputs_type; - for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { - outputs_format.emplace_back(kOpFormat_DEFAULT); - outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); - } - kernel_info_list->push_back(CreateKernelInfo({}, {}, outputs_format, outputs_type)); - } - - if (op_name == kDropoutGenMask) { - kernel_info_list->push_back(CreateKernelInfo({kOpFormat_NCHW, kOpFormat_NCHW}, - {kInt32->type_id(), kFloat16->type_id()}, {kOpFormat_NCHW}, - {kUInt8->type_id()})); + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); + if (op_info_ptr == nullptr) { + MS_LOG(WARNING) << "Aicpu doestn't have metadata of op [" << op_name << "]"; + return; } - + // For compatibility with the current framework if (op_name == kPrint) { std::vector inputs_format; std::vector inputs_type; @@ -92,11 +51,20 @@ void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vectorpush_back(CreateKernelInfo(inputs_format, inputs_type, outputs_format, outputs_type)); + auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); + builder.SetInputsFormat(inputs_format); + builder.SetInputsDeviceType(inputs_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_type); + builder.SetProcessor(AICPU); + builder.SetKernelType(AICPU_KERNEL); + builder.SetFusionType(OPAQUE); + kernel_info_list->push_back(builder.Build()); + return; } - - if (kernel_info_list->empty()) { - MS_LOG(INFO) << "Aicpu dose not has metadata of op[ " << op_name << "]."; + if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) { + MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name 
<< "] failed"; + return; } } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h index f521418f6b..08fca16a3b 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h @@ -24,7 +24,8 @@ namespace mindspore { namespace kernel { -constexpr auto kInitDataSetQueue = "InitData"; +constexpr auto kInitDataSetQueue = "InitDataSetQueue"; +constexpr auto kInitData = "InitData"; constexpr auto kGetNext = "GetNext"; constexpr auto kDropoutGenMask = "DropoutGenMask"; constexpr auto kPrint = "Print"; diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index c2f2638753..137ae65414 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -417,6 +417,8 @@ void SetKernelBuildInfo(const std::shared_ptrSetKernelType(AUTO_DIFF_KERNEL); + } else if (imply_type == kAICPU) { + builder->SetKernelType(AICPU_KERNEL); } else { builder->SetKernelType(TBE_KERNEL); } @@ -471,6 +473,13 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptrpush_back(builder->Build()); + } + } else { + if (processor == AICPU) { + auto builder = std::make_shared(); + MS_EXCEPTION_IF_NULL(builder); + SetKernelBuildInfo(builder, processor, op_info_ptr); kernel_info_list->push_back(builder->Build()); } } diff --git a/mindspore/ccsrc/kernel/oplib/opinfo.h b/mindspore/ccsrc/kernel/oplib/opinfo.h index 56abea9269..215df21776 100644 --- a/mindspore/ccsrc/kernel/oplib/opinfo.h +++ b/mindspore/ccsrc/kernel/oplib/opinfo.h @@ -24,7 +24,7 @@ namespace mindspore { namespace kernel { -enum OpImplyType { kAKG = 0, kTBE }; +enum OpImplyType { kAKG = 0, kTBE = 1, kAICPU }; enum OpIOType { kInput = 0, kOutput }; class OpAttr { diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc index 4059b8e246..d2464bce47 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ 
b/mindspore/ccsrc/kernel/oplib/oplib.cc @@ -39,6 +39,7 @@ constexpr auto kDtypeFormat = "dtype_format"; constexpr auto kAttr = "attr"; constexpr auto kIputs = "inputs"; constexpr auto kOutputs = "outputs"; +constexpr auto kAiCPU = "AiCPU"; constexpr auto kTbe = "TBE"; constexpr auto kAkg = "akg"; constexpr auto kAutodiff = "AutoDiff"; @@ -60,6 +61,8 @@ std::string ImplTypeToStr(OpImplyType impl_type) { return kTbe; case kAKG: return kAkg; + case kAICPU: + return kAiCPU; default: return "unknow"; } @@ -76,6 +79,9 @@ bool OpLib::RegOp(const std::string& json_string, const std::string& impl_path) } else if (imply_type_string == kAutodiff) { OpImplyType imply_type = kAKG; ret = DecodeOpInfo(op_json, imply_type, impl_path); + } else if (imply_type_string == kAiCPU) { + OpImplyType imply_type = kAICPU; + ret = DecodeOpInfo(op_json, imply_type, impl_path); } else { MS_LOG(DEBUG) << "Not support imply_type"; } @@ -154,7 +160,9 @@ bool OpLib::DecodeAttr(const nlohmann::json& obj, const OpImplyType imply_type, std::shared_ptr op_attr = std::make_shared(); MS_EXCEPTION_IF_NULL(op_attr); op_attr->set_name(obj.at(kName)); - op_attr->set_param_type(obj.at(kParamType)); + if (imply_type != kAICPU) { + op_attr->set_param_type(obj.at(kParamType)); + } op_attr->set_type(obj.at(kType)); if (imply_type == kTBE) { op_attr->set_value(obj.at(kValue)); @@ -242,9 +250,10 @@ std::shared_ptr OpLib::FindOp(const std::string& op_name, OpImplyType im auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); - if ((is_gpu && imply_type == kTBE) || (!is_gpu && imply_type != kTBE)) { - MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) - << "current op num:" << op_info_.size(); + if ((is_gpu && (imply_type == kTBE || imply_type == kAICPU)) || + (!is_gpu && (imply_type != kTBE && imply_type != kAICPU))) { + MS_LOG(ERROR) << "FindOp failed: opname:" << op_name << ", imply_type:" << 
ImplTypeToStr(imply_type) + << ", current op num:" << op_info_.size(); return nullptr; } for (const auto& op_info : op_info_) { @@ -253,8 +262,8 @@ std::shared_ptr OpLib::FindOp(const std::string& op_name, OpImplyType im return op_info; } } - MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) - << "current op num:" << op_info_.size(); + MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << ", imply_type:" << ImplTypeToStr(imply_type) + << ", current op num:" << op_info_.size(); return nullptr; } diff --git a/mindspore/ops/__init__.py b/mindspore/ops/__init__.py index 6f4f680672..0e6c114566 100644 --- a/mindspore/ops/__init__.py +++ b/mindspore/ops/__init__.py @@ -30,7 +30,7 @@ Note: from .primitive import Primitive, PrimitiveWithInfer, prim_attr_register from .vm_impl_registry import get_vm_impl_fn, vm_impl_registry -from .op_info_register import op_info_register, TBERegOp, DataType +from .op_info_register import op_info_register, AiCPURegOp, TBERegOp, DataType from .primitive import constexpr from .._c_expression import signature_rw, signature_kind @@ -40,6 +40,6 @@ __primitive__ = [ ] __all__ = ["get_vm_impl_fn", "vm_impl_registry", - "op_info_register", "TBERegOp", "DataType", + "op_info_register", "AiCPURegOp", "TBERegOp", "DataType", "constexpr"] __all__.extend(__primitive__) diff --git a/mindspore/ops/_op_impl/__init__.py b/mindspore/ops/_op_impl/__init__.py index b8370cc64e..76444881cc 100644 --- a/mindspore/ops/_op_impl/__init__.py +++ b/mindspore/ops/_op_impl/__init__.py @@ -16,5 +16,6 @@ from .akg.gpu import * from .tbe import * +from .aicpu import * __all__ = [] diff --git a/mindspore/ops/_op_impl/aicpu/__init__.py b/mindspore/ops/_op_impl/aicpu/__init__.py new file mode 100644 index 0000000000..b0f90a629b --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""aicpu ops""" +from .init_data_set_queue import _init_data_set_queue_aicpu +from .dropout_genmask import _dropout_genmask_aicpu +from .get_next import _get_next_aicpu +from .print_tensor import _print_aicpu diff --git a/mindspore/ops/_op_impl/aicpu/dropout_genmask.py b/mindspore/ops/_op_impl/aicpu/dropout_genmask.py new file mode 100644 index 0000000000..96707a5010 --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/dropout_genmask.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +dropout_genmask_op_info = AiCPURegOp("DropoutGenMask") \ + .fusion_type("OPAQUE") \ + .input(0, "x1", "required") \ + .input(1, "x2", "required") \ + .output(0, "y", "required") \ + .attr("Seed0", "int") \ + .attr("Seed1", "int") \ + .dtype_format(DataType.I32_NCHW, DataType.F16_NCHW, DataType.U8_NCHW) \ + .get_op_info() + +@op_info_register(dropout_genmask_op_info) +def _dropout_genmask_aicpu(): + """Dropout AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/get_next.py b/mindspore/ops/_op_impl/aicpu/get_next.py new file mode 100644 index 0000000000..ce32014211 --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/get_next.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +get_next_op_info = AiCPURegOp("GetNext") \ + .fusion_type("OPAQUE") \ + .output(0, "y", "dynamic") \ + .attr("shared_name", "str") \ + .dtype_format(DataType.BOOL_Default) \ + .dtype_format(DataType.I8_Default) \ + .dtype_format(DataType.I16_Default) \ + .dtype_format(DataType.I32_Default) \ + .dtype_format(DataType.I64_Default) \ + .dtype_format(DataType.F16_Default) \ + .dtype_format(DataType.U8_Default) \ + .dtype_format(DataType.U16_Default) \ + .dtype_format(DataType.U32_Default) \ + .dtype_format(DataType.U64_Default) \ + .dtype_format(DataType.F32_Default) \ + .get_op_info() + +@op_info_register(get_next_op_info) +def _get_next_aicpu(): + """GetNext AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py b/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py new file mode 100644 index 0000000000..a48e01eced --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/init_data_set_queue.py @@ -0,0 +1,27 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp + +init_data_set_queue_op_info = AiCPURegOp("InitData") \ + .fusion_type("OPAQUE") \ + .attr("queue_name", "str") \ + .get_op_info() + +@op_info_register(init_data_set_queue_op_info) +def _init_data_set_queue_aicpu(): + """InitDataSetQueue AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/print_tensor.py b/mindspore/ops/_op_impl/aicpu/print_tensor.py new file mode 100644 index 0000000000..011f4a3d9d --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/print_tensor.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InitDataSetQueue op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +print_op_info = AiCPURegOp("Print") \ + .fusion_type("OPAQUE") \ + .input(0, "x", "dynamic") \ + .output(0, "y", "required") \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .dtype_format(DataType.U32_Default, DataType.U32_Default) \ + .dtype_format(DataType.U64_Default, DataType.U64_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + +@op_info_register(print_op_info) +def _print_aicpu(): + """Print AiCPU register""" + return diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py index 6a42099c89..0750094e18 100644 --- a/mindspore/ops/op_info_register.py +++ b/mindspore/ops/op_info_register.py @@ -78,14 +78,15 @@ class RegOp(): self.inputs = [] self.outputs = [] self.attr_ = [] + self.fusion_type_ = '' self.dtype_format_ = [] - def is_string(self, value): + def _is_string(self, value): """ Check if the value is a str type. Args: - value: Parameter to to check. + value: Parameter to be checked. Raises: TypeError: If the type of value is not a str. @@ -93,12 +94,12 @@ class RegOp(): if not isinstance(value, str): raise TypeError("%s value must be str" % str(value)) - def is_int(self, value): + def _is_int(self, value): """ Check if the value is a int. Args: - value: Parameter to to check. + value: Parameter to be checked. 
Raises: TypeError: If the type of value is not a int. @@ -106,12 +107,12 @@ class RegOp(): if not isinstance(value, int): raise TypeError("%s value must be int" % str(value)) - def is_bool(self, value): + def _is_bool(self, value): """ Check if the value is a bool. Args: - value: Parameter to to check. + value: Parameter to be checked. Raises: TypeError: If the type of value is not a bool. @@ -119,6 +120,51 @@ class RegOp(): if not isinstance(value, bool): raise TypeError("%s value must be bool" % str(value)) + def _check_param(self, param_list, key_list, fn_list, kwargs): + """ + Check if the parameter type is correct. + + Args: + param_list (list): Parameter list to be checked. + key_list (list): The keys of output dict. + fn_list (list): Function used for parameter checking. If the function list has only one element, + all parameters will use the same function. + kwargs (dict): Other parameter information. + + Raises: + TypeError: If the type of value is not list. + ValueError: If the size of param list is not equal to the size of key list, or + the size of param list is not equal to the size of funtion list. + """ + for i in [param_list, key_list, fn_list]: + if not isinstance(i, list): + raise TypeError("%s value must be list type" % str(i)) + if len(param_list) != len(key_list) or (len(fn_list) != 1 and len(param_list) != len(fn_list)): + raise ValueError("param_list size {}, key_list size {}, must be equal.And fn_list size {}.". + format(len(param_list), len(key_list), len(fn_list))) + out_dict = {} + for idx, element in enumerate(param_list): + if element is not None: + if len(fn_list) == 1: + fn_list[0](element) + else: + fn_list[idx](element) + out_dict[key_list[idx]] = element + if kwargs: + out_dict = dict(out_dict, kwargs) + return out_dict + + def fusion_type(self, fusion_type): + """ + Register fusion type. + + Args: + fusion_type (str): Value of fusion type. 
+ """ + self._is_string(fusion_type) + self.fusion_type_ = fusion_type + return self + def dtype_format(self, *args): """ Register dtype and format. @@ -136,8 +182,8 @@ class RegOp(): for arg in args: if not isinstance(arg, tuple) or len(arg) != 2: raise ValueError("dtype and format value must be tuple of two elements") - self.is_string(arg[0]) - self.is_string(arg[1]) + self._is_string(arg[0]) + self._is_string(arg[1]) dtype_format.append(arg) self.dtype_format_.append(tuple(dtype_format)) return self @@ -159,13 +205,71 @@ class RegOp(): return op_info +class AiCPURegOp(RegOp): + """Class for AiCPU op info register""" + + def __init__(self, op_name): + super(AiCPURegOp, self).__init__(op_name) + self.imply_type = "AiCPU" + + def input(self, index=None, name=None, param_type=None, **kwargs): + """ + Register AiCPU op input information. + + Args: + index (int): Order of the input. Default: None. + name (str): Name of the input. Default: None. + param_type (str): Param type of the input. Default: None. + kwargs (dict): Other information for the input. + """ + param_list = [index, name, param_type] + key_list = ["index", "name", "param_type"] + fn_list = [self._is_int, self._is_string, self._is_string] + input_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.inputs.append(input_dict) + return self + + def output(self, index=None, name=None, param_type=None, **kwargs): + """ + Register AiCPU op output information. + + Args: + index (int): Order of the output. Default: None. + name (str): Name of the output. Default: None. + param_type (str): Param type of the output. Default: None. + kwargs (dict): Other information for the output. 
+ """ + param_list = [index, name, param_type] + key_list = ["index", "name", "param_type"] + fn_list = [self._is_int, self._is_string, self._is_string] + output_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.outputs.append(output_dict) + return self + + def attr(self, name=None, value_type=None, value=None, **kwargs): + """ + Register AiCPU op attribute information. + + Args: + name (str): Name of the attribute. Default: None. + value_type (str): Value type of the attribute. Default: None. + value (str): Value type of the attribute. Default: None. + kwargs (dict): Other information for the attribute. + """ + param_list = [name, value_type, value] + key_list = ["name", "type", "value"] + fn_list = [self._is_string] + attr_dict = self._check_param(param_list, key_list, fn_list, kwargs) + self.attr_.append(attr_dict) + return self + + class TBERegOp(RegOp): """Class for TBE op info register.""" def __init__(self, op_name=""): super(TBERegOp, self).__init__(op_name) self.imply_type = "TBE" - self.fusion_type_ = '' self.async_flag_ = False self.binfile_name_ = '' self.compute_cost_ = 10 @@ -175,17 +279,6 @@ class TBERegOp(RegOp): self.dynamic_format_ = False self.op_pattern_ = "" - def fusion_type(self, fusion_type): - """ - Register fusion type. - - Args: - fusion_type (str): Value of fusion type. - """ - self.is_string(fusion_type) - self.fusion_type_ = fusion_type - return self - def async_flag(self, async_flag): """ Register async flag. @@ -193,7 +286,7 @@ class TBERegOp(RegOp): Args: async_flag (bool): Value of async flag. """ - self.is_bool(async_flag) + self._is_bool(async_flag) self.async_flag_ = async_flag return self @@ -204,7 +297,7 @@ class TBERegOp(RegOp): Args: binfile_name (str): Name of op binfile. """ - self.is_string(binfile_name) + self._is_string(binfile_name) self.binfile_name_ = binfile_name return self @@ -215,7 +308,7 @@ class TBERegOp(RegOp): Args: compute_cost (int): Value of compute cost. 
""" - self.is_int(compute_cost) + self._is_int(compute_cost) self.compute_cost_ = compute_cost return self @@ -226,7 +319,7 @@ class TBERegOp(RegOp): Args: kernel_name (str): Name of op kernel. """ - self.is_string(kernel_name) + self._is_string(kernel_name) self.kernel_name_ = kernel_name return self @@ -237,7 +330,7 @@ class TBERegOp(RegOp): Args: partial_flag (bool): Value of partial flag. """ - self.is_bool(partial_flag) + self._is_bool(partial_flag) self.partial_flag_ = partial_flag return self @@ -248,7 +341,7 @@ class TBERegOp(RegOp): Args: reshape_type (str): Value of reshape type. """ - self.is_string(reshape_type) + self._is_string(reshape_type) self.reshape_type_ = reshape_type return self @@ -259,56 +352,43 @@ class TBERegOp(RegOp): Args: reshape_type (bool): Value of dynamic format. """ - self.is_bool(dynamic_format) + self._is_bool(dynamic_format) self.dynamic_format_ = dynamic_format return self def op_pattern(self, pattern=None): """ - Register op pattern information. + Register TBE op pattern information. Args: pattern (str): Value of op pattern. """ - if pattern is not None and self.istring(pattern): + if pattern is not None and self._is_string(pattern): self.op_pattern_ = pattern return self def attr(self, name=None, param_type=None, value_type=None, value=None, default_value=None, **kwargs): """ - Register op attribute information. + Register TBE op attribute information. Args: name (str): Name of the attribute. Default: None. param_type (str): Param type of the attribute. Default: None. - type (str): Type of the attribute. Default: None. + value_type (str): Type of the attribute. Default: None. value (str): Value of the attribute. Default: None. default_value (str): Default value of attribute. Default: None. kwargs (dict): Other information for the attribute. 
""" param_list = [name, param_type, value_type, value, default_value] - attr_dict = {} - for index, element in enumerate(param_list): - if element is not None: - self.is_string(element) - if index == 0: - attr_dict["name"] = element - elif index == 1: - attr_dict["param_type"] = element - elif index == 2: - attr_dict["type"] = element - elif index == 3: - attr_dict["value"] = element - elif index == 4: - attr_dict["default_value"] = element - if kwargs: - attr_dict = dict(attr_dict, **kwargs) + key_list = ["name", "param_type", "type", "value", "default_value"] + fn_list = [self._is_string] + attr_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.attr_.append(attr_dict) return self def input(self, index=None, name=None, need_compile=None, param_type=None, shape=None, **kwargs): """ - Register op input information. + Register TBE op input information. Args: index (int): Order of the input. Default: None. @@ -319,32 +399,15 @@ class TBERegOp(RegOp): kwargs (dict): Other information for the input. 
""" param_list = [index, name, need_compile, param_type, shape] - input_dict = {} - for idx, element in enumerate(param_list): - if element is not None: - if idx == 0: - self.is_int(element) - input_dict["index"] = element - elif idx == 1: - self.is_string(element) - input_dict["name"] = element - elif idx == 2: - self.is_bool(element) - input_dict["need_compile"] = element - elif idx == 3: - self.is_string(element) - input_dict["param_type"] = element - elif idx == 4: - self.is_string(element) - input_dict["shape"] = element - if kwargs: - input_dict = dict(input_dict, **kwargs) + key_list = ["index", "name", "need_compile", "param_type", "shape"] + fn_list = [self._is_int, self._is_string, self._is_bool, self._is_string, self._is_string] + input_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.inputs.append(input_dict) return self def output(self, index=None, name=None, need_compile=None, param_type=None, shape=None, **kwargs): """ - Register op output information. + Register TBE op output information. Args: index (int): Order of the output. Default: None. @@ -355,29 +418,13 @@ class TBERegOp(RegOp): kwargs (dict): Other information for the output. 
""" param_list = [index, name, need_compile, param_type, shape] - output_dict = {} - for idx, element in enumerate(param_list): - if element is not None: - if idx == 0: - self.is_int(element) - output_dict["index"] = element - elif idx == 1: - self.is_string(element) - output_dict["name"] = element - elif idx == 2: - self.is_bool(element) - output_dict["need_compile"] = element - elif idx == 3: - self.is_string(element) - output_dict["param_type"] = element - elif idx == 4: - self.is_string(element) - output_dict["shape"] = element - if kwargs: - output_dict = dict(output_dict, **kwargs) + key_list = ["index", "name", "need_compile", "param_type", "shape"] + fn_list = [self._is_int, self._is_string, self._is_bool, self._is_string, self._is_string] + output_dict = self._check_param(param_list, key_list, fn_list, kwargs) self.outputs.append(output_dict) return self + class DataType(): """ Various combinations of dtype and formatself. From 99c353e6966cf14a23d421e3ac490fffa7cce324 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 9 Apr 2020 19:07:01 +0800 Subject: [PATCH 49/58] add custom environment variable ASCEND_CUSTOM_PATH for customized Ascend software installation --- cmake/dependency_graphengine.cmake | 6 +++++- mindspore/ccsrc/CMakeLists.txt | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmake/dependency_graphengine.cmake b/cmake/dependency_graphengine.cmake index 2420f47736..2a90cc1458 100644 --- a/cmake/dependency_graphengine.cmake +++ b/cmake/dependency_graphengine.cmake @@ -39,7 +39,11 @@ elseif (DEFINED ENV{D_LINK_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) else() # Ascend mode - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) find_library(c_sec libc_sec.so 
${ASCEND_DRIVER_PATH}) diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 1d104148c3..c49c962bdd 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -296,7 +296,11 @@ if(ENABLE_D) endif() else() MESSAGE("use system default lib") - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) endif() @@ -500,7 +504,11 @@ add_dependencies(add_ms_lib _c_expression) if (NOT ENABLE_GE) if (ENABLE_D) - set(ASCEND_PATH /usr/local/Ascend) + if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else() + set(ASCEND_PATH /usr/local/Ascend) + endif() set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) add_custom_target(add_ge_lib ALL COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/common/graph/libgraph.so ${MS_LIB_PATH} From fc4ad192dc11006f25fee75095072a6d5d5f2019 Mon Sep 17 00:00:00 2001 From: liuxiao Date: Thu, 9 Apr 2020 14:37:00 +0800 Subject: [PATCH 50/58] modified api name Stack -> Pack, Unstack -> Unpack --- mindspore/ccsrc/transform/convert.cc | 4 +- mindspore/ops/_grad/grad_array_ops.py | 20 ++++---- mindspore/ops/operations/__init__.py | 6 +-- mindspore/ops/operations/array_ops.py | 70 ++++++++++++--------------- tests/ut/python/ops/test_ops.py | 32 ++++++------ 5 files changed, 62 insertions(+), 70 deletions(-) diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index bebd000958..20adec5b97 100755 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -148,8 +148,8 @@ const char kNameSlice[] = "Slice"; const char kNameAddN[] = "AddN"; const char kNameLess[] = "Less"; const char kNameGreater[] = "Greater"; -const char kNameStack[] = "Stack"; -const char kNameUnstack[] = 
"Unstack"; +const char kNamePack[] = "Pack"; +const char kNameUnpack[] = "Unpack"; const char kNameMerge[] = "Merge"; const char kNameGeSwitch[] = "GeSwitch"; diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 0a0caf471e..abad030ae9 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -266,26 +266,26 @@ def get_bprop_gather_v2(self): return bprop -@bprop_getters.register(P.Stack) -def get_bprop_stack(self): - """Generate bprop for Stack""" +@bprop_getters.register(P.Pack) +def get_bprop_pack(self): + """Generate bprop for Pack""" axis = self.axis def bprop(x, out, dout): - stack_grad = P.Unstack(axis) - out = stack_grad(dout) + pack_grad = P.Unpack(axis) + out = pack_grad(dout) return (out,) return bprop -@bprop_getters.register(P.Unstack) -def get_bprop_unstack(self): - """Generate bprop for Unstack""" +@bprop_getters.register(P.Unpack) +def get_bprop_unpack(self): + """Generate bprop for Unpack""" axis = self.axis def bprop(x, out, dout): - unstack_grad = P.Stack(axis) - out = unstack_grad(dout) + unpack_grad = P.Pack(axis) + out = unpack_grad(dout) return (out,) return bprop diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 5fd3f07876..c10aef1ac0 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -19,7 +19,7 @@ Primitive operator classes. A collection of operators to build nerual networks or computing functions. 
""" -from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Stack, Unstack, +from .array_ops import (Argmax, Argmin, Cast, ConcatOffset, Concat, Pack, Unpack, Diag, DiagPart, DType, ExpandDims, Eye, Fill, GatherNd, GatherV2, InvertPermutation, IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, @@ -112,8 +112,8 @@ __all__ = [ 'OneHot', 'GatherV2', 'Concat', - 'Stack', - 'Unstack', + 'Pack', + 'Unpack', 'Tile', 'BiasAdd', 'Gelu', diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index dda490566f..ac7f8ed699 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1350,8 +1350,8 @@ class Concat(PrimitiveWithInfer): return out -def _get_stack_shape(x_shape, x_type, axis): - """for satck output shape""" +def _get_pack_shape(x_shape, x_type, axis): + """for pack output shape""" validator.check_type("shape", x_shape, [tuple]) validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT) validator.check_subclass("shape0", x_type[0], mstype.tensor) @@ -1368,43 +1368,40 @@ def _get_stack_shape(x_shape, x_type, axis): validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0]) for j in range(rank_base): if v[j] != x_shape[0][j]: - raise ValueError("Stack evaluator element %d shape in input can not stack with first element" % i) + raise ValueError("Pack evaluator element %d shape in input can not pack with first element" % i) out_shape.insert(axis, N) return out_shape -class Stack(PrimitiveWithInfer): +class Pack(PrimitiveWithInfer): r""" - Stacks a list of rank-`R` tensors into one rank-`(R+1)` tensor. + Packs a list of tensors in specified axis. - Packs the list of tensors in `input_x` into a tensor with rank one higher than - each tensor in `input_x`, by packing them along the `axis` dimension. - Given a list of length `N` of tensors of shape `(A, B, C)`; + Packs the list of input tensors with the same rank `R`, output is a tensor of rank `(R+1)`. 
- If `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. - - If `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. Etc. + Given input tensors of shape :math:`(x_1, x_2, ..., x_R)`. Set the number of input tensors as `N`. + If :math:`0 \le axis`, the output tensor shape is :math:`(x_1, x_2, ..., x_{axis}, N, x_{axis+1}, ..., x_R)`. Args: - axis (int): The axis to stack along. Negative values wrap around, - so the valid range is [-(R+1), R+1). Default: 0. + axis (int): Dimension along which to pack. Default: 0. + Negative values wrap around. The range is [-(R+1), R+1). Inputs: - **input_x** (Union[tuple, list]) - A Tuple or list of Tensor objects with the same shape and type. Outputs: - Tensor. A stacked Tensor with the same type as values. + Tensor. A packed Tensor with the same type as `input_x`. Examples: >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) - >>> op = P.Stack() - >>> output = op([data1, data2]) + >>> pack = P.Pack() + >>> output = pack([data1, data2]) [[0, 1], [2, 3]] """ @prim_attr_register def __init__(self, axis=0): - """init Stack""" + """init Pack""" self.__setattr_flag__ = True validator.check_type("axis", axis, [int]) self.axis = axis @@ -1413,38 +1410,33 @@ class Stack(PrimitiveWithInfer): x_shape = value['shape'] x_type = value['dtype'] self.add_prim_attr('num', len(x_shape)) - all_shape = _get_stack_shape(x_shape, x_type, self.axis) + all_shape = _get_pack_shape(x_shape, x_type, self.axis) out = {'shape': all_shape, 'dtype': x_type[0], 'value': None} return out -class Unstack(PrimitiveWithInfer): +class Unpack(PrimitiveWithInfer): r""" - Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. - - Unpacks num tensors from value by chipping it along the axis dimension. - If num is not specified (the default), it is inferred from value's shape. - If value.shape[axis] is not known, ValueError is raised. 
+ Unpacks tensor in specified axis. - For example, given a tensor of shape (A, B, C, D); + Unpacks a tensor of rank `R` along axis dimension, output tensors will have rank `(R-1)`. - If axis == 0 then the i'th tensor in output is the slice value[i, :, :, :] and - each tensor in output will have shape (B, C, D). (Note that the dimension unpacked along is gone, unlike split). + Given a tensor of shape :math:`(x_1, x_2, ..., x_R)`. If :math:`0 \le axis`, + the shape of tensor in output is :math:`(x_1, x_2, ..., x_{axis}, x_{axis+2}, ..., x_R)`. - If axis == 1 then the i'th tensor in output is the slice value[:, i, :, :] and - each tensor in output will have shape (A, C, D). Etc. - - This is the opposite of stack. + This is the opposite of pack. Args: - axis (int): The axis to unstack along. Defaults to the first dimension. - Negative values wrap around, so the valid range is [-R, R). + axis (int): Dimension along which to pack. Default: 0. + Negative values wrap around. The range is [-R, R). + num (int): The number of tensors to be unpacked to. Default : "None". + If `num` is not specified, it is inferred from the shape of `input_x`. Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. - A rank R > 0 Tensor to be unstacked. + A rank R > 0 Tensor to be unpacked. Outputs: A tuple of Tensors, the shape of each objects is same. @@ -1454,15 +1446,15 @@ class Unstack(PrimitiveWithInfer): or if len(input_x.shape[axis]) not equal to num. 
Examples: - >>> unstack = P.Unstack() - >>> x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) - >>> output = unstack(x) + >>> unpack = P.Unpack() + >>> input_x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) + >>> output = unpack(input_x) ([1, 1, 1, 1], [2, 2, 2, 2]) """ @prim_attr_register def __init__(self, axis=0): - """init Unstack""" + """init Unpack""" self.__setattr_flag__ = True validator.check_type("axis", axis, [int]) self.axis = axis @@ -1479,7 +1471,7 @@ class Unstack(PrimitiveWithInfer): validator.check_integer("output_num", output_num, 0, Rel.GT) self.add_prim_attr('num', output_num) output_valid_check = x_shape[self.axis] - output_num - validator.check_integer("the dimension which to unstack divides output_num", output_valid_check, 0, Rel.EQ) + validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ) out_shapes = [] out_dtypes = [] out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 97481e69a2..a3d771d7ec 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -80,9 +80,9 @@ class NetForConcat1(nn.Cell): return self.concat((x1, x2)) -class NetForStackInput(nn.Cell): +class NetForPackInput(nn.Cell): def __init__(self, op): - super(NetForStackInput, self).__init__() + super(NetForPackInput, self).__init__() self.op = op self.mul = P.Mul() @@ -93,9 +93,9 @@ class NetForStackInput(nn.Cell): return self.op(t) -class NetForUnstackInput(nn.Cell): +class NetForUnpackInput(nn.Cell): def __init__(self, op): - super(NetForUnstackInput, self).__init__() + super(NetForUnpackInput, self).__init__() self.op = op self.mul = P.Mul() @@ -996,33 +996,33 @@ test_case_array_ops = [ Tensor(np.array([1], np.float32)), Tensor(np.array([1], np.float32)))], 'desc_bprop': [[3,]]}), - ('StackV2_0', { - 'block': NetForStackInput(P.Stack()), + ('Pack_0', { + 'block': NetForPackInput(P.Pack()), 
'desc_inputs':[[2, 2], [2, 2], [2, 2]], 'desc_bprop':[[3, 2, 2]], }), - ('StackV2_1', { - 'block': NetForStackInput(P.Stack(axis=-2)), + ('Pack_1', { + 'block': NetForPackInput(P.Pack(axis=-2)), 'desc_inputs':[[3, 2, 3], [3, 2, 3], [3, 2, 3]], 'desc_bprop':[[3, 2, 3, 3]], }), - ('StackV2_2', { - 'block': NetForStackInput(P.Stack()), + ('Pack_2', { + 'block': NetForPackInput(P.Pack()), 'desc_inputs':[[2, 2]], 'desc_bprop':[[2, 2, 2]], }), - ('StackV2_3', { - 'block': NetForStackInput(P.Stack()), + ('Pack_3', { + 'block': NetForPackInput(P.Pack()), 'desc_inputs':[[128, 128], [128, 128]], 'desc_bprop':[[2, 128, 128]], }), - ('UnstackV2_0', { - 'block': NetForUnstackInput(P.Unstack(axis=0)), + ('Unpack_0', { + 'block': NetForUnpackInput(P.Unpack(axis=0)), 'desc_inputs':[[2, 4]], 'desc_bprop':[[4], [4]], }), - ('UnstackV2_1', { - 'block': NetForUnstackInput(P.Unstack(axis=-1)), + ('Unpack_1', { + 'block': NetForUnpackInput(P.Unpack(axis=-1)), 'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))], 'desc_bprop':[[1], [1], [1]], }), From f57bd919e08432e7fe4a6177371936cd4680c34c Mon Sep 17 00:00:00 2001 From: yao_yf Date: Thu, 9 Apr 2020 15:02:33 +0800 Subject: [PATCH 51/58] Integrate two allreduce fusion set interfaces into one --- mindspore/parallel/__init__.py | 4 +--- mindspore/parallel/_auto_parallel_context.py | 14 ++++++++++---- ...allreduce_fusion.py => _dp_allreduce_fusion.py} | 4 ++-- 3 files changed, 13 insertions(+), 9 deletions(-) rename mindspore/parallel/{dp_allreduce_fusion.py => _dp_allreduce_fusion.py} (94%) diff --git a/mindspore/parallel/__init__.py b/mindspore/parallel/__init__.py index c79704f110..79d8e67a8d 100644 --- a/mindspore/parallel/__init__.py +++ b/mindspore/parallel/__init__.py @@ -15,9 +15,7 @@ """ This interface is ONLY used in Auto-parallel procedure. 
""" -from .dp_allreduce_fusion import set_fusion_strategy_by_idx, set_fusion_strategy_by_size from .algo_parameter_config import get_algo_parameters, reset_algo_parameters, \ set_algo_parameters -__all__ = ["set_fusion_strategy_by_idx", "set_fusion_strategy_by_size", "get_algo_parameters", - "reset_algo_parameters", "set_algo_parameters"] +__all__ = ["get_algo_parameters", "reset_algo_parameters", "set_algo_parameters"] diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py index 3564ad4395..c99ac4a3c7 100644 --- a/mindspore/parallel/_auto_parallel_context.py +++ b/mindspore/parallel/_auto_parallel_context.py @@ -14,6 +14,8 @@ # ============================================================================ """Context of auto parallel""" import threading +import mindspore.context as context +from mindspore.parallel._dp_allreduce_fusion import _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size from mindspore._c_expression import AutoParallelContext from mindspore._extends.pynative_helper import args_type_check @@ -219,13 +221,15 @@ class _AutoParallelContext: indices (list): Indices list. Raises: - ValueError: If type of indices item is not int. + TypeError: If type of indices item is not int. """ self.check_context_handle() for index in indices: if not isinstance(index, int): raise TypeError('indices has invalid value') - return self._context_handle.set_all_reduce_fusion_split_indices(indices) + self._context_handle.set_all_reduce_fusion_split_indices(indices) + if context.get_context("device_target") == "Ascend": + _set_fusion_strategy_by_idx(indices) def get_all_reduce_fusion_split_indices(self): """Get allreduce fusion split indices.""" @@ -240,13 +244,15 @@ class _AutoParallelContext: sizes (list): Sizes list. Raises: - ValueError: If type of sizes item is not int. + TypeError: If type of sizes item is not int. 
""" self.check_context_handle() for size in sizes: if not isinstance(size, int): raise TypeError('sizes has invalid value') - return self._context_handle.set_all_reduce_fusion_split_sizes(sizes) + self._context_handle.set_all_reduce_fusion_split_sizes(sizes) + if context.get_context("device_target") == "Ascend": + _set_fusion_strategy_by_size(sizes) def get_all_reduce_fusion_split_sizes(self): """Get allreduce fusion split sizes.""" diff --git a/mindspore/parallel/dp_allreduce_fusion.py b/mindspore/parallel/_dp_allreduce_fusion.py similarity index 94% rename from mindspore/parallel/dp_allreduce_fusion.py rename to mindspore/parallel/_dp_allreduce_fusion.py index 979823bd80..3c7039dbd6 100644 --- a/mindspore/parallel/dp_allreduce_fusion.py +++ b/mindspore/parallel/_dp_allreduce_fusion.py @@ -43,7 +43,7 @@ def _c_array(ctype, values): return (ctype * len(values))(*values) -def set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): +def _set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): """ A function set gradient segment strategy according to the index list. @@ -100,7 +100,7 @@ def set_fusion_strategy_by_idx(idxList, group="hccl_world_group"): raise RuntimeError('Allreduce split error') -def set_fusion_strategy_by_size(dataSizeList, group="hccl_world_group"): +def _set_fusion_strategy_by_size(dataSizeList, group="hccl_world_group"): """ A function set gradient segment strategy according to the data size percentage list. 
From dd112c98fc4efe734617dab0c7729c9e51390837 Mon Sep 17 00:00:00 2001 From: jojobugfree Date: Thu, 9 Apr 2020 14:47:05 +0800 Subject: [PATCH 52/58] change logging to mindspore.log --- mindspore/context.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mindspore/context.py b/mindspore/context.py index 89365f3d1c..2938b87119 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -17,16 +17,14 @@ The context of mindspore, used to configure the current execution environment, including execution mode, execution backend and other feature switchs. """ import threading -import logging from collections import namedtuple from types import FunctionType +from mindspore import log as logger from mindspore._c_expression import MSContext from mindspore._extends.pynative_helper import args_type_check from mindspore.parallel._auto_parallel_context import _set_auto_parallel_context, _get_auto_parallel_context, \ _reset_auto_parallel_context -logger = logging.getLogger('Context') - __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'set_context', 'get_context', 'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context'] From a73347db6a70429d45dbcf6c2889760dc95f8033 Mon Sep 17 00:00:00 2001 From: Yanjun Peng Date: Thu, 9 Apr 2020 11:04:13 +0800 Subject: [PATCH 53/58] fix dataset para validator check --- mindspore/dataset/engine/samplers.py | 1 - mindspore/dataset/engine/validators.py | 5 +++++ mindspore/dataset/transforms/vision/validators.py | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index ed36e72b65..62a3dbed18 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -127,7 +127,6 @@ class RandomSampler(): Raises: ValueError: If replacement is not boolean. - ValueError: If num_samples is not None and replacement is false. ValueError: If num_samples is not positive. 
""" diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 4c84cfe354..63d7c58270 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -556,6 +556,11 @@ def check_generatordataset(method): if column_names is None: raise ValueError("column_names is not provided.") + # check prefetch_size range + prefetch_size = param_dict.get('prefetch_size') + if prefetch_size is not None and (prefetch_size <= 0 or prefetch_size > 1024): + raise ValueError("prefetch_size exceeds the boundary.") + check_param_type(nreq_param_int, param_dict, int) check_param_type(nreq_param_list, param_dict, list) diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index caab120af4..ef4b879f8c 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -104,6 +104,10 @@ def check_padding(padding): raise ValueError("The size of the padding list or tuple should be 2 or 4.") else: raise TypeError("Padding can be any of: a number, a tuple or list of size 2 or 4.") + if not (isinstance(left, int) and isinstance(top, int) and isinstance(right, int) and isinstance(bottom, int)): + raise TypeError("Padding value should be integer.") + if left < 0 or top < 0 or right < 0 or bottom < 0: + raise ValueError("Padding value could not be negative.") return left, top, right, bottom From d87fc50e3542ba1ad039bc209ea0f760e68af37a Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Thu, 9 Apr 2020 15:22:33 -0400 Subject: [PATCH 54/58] Correct shuffle UT buffer_size > #dataset-row as valid --- .../data/dataset/golden/shuffle_05_result.npz | Bin 0 -> 1507 bytes tests/ut/python/dataset/test_shuffle.py | 39 +++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) create mode 100644 tests/ut/data/dataset/golden/shuffle_05_result.npz diff --git a/tests/ut/data/dataset/golden/shuffle_05_result.npz 
b/tests/ut/data/dataset/golden/shuffle_05_result.npz new file mode 100644 index 0000000000000000000000000000000000000000..27eb0a470d370fab9d4938e5fb9e3eb50d603bd1 GIT binary patch literal 1507 zcmbW1OH&hB6vw+00!c(c4LUmWtAJtD)+OVZ!lkD_Ooy{uH(5UT@6!HZoH}d``mL*e)pVH>6?PTC89M~ul9TG z?dO$%rm1g8i)dNL$qdDd<(jT_vQs6hpjvN}i*u=EU3;$m94*+Dyc3-;qA%v7F(bNK za;i?Ym?=30Te)Ym8x@=Um9=cyro9-C8N(w3G2<8GyH~jG&lk5g%e8pEZTT|wXz+S zZA(6CbAC()p+SEwLklu2;9t_o`$FzYCXf0b zj85?;sAfj14xJo(1@>V-8T$kd;2;_M1@@qWjEF!B0%UXvv|~3J2Lyr$k#SHUf-W)+ z3ACb(jKcz9>>{IEz(6nhaDp~R7~!TI{mSkI>AIpj#sBTIS#Pf0IggLD*Yq+`gFj^hdG1Z>hvct&~|1=1^6CB15y z{nj4_{h??xX-$6dHm3F0=SJi2X5^Y>M*scfZmwI_c4W#j`#${nxS2L>nE?+lW0^hw zyfm7TS(izJLF*8M$GZ9bVc^%k1_d2?VJqKqtMG(CUo_8FL7cCg&Qz(qkTN zq$&7FZ=;oT0WG9=5GK6~Kj|U@r1zM0pIJ)@I=N2HWoA84Z=Ry9Fzq4J(oB2Av<%Z8 zGcC)s9MkelD{Q}##IPB;`X;DeXyDERm#Z%CI5&yUDWg3FJ>gp=mzNJ$Lv?_;ApHX_g H`G6I{m literal 0 HcmV?d00001 diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index 2b7a251d2c..4a823c5fb7 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -98,6 +98,25 @@ def test_shuffle_04(): save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) +def test_shuffle_05(): + """ + Test shuffle: buffer_size > number-of-rows-in-dataset + """ + logger.info("test_shuffle_05") + # define parameters + buffer_size = 13 + seed = 1 + parameters = {"params": {'buffer_size': buffer_size, "seed": seed}} + + # apply dataset operations + data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES) + ds.config.set_seed(seed) + data1 = data1.shuffle(buffer_size=buffer_size) + + filename = "shuffle_05_result.npz" + save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + + def test_shuffle_exception_01(): """ Test shuffle exception: buffer_size<0 @@ -152,24 +171,6 @@ def test_shuffle_exception_03(): assert "buffer_size" in str(e) -def test_shuffle_exception_04(): - """ - Test shuffle 
exception: buffer_size > number-of-rows-in-dataset - """ - logger.info("test_shuffle_exception_04") - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR) - ds.config.set_seed(1) - try: - data1 = data1.shuffle(buffer_size=13) - sum([1 for _ in data1]) - - except BaseException as e: - logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) - - def test_shuffle_exception_05(): """ Test shuffle exception: Missing mandatory buffer_size input parameter @@ -229,10 +230,10 @@ if __name__ == '__main__': test_shuffle_02() test_shuffle_03() test_shuffle_04() + test_shuffle_05() test_shuffle_exception_01() test_shuffle_exception_02() test_shuffle_exception_03() - test_shuffle_exception_04() test_shuffle_exception_05() test_shuffle_exception_06() test_shuffle_exception_07() From cc1416bfc2e5d1a06e66da466c7bee14aa0d1e42 Mon Sep 17 00:00:00 2001 From: biffex Date: Thu, 9 Apr 2020 15:04:24 +0800 Subject: [PATCH 55/58] constant duplicate mul for momentum --- mindspore/ccsrc/optimizer/irpass.cc | 6 +-- .../optimizer/irpass/arithmetic_simplify.h | 54 ++++++++++++++++++- mindspore/ccsrc/utils/graph_utils.cc | 2 + mindspore/ops/operations/math_ops.py | 8 +++ tests/ut/cpp/optimizer/lib_test.cc | 13 +++++ .../gtest_input/optimizer/opt_test.py | 33 ++++++++++++ 6 files changed, 112 insertions(+), 4 deletions(-) diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc index cdc960792f..0991c31b00 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/optimizer/irpass.cc @@ -45,9 +45,9 @@ namespace mindspore { namespace opt { namespace irpass { OptimizeIRPassLib::OptimizeIRPassLib() { - arithmetic_simplify_ = MakeSubstitution( - ArithmeticSimplify(), "arithmetic_simplify", - {prim::kPrimScalarAdd, prim::kPrimScalarMul, prim::kPrimTensorAdd, prim::kPrimIdentity, prim::kPrimMomentum}); + arithmetic_simplify_ = MakeSubstitution(ArithmeticSimplify(), "arithmetic_simplify", + {prim::kPrimScalarAdd, 
prim::kPrimScalarMul, prim::kPrimTensorAdd, + prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul}); special_op_eliminate_ = MakeSubstitution(SpecialOpEliminater(), "special_op_eliminate", {prim::kPrimInsertGradientOf, prim::kPrimPrintShapeType, prim::kPrimGetRefKey, prim::kPrimMirror, prim::kPrimVirtualDiv}); diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h index 8c5610ed1b..ab191aab20 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h @@ -179,6 +179,55 @@ class OptUpdateZeroTensor : public AnfVisitor { } }; +// {prim::kPrimMul, Tensor1, {orim::kPrimMul, Tensor2, {...}}} -> +// {prim::kPrimMul, {...}, {prim::kPrimMul, Tensor1, Tensor2}} +class ConstantDuplicateMul : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + Reset(); + // {prim::kPrimMul, Tensor1, {...}} + AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(node); + if (vnode_ == nullptr || cnode_ == nullptr) { + return nullptr; + } + auto tensor1 = vnode_; + auto mul = cnode_; + + Reset(); + // {prim::kPrimMul, Tensor2, {...}} + AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(mul); + if (vnode_ == nullptr || cnode_ == nullptr) { + return nullptr; + } + auto tensor2 = vnode_; + auto cnode = cnode_; + + auto PrimMul = GetValueNode(mul->input(0)); + auto fg = node->func_graph(); + auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); + return NewCNode({NewValueNode(PrimMul), cnode, ttmul}, fg); + } + + void Visit(const AnfNodePtr &node) override { + if (IsValueNode(node)) { + vnode_ = node; + } + + if (IsCNode(node)) { + cnode_ = node->cast(); + } + } + + void Reset() { + vnode_ = nullptr; + cnode_ = nullptr; + } + + private: + AnfNodePtr vnode_; + CNodePtr cnode_; +}; + class ArithmeticSimplify { public: ArithmeticSimplify() @@ -186,12 +235,14 @@ class ArithmeticSimplify 
{ add_by_zero_(), tensor_add_by_zero_(), identity_(prim::kPrimIdentity), - opt_update_zero_tensor_() { + opt_update_zero_tensor_(), + constant_duplicate_mul_() { eliminaters_.emplace_back(multiply_by_zero_or_one_); eliminaters_.emplace_back(add_by_zero_); eliminaters_.emplace_back(tensor_add_by_zero_); eliminaters_.emplace_back(identity_); eliminaters_.emplace_back(opt_update_zero_tensor_); + eliminaters_.emplace_back(constant_duplicate_mul_); } ~ArithmeticSimplify() = default; @@ -212,6 +263,7 @@ class ArithmeticSimplify { TensorAddByZero tensor_add_by_zero_; PrimEliminater identity_; OptUpdateZeroTensor opt_update_zero_tensor_; + ConstantDuplicateMul constant_duplicate_mul_; std::vector eliminaters_{}; }; } // namespace irpass diff --git a/mindspore/ccsrc/utils/graph_utils.cc b/mindspore/ccsrc/utils/graph_utils.cc index 938df2c291..55ef8dc3d5 100644 --- a/mindspore/ccsrc/utils/graph_utils.cc +++ b/mindspore/ccsrc/utils/graph_utils.cc @@ -400,6 +400,8 @@ static bool SameNodeShallow(const AnfNodePtr& node1, const AnfNodePtr& node2, Fu auto a2 = GetValueNode(node2); if (a1->isa() && a2->isa()) { return a1->cast()->name() == a2->cast()->name(); + } else if (a1->isa() && a2->isa()) { + return a1->cast()->ValueEqual(*(a2->cast())); } else { return *a1 == *a2; } diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 1294a65d02..106886c45c 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -774,6 +774,14 @@ class Mul(_MathBinaryOp): >>> mul(input_x, input_y) [4, 10, 18] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x * y + out = np.array(out, x.dtype) + return Tensor(out) + return None class Square(PrimitiveWithInfer): diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index ff3c00d37a..2d4cf0e78e 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc 
@@ -543,5 +543,18 @@ TEST_F(TestOptLib, test_print_tuple_wrapper) { ASSERT_TRUE(CheckOpt(before2, after2, patterns)); ASSERT_TRUE(CheckOpt(before3, before3, patterns)); } + +TEST_F(TestOptLib, test_constant_duplicate_mul) { + FuncGraphPtr beforell = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforell"); + FuncGraphPtr beforelr = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforelr"); + FuncGraphPtr beforerl = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforerl"); + FuncGraphPtr beforerr = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "beforerr"); + FuncGraphPtr after = getPyFun.CallAndParseRet("test_constant_duplicate_mul", "after"); + auto patterns = std::vector({irpass.arithmetic_simplify_}); + ASSERT_TRUE(CheckOpt(beforell, after, patterns)); + ASSERT_TRUE(CheckOpt(beforelr, after, patterns)); + ASSERT_TRUE(CheckOpt(beforerl, after, patterns)); + ASSERT_TRUE(CheckOpt(beforerr, after, patterns)); +} } // namespace opt } // namespace mindspore diff --git a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py index 53eb2130f0..d494ad27d3 100644 --- a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py +++ b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py @@ -16,6 +16,8 @@ from mindspore.ops import Primitive, PrimitiveWithInfer from mindspore.ops import operations as P from mindspore.ops.operations import _grad_ops as G +from mindspore import Tensor +import numpy as np # pylint: disable=unused-variable @@ -903,3 +905,34 @@ def test_print_tuple_wrapper(tag): return print_(make_tuple(x, y, z)) return fns[tag] + +def test_constant_duplicate_mul(tag): + fns = FnDict() + Mul = Primitive('Mul'); + Sqrt = Primitive('Sqrt'); + + x = Tensor(np.array([[2, 2], [2, 3]]).astype('float32')) + tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) + tensor2 = Tensor(np.array([[2.2, 3.1], [3.2, 4.2]]).astype('float32')) + + @fns 
+ def beforell(): + return Mul(tensor1, Mul(tensor2, Sqrt(x))) + + @fns + def beforelr(): + return Mul(tensor1, Mul(Sqrt(x), tensor2)) + + @fns + def beforerl(): + return Mul(Mul(Sqrt(x), tensor2), tensor1) + + @fns + def beforerr(): + return Mul(Mul(Sqrt(x), tensor2), tensor1) + + @fns + def after(): + return Mul(Sqrt(x), Mul(tensor1, tensor2)) + + return fns[tag] From 1c7d0c0b39c34d851d93132facddaa34e4b37fab Mon Sep 17 00:00:00 2001 From: leonwanghui Date: Thu, 9 Apr 2020 15:58:36 +0800 Subject: [PATCH 56/58] Update setuptool info Signed-off-by: leonwanghui --- CONTRIBUTING.md | 4 +- README.md | 2 +- RELEASE.md | 2 +- package.sh | 2 +- setup_package.py => setup.py | 129 +++++++++++++++++++++++------------ 5 files changed, 90 insertions(+), 49 deletions(-) rename setup_package.py => setup.py (53%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 85fee704c2..105c620942 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -105,11 +105,11 @@ When reporting issues, refer to this format: * If it is a new feature that needs lots of design details, a design proposal should also be submitted. * After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR. * None of PRs is not permitted until it receives **2+ LGTM** from approvers. Please NOTICE that approver is NOT allowed to add *LGTM* on his own PR. -* After PR is sufficiently discussed, it will get merged, abondoned or rejected depending on the outcome of the discussion. +* After PR is sufficiently discussed, it will get merged, abandoned or rejected depending on the outcome of the discussion. **PRs advisory:** - Any irrelevant changes should be avoided. - Make sure your commit history being ordered. - Always keep your branch up with the master branch. -- For bug-fix PRs, make sure all related issues being linked. +- For bug-fix PRs, make sure all related issues being linked. 
diff --git a/README.md b/README.md index 925c22591d..be8ca5189a 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/comm - [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers. - IRC channel at `#mindspore` (only for meeting minutes logging purpose) - Video Conferencing: meet.jit.si -- Mailing-list: https://mailweb.mindspore.cn/postorius/lists +- Mailing-list: https://mailweb.mindspore.cn/postorius/lists ## Contributing diff --git a/RELEASE.md b/RELEASE.md index 8920095bb5..ce9064e4b1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -70,4 +70,4 @@ * [MindSpore Official Website] (https://www.mindspore.cn/) * [MindInsight Visualization Debugging and Optimization] (https://gitee.com/mindspore/mindinsight) * [MindArmour Model Security Hardening Package] (https://gitee.com/mindspore/mindarmour) -* [GraphEngine Computational Graph Engine] (https://gitee.com/mindspore/graphengine) \ No newline at end of file +* [GraphEngine Computational Graph Engine] (https://gitee.com/mindspore/graphengine) diff --git a/package.sh b/package.sh index 67f4761f37..0c75a1bbfd 100755 --- a/package.sh +++ b/package.sh @@ -110,7 +110,7 @@ else export MS_PACKAGE_NAME="mindspore" fi -${PYTHON} "${BASEPATH}/setup_package.py" bdist_wheel +${PYTHON} "${BASEPATH}/setup.py" bdist_wheel chmod -R 700 ${PACKAGE_PATH}/mindspore/ chmod -R 700 ${PACKAGE_PATH}/${MS_PACKAGE_NAME//-/_}.egg-info/ diff --git a/setup_package.py b/setup.py similarity index 53% rename from setup_package.py rename to setup.py index 87b5718de2..e009a9b312 100644 --- a/setup_package.py +++ b/setup.py @@ -14,17 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -"""setup_package.""" +"""setup package.""" import os import stat + from setuptools import setup, find_packages from setuptools.command.egg_info import egg_info from setuptools.command.build_py import build_py version = '0.1.0' -author = 'The MindSpore Authors' -author_email = 'contact@mindspore.cn' -home_page = 'https://www.mindspore.cn' backend_policy = os.getenv('BACKEND_POLICY') commit_id = os.getenv('COMMIT_ID').replace("\n", "") @@ -33,56 +31,70 @@ package_name = os.getenv('MS_PACKAGE_NAME').replace("\n", "") pwd = os.path.dirname(os.path.realpath(__file__)) pkg_dir = os.path.join(pwd, 'build/package') -def write_version(file): + +def _read_file(filename): + with open(os.path.join(pwd, filename)) as f: + return f.read() + + +readme = _read_file('README.md') +release = _read_file('RELEASE.md') + + +def _write_version(file): file.write("__version__ = '{}'\n".format(version)) -def write_config(file): + +def _write_config(file): file.write("__backend__ = '{}'\n".format(backend_policy)) -def write_commit_file(file): + +def _write_commit_file(file): file.write("__commit_id__ = '{}'\n".format(commit_id)) -def build_depends(): + +def build_dependencies(): """generate python file""" - version_file = os.path.join(pwd, 'build/package/mindspore', 'version.py') + version_file = os.path.join(pkg_dir, 'mindspore', 'version.py') with open(version_file, 'w') as f: - write_version(f) + _write_version(f) - version_file = os.path.join(pwd, 'mindspore/', 'version.py') + version_file = os.path.join(pwd, 'mindspore', 'version.py') with open(version_file, 'w') as f: - write_version(f) + _write_version(f) - config_file = os.path.join(pwd, 'build/package/mindspore', 'default_config.py') + config_file = os.path.join(pkg_dir, 'mindspore', 'default_config.py') with open(config_file, 'w') as f: - write_config(f) + _write_config(f) - config_file = os.path.join(pwd, 'mindspore/', 'default_config.py') + 
config_file = os.path.join(pwd, 'mindspore', 'default_config.py') with open(config_file, 'w') as f: - write_config(f) + _write_config(f) - commit_file = os.path.join(pwd, 'build/package/mindspore', '.commit_id') + commit_file = os.path.join(pkg_dir, 'mindspore', '.commit_id') with open(commit_file, 'w') as f: - write_commit_file(f) + _write_commit_file(f) - commit_file = os.path.join(pwd, 'mindspore/', '.commit_id') + commit_file = os.path.join(pwd, 'mindspore', '.commit_id') with open(commit_file, 'w') as f: - write_commit_file(f) - -descriptions = 'An AI computing framework that supports development for AI applications in all scenarios.' - -requires = [ - 'numpy >= 1.17.0', - 'protobuf >= 3.8.0', - 'asttokens >= 1.1.13', - 'pillow >= 6.2.0', - 'scipy == 1.3.3', - 'easydict >= 1.9', - 'sympy >= 1.4', - 'cffi >= 1.13.2', - 'decorator >= 4.4.0' - ], + _write_commit_file(f) + -package_datas = { +build_dependencies() + +required_package = [ + 'numpy >= 1.17.0', + 'protobuf >= 3.8.0', + 'asttokens >= 1.1.13', + 'pillow >= 6.2.0', + 'scipy == 1.3.3', + 'easydict >= 1.9', + 'sympy >= 1.4', + 'cffi >= 1.13.2', + 'decorator >= 4.4.0' +] + +package_data = { '': [ '*.so*', 'lib/*.so*', @@ -91,7 +103,6 @@ package_datas = { ] } -build_depends() def update_permissions(path): """ @@ -103,20 +114,25 @@ def update_permissions(path): for dirpath, dirnames, filenames in os.walk(path): for dirname in dirnames: dir_fullpath = os.path.join(dirpath, dirname) - os.chmod(dir_fullpath, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC | stat.S_IRGRP | stat.S_IXGRP) + os.chmod(dir_fullpath, stat.S_IREAD | stat.S_IWRITE | + stat.S_IEXEC | stat.S_IRGRP | stat.S_IXGRP) for filename in filenames: file_fullpath = os.path.join(dirpath, filename) os.chmod(file_fullpath, stat.S_IREAD) + class EggInfo(egg_info): """Egg info.""" + def run(self): super().run() egg_info_dir = os.path.join(pkg_dir, 'mindspore.egg-info') update_permissions(egg_info_dir) + class BuildPy(build_py): """BuildPy.""" + def 
run(self): super().run() mindspore_dir = os.path.join(pkg_dir, 'build', 'lib', 'mindspore') @@ -124,21 +140,46 @@ class BuildPy(build_py): mindspore_dir = os.path.join(pkg_dir, 'build', 'lib', 'akg') update_permissions(mindspore_dir) + setup( - python_requires='>=3.7', name=package_name, version=version, - author=author, - author_email=author_email, - url=home_page, + author='The MindSpore Authors', + author_email='contact@mindspore.cn', + url='https://www.mindspore.cn', + download_url='https://gitee.com/mindspore/mindspore/tags', + project_urls={ + 'Sources': 'https://gitee.com/mindspore/mindspore', + 'Issue Tracker': 'https://gitee.com/mindspore/mindspore/issues', + }, + description='MindSpore is a new open source deep learning training/inference ' + 'framework that could be used for mobile, edge and cloud scenarios.', + long_description="\n\n".join([readme, release]), packages=find_packages(), - package_data=package_datas, + package_data=package_data, include_package_data=True, cmdclass={ 'egg_info': EggInfo, 'build_py': BuildPy, }, - install_requires=requires, - description=descriptions, + python_requires='>=3.7', + install_requires=required_package, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: C++', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], license='Apache 2.0', + keywords='mindspore machine learning', ) From 406475160f0d604e1021e42da2c22a5330128d5b Mon Sep 17 00:00:00 2001 From: c00425699 Date: Thu, 9 Apr 2020 14:40:43 
+0800 Subject: [PATCH 57/58] refactor OperatorCostPtr in OperatorInfo --- .../auto_parallel/operator_costmodel.cc | 54 ------------------- .../auto_parallel/operator_costmodel.h | 30 ++--------- .../ccsrc/parallel/ops_info/activation_info.h | 18 ++----- .../ccsrc/parallel/ops_info/arithmetic_info.h | 6 +-- .../parallel/ops_info/batch_parallel_info.h | 6 +-- .../ccsrc/parallel/ops_info/bias_add_info.h | 6 +-- .../parallel/ops_info/dropout_do_mask_info.h | 8 +-- .../ccsrc/parallel/ops_info/generator_info.h | 6 +-- .../ccsrc/parallel/ops_info/get_next_info.h | 6 +-- .../parallel/ops_info/l2_normalize_info.h | 6 +-- mindspore/ccsrc/parallel/ops_info/loss_info.h | 6 +-- .../ccsrc/parallel/ops_info/matmul_info.cc | 6 +-- .../ccsrc/parallel/ops_info/matmul_info.h | 7 +-- .../ccsrc/parallel/ops_info/onehot_info.h | 6 +-- .../ccsrc/parallel/ops_info/operator_info.cc | 13 +++-- .../ccsrc/parallel/ops_info/operator_info.h | 13 +++-- .../ccsrc/parallel/ops_info/prelu_info.h | 6 +-- .../parallel/ops_info/reduce_method_info.cc | 8 ++- .../parallel/ops_info/reduce_method_info.h | 8 +-- .../ccsrc/parallel/ops_info/reshape_info.h | 8 +-- .../parallel/ops_info/tmp_identity_info.h | 8 +-- .../ccsrc/parallel/ops_info/transpose_info.h | 6 +-- .../parallel/ops_info/virtual_dataset_info.h | 8 +-- .../cpp/parallel/ops_info/activation_test.cc | 8 +-- .../cpp/parallel/ops_info/matmul_info_test.cc | 4 +- .../parallel/ops_info/tensor_add_info_test.cc | 8 +-- .../cpp/parallel/ops_info/tmpidentity_test.cc | 4 +- 27 files changed, 62 insertions(+), 211 deletions(-) diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc index 7c17b499b1..93d7dc56c5 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc @@ -514,60 +514,6 @@ double ArithmeticCost::GetBackwardCommCost(const std::vector& inputs return result; } -double 
L2NormalizeCost::GetBackwardCommCost(const std::vector& inputs, const std::vector&, - const int32_t& stage_id) const { - double result = 0.0; - if (is_parameter_[0]) { - TensorInfo input_tensor_info = inputs[0]; - CheckGlobalDeviceManager(); - MS_EXCEPTION_IF_NULL(g_device_manager); - auto total_device_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); - - Shape input_shape = input_tensor_info.shape(); - Shape input_slice_shape = input_tensor_info.slice_shape(); - int32_t used_device_num = 1; - for (size_t i = 0; i < input_shape.size(); ++i) { - used_device_num *= input_shape[i] / input_slice_shape[i]; - } - - if (total_device_num != IntToSize(used_device_num)) - result += ListProduct(input_slice_shape) * static_cast(inputs_type_lengths_[0]); - } - - return result; -} - -double L2NormalizeCost::GetForwardComputationCost(const std::vector& inputs, const std::vector&, - const int32_t&) const { - TensorInfo input0_info = inputs[0]; - Shape input0_slice_shape = input0_info.slice_shape(); - return ListProduct(input0_slice_shape) * static_cast(inputs_type_lengths_[0]); -} - -double L2NormalizeCost::GetBackwardComputationCost(const std::vector& inputs, - const std::vector&, const int32_t& stage_id) const { - double result = 0.0; - - if (is_parameter_[0]) { - TensorInfo input_tensor_info = inputs[0]; - CheckGlobalDeviceManager(); - MS_EXCEPTION_IF_NULL(g_device_manager); - auto total_device_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); - - Shape input_shape = input_tensor_info.shape(); - Shape input_slice_shape = input_tensor_info.slice_shape(); - int32_t used_device_num = 1; - for (size_t i = 0; i < input_shape.size(); ++i) { - used_device_num *= input_shape[i] / input_slice_shape[i]; - } - - if (total_device_num != IntToSize(used_device_num)) - result += ListProduct(input_slice_shape) * static_cast(inputs_type_lengths_[0]); - } - - return result; -} - bool IsDataParallel(const Shape& shape, const Shape& slice_shape, const int32_t& 
stage_id) { CheckGlobalDeviceManager(); MS_EXCEPTION_IF_NULL(g_device_manager); diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h index 8f0099bba3..73f3ff139f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h @@ -132,6 +132,8 @@ class ActivationCost : public OperatorCost { }; using ActivationCostPtr = std::shared_ptr; +using TransposeCost = ActivationCost; +using TransposeCostPtr = std::shared_ptr; class SoftmaxCost : public OperatorCost { public: @@ -415,32 +417,8 @@ class ArithmeticCost : public OperatorCost { const int32_t& stage_id) const override; }; using ArithmeticCostPtr = std::shared_ptr; - -class L2NormalizeCost : public OperatorCost { - public: - L2NormalizeCost() = default; - ~L2NormalizeCost() override = default; - - double GetCommCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); - } - double GetForwardCommCost(const std::vector&, const std::vector&, - const int32_t&) const override { - return 0.0; - } - double GetBackwardCommCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override { - return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); - } - double GetForwardComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; - double GetBackwardComputationCost(const std::vector& inputs, const std::vector& outputs, - const int32_t& stage_id) const override; -}; -using L2NormalizeCostPtr = std::shared_ptr; +using BiasAddCost = ArithmeticCost; +using 
BiasAddCostPtr = std::shared_ptr; class ReduceMethodCost : public OperatorCost { public: diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.h b/mindspore/ccsrc/parallel/ops_info/activation_info.h index 183b593e23..21774c43ee 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.h +++ b/mindspore/ccsrc/parallel/ops_info/activation_info.h @@ -32,8 +32,8 @@ namespace parallel { class ActivationBase : public OperatorInfo { public: ActivationBase(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, - const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) {} + const PrimitiveAttrs& attrs, OperatorCostPtr cost) + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, cost) {} ~ActivationBase() override = default; Status Init(const StrategyPtr& strategy) override; @@ -51,19 +51,13 @@ class Activation : public ActivationBase { public: Activation(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : ActivationBase(name, inputs_shape, outputs_shape, attrs) { - ac_cost_ptr_ = std::make_shared(); - } + : ActivationBase(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~Activation() override = default; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return ac_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; - - private: - ActivationCostPtr ac_cost_ptr_; }; class ActivationInfo : public Activation { @@ -108,13 +102,10 @@ class Softmax : public ActivationBase { public: explicit Softmax(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : ActivationBase(name, inputs_shape, outputs_shape, attrs) { - sm_cost_ptr_ = std::make_shared(); - } + : ActivationBase(name, inputs_shape, 
outputs_shape, attrs, std::make_shared()) {} ~Softmax() override = default; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return sm_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -122,7 +113,6 @@ class Softmax : public ActivationBase { private: std::vector axis_; - SoftmaxCostPtr sm_cost_ptr_; }; class SoftmaxInfo : public Softmax { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h b/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h index 7cd0d66b1b..daa2ad595c 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h +++ b/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h @@ -33,15 +33,12 @@ class ArithmeticBase : public OperatorInfo { public: ArithmeticBase(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - arithmeticcost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~ArithmeticBase() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t) override; Status SetCostUnderStrategy(const StrategyPtr&) override; - OperatorCostPtr GetOperatorCost() const override { return arithmeticcost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -54,7 +51,6 @@ class ArithmeticBase : public OperatorInfo { Status InferTensorMap() override; Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout, const Shape& dev_matrix_array); Shapes InferExpendShape(); - ArithmeticCostPtr arithmeticcost_ptr_; }; class SubInfo : public ArithmeticBase { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h 
b/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h index 57711b5298..fae96dcab5 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h +++ b/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h @@ -31,16 +31,13 @@ class BatchParallelInfo : public OperatorInfo { public: BatchParallelInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs), dev_num_(1) { - bp_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), dev_num_(1) {} ~BatchParallelInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return bp_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -55,7 +52,6 @@ class BatchParallelInfo : public OperatorInfo { private: int32_t dev_num_; - BatchParallelCostPtr bp_cost_ptr_; }; class SparseSoftmaxCrossEntropyWithLogitsInfo : public BatchParallelInfo { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h b/mindspore/ccsrc/parallel/ops_info/bias_add_info.h index 07f0bc00ff..dea5c90c88 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h +++ b/mindspore/ccsrc/parallel/ops_info/bias_add_info.h @@ -34,16 +34,13 @@ class BiasAddInfo : public OperatorInfo { public: BiasAddInfo(const std::string& operator_name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - biasaddcost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~BiasAddInfo() override = default; Status Init(const 
StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t) override; Status SetCostUnderStrategy(const StrategyPtr&) override; - OperatorCostPtr GetOperatorCost() const override { return biasaddcost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -55,7 +52,6 @@ class BiasAddInfo : public OperatorInfo { Status InferDevMatrixShape() override; Status InferTensorMap() override; Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout, const Shape& dev_matrix_array); - ArithmeticCostPtr biasaddcost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h index e43601355a..859b3e06a4 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h +++ b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h @@ -33,15 +33,12 @@ class DropoutDoMaskInfo : public OperatorInfo { public: DropoutDoMaskInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - bpcost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~DropoutDoMaskInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return bpcost_ptr_; } Status InitForCostModel(const StrategyPtr& strategy) override; std::shared_ptr>> GenerateBatchStrategies() override; @@ -53,9 +50,6 @@ class DropoutDoMaskInfo : public OperatorInfo { Status GetAttrs() override { return SUCCESS; } Status InferTensorInfo() override; Status InferDevMatrixShape() override; - - private: - BatchParallelCostPtr bpcost_ptr_; }; 
} // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/generator_info.h b/mindspore/ccsrc/parallel/ops_info/generator_info.h index a280fac28e..68024593f3 100644 --- a/mindspore/ccsrc/parallel/ops_info/generator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/generator_info.h @@ -32,15 +32,12 @@ class GeneratorBase : public OperatorInfo { public: GeneratorBase(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - generatorbasecost_ptr_ = std::make_shared(); - } + : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~GeneratorBase() override = default; Status Init(const StrategyPtr &strategy) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return generatorbasecost_ptr_; } Status InitForCostModel(const StrategyPtr &strategy) override; protected: @@ -52,7 +49,6 @@ class GeneratorBase : public OperatorInfo { Status InferMirrorOps() override { return SUCCESS; } Status InferForwardCommunication() override { return SUCCESS; } virtual Status InferReplaceOps(const StrategyPtr &strategy) = 0; - GeneratorBaseCostPtr generatorbasecost_ptr_; }; class DropoutGenMaskInfo : public GeneratorBase { diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.h b/mindspore/ccsrc/parallel/ops_info/get_next_info.h index 32adce1165..9a65eff035 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.h +++ b/mindspore/ccsrc/parallel/ops_info/get_next_info.h @@ -32,14 +32,11 @@ class GetNextInfo : public OperatorInfo { public: GetNextInfo(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs) { - getnextcost_ptr_ = std::make_shared(); - } + : 
OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~GetNextInfo() override = default; Status Init(const StrategyPtr &strategy) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return getnextcost_ptr_; } Status InitForCostModel(const StrategyPtr &strategy) override; Status GenerateStrategies(int32_t stage_id) override; @@ -65,7 +62,6 @@ class GetNextInfo : public OperatorInfo { Shapes shapes_; int32_t output_num_ = 0; std::string shared_name_; - GetNextCostPtr getnextcost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h b/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h index c0af9dbcb9..22ed5a965b 100644 --- a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h +++ b/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h @@ -33,12 +33,9 @@ class L2NormalizeInfo : public Activation { public: L2NormalizeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : Activation(name, inputs_shape, outputs_shape, attrs) { - l2normalizecost_ptr_ = std::make_shared(); - } + : Activation(name, inputs_shape, outputs_shape, attrs) {} ~L2NormalizeInfo() override = default; Status GenerateStrategies(int32_t stage_id) override; - OperatorCostPtr GetOperatorCost() const override { return l2normalizecost_ptr_; } protected: Status GetAttrs() override; @@ -47,7 +44,6 @@ class L2NormalizeInfo : public Activation { private: int32_t axis_ = 0; // Default value = 0 - L2NormalizeCostPtr l2normalizecost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.h b/mindspore/ccsrc/parallel/ops_info/loss_info.h index 6a9697a447..f1c2537a39 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.h +++ b/mindspore/ccsrc/parallel/ops_info/loss_info.h @@ -36,16 +36,13 @@ class 
SoftmaxCrossEntropyWithLogitsInfo : public OperatorInfo { public: SoftmaxCrossEntropyWithLogitsInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - softmax_loss_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~SoftmaxCrossEntropyWithLogitsInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return softmax_loss_cost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -59,7 +56,6 @@ class SoftmaxCrossEntropyWithLogitsInfo : public OperatorInfo { // There are two outputs for SoftmaxCrossEntropyWithLogits, and outputs[1] is used for grad and overload // the InferAsLossDivisor. Status InferAsLossDivisor() override; - SoftmaxCrossEntropyWithLogitsCostPtr softmax_loss_cost_ptr_; private: int32_t axis_ = -1; // default -1 diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc index 2b02dc100d..848116d68a 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc @@ -593,11 +593,11 @@ Status MatMulBase::SetCostUnderStrategy(const mindspore::parallel::StrategyPtr& // Here, we use the origin outputs_, because we only use the slice size of the output tensor. // It does not matter whether the output tensor is transposed or not. 
double computation_cost = - matmulcost_ptr->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); - double communication_cost = matmulcost_ptr->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + cost()->GetForwardComputationCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + double communication_cost = cost()->GetCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = - matmulcost_ptr->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); + cost()->GetForwardCommCost(relica_inputs_tensor_vector, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = result->communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost - result->communication_without_parameter_); diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.h b/mindspore/ccsrc/parallel/ops_info/matmul_info.h index 7ced12b14a..2d3312774d 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.h +++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.h @@ -34,9 +34,7 @@ class MatMulBase : public OperatorInfo { public: MatMulBase(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - matmulcost_ptr = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~MatMulBase() override = default; Status Init(const StrategyPtr& strategy) override; @@ -48,7 +46,6 @@ class MatMulBase : public OperatorInfo { Status PrepareStrategy(int32_t stage_id, size_t dev_num, Dimensions combined_partitions, size_t input0_shape_size, size_t input1_shape_size, StrategyPtr* sp); - OperatorCostPtr GetOperatorCost() const override { return matmulcost_ptr; } Status SwapLastTwoElements(Shape* shape); 
protected: @@ -66,8 +63,6 @@ class MatMulBase : public OperatorInfo { bool transpose_b_ = false; size_t mat_a_dimension_ = 0; size_t mat_b_dimension_ = 0; - - MatMulCostPtr matmulcost_ptr; }; class MatMul : public MatMulBase { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.h b/mindspore/ccsrc/parallel/ops_info/onehot_info.h index 4697e201a4..a54d8479b3 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.h +++ b/mindspore/ccsrc/parallel/ops_info/onehot_info.h @@ -33,16 +33,13 @@ class OneHotInfo : public OperatorInfo { public: OneHotInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - onehot_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~OneHotInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return onehot_cost_ptr_; } ReplaceGraphPtr replace_graph(const CNodePtr& cnode) override; std::shared_ptr>> GenerateBatchStrategies() override; @@ -60,7 +57,6 @@ class OneHotInfo : public OperatorInfo { Status ComputeReplaceGraph(const CNodePtr& cnode); int axis_ = -1; - OneHotCostPtr onehot_cost_ptr_; int32_t rank_ = 0; int32_t total_class_number_ = 1; int32_t classes_each_device_ = 1; diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/parallel/ops_info/operator_info.cc index 11c518d844..a24f3e616b 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.cc @@ -1034,12 +1034,11 @@ Status OperatorInfo::SetCostUnderStrategyBase(const StrategyPtr& strategy) { return FAILED; } int32_t stage_id = 
strategy->GetInputStage(); - double computation_cost = - GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); - double communication_cost = GetOperatorCost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double computation_cost = cost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + double communication_cost = cost()->GetCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); std::shared_ptr result = std::make_shared(computation_cost, communication_cost); result->communication_without_parameter_ = - GetOperatorCost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); + cost()->GetForwardCommCost(inputs_tensor_info_, outputs_tensor_info_, stage_id); result->communication_with_partial_para_ = result->communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost - result->communication_without_parameter_); @@ -1096,7 +1095,7 @@ Status OperatorInfo::set_is_parameter(const std::vector& is_parameter) { return FAILED; } is_parameter_ = is_parameter; - GetOperatorCost()->set_is_parameter(is_parameter); + cost()->set_is_parameter(is_parameter); return SUCCESS; } @@ -1193,7 +1192,7 @@ Status OperatorInfo::SetInputAndOutputTypeLength(const std::vector& inpu } inputs_type_lengths_ = input_lengths; outputs_type_lengths_ = output_lengths; - GetOperatorCost()->SetInputAndOutputTypeLength(input_lengths, output_lengths); + cost()->SetInputAndOutputTypeLength(input_lengths, output_lengths); return SUCCESS; } @@ -1211,7 +1210,7 @@ void OperatorInfo::BreakingTiesForPerferringDataParallel(const StrategyPtr& stra } double OperatorInfo::GetForwardMemoryCostFromCNode() { - return GetOperatorCost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); + return cost()->GetForwardComputationCost(inputs_tensor_info_, outputs_tensor_info_, 0); } } // namespace parallel diff --git 
a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h index e7b8af0a7e..8fcae8ad33 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h @@ -53,12 +53,13 @@ class Edge; class OperatorInfo { public: - OperatorInfo(std::string name, Shapes inputs_shape, Shapes outputs_shape, PrimitiveAttrs attrs) + OperatorInfo(std::string name, Shapes inputs_shape, Shapes outputs_shape, PrimitiveAttrs attrs, OperatorCostPtr cost) : name_(std::move(name)), inputs_shape_(std::move(inputs_shape)), outputs_shape_(std::move(outputs_shape)), attrs_(std::move(attrs)), - is_alive_(true) { + is_alive_(true), + cost_(cost) { std::vector not_parameteter(inputs_shape_.size(), false); is_parameter_ = not_parameteter; refkey_parameter_name_ = ""; @@ -75,7 +76,8 @@ class OperatorInfo { // Given the stage_id (which indicates the number of devices), // generate all strategies for this operator virtual Status GenerateStrategies(int32_t stage_id) = 0; - virtual OperatorCostPtr GetOperatorCost() const = 0; + const OperatorCostPtr& cost() const { return cost_; } + void set_cost(const OperatorCostPtr& cost) { cost_ = cost; } virtual Status SetCostUnderStrategy(const StrategyPtr& strategy) = 0; virtual std::shared_ptr>> GenerateBatchStrategies(); @@ -115,7 +117,7 @@ class OperatorInfo { void ReplaceSuccEdge(const std::shared_ptr& op, const std::shared_ptr& new_edge); void ReplacePreEdges(const std::shared_ptr& op, const std::shared_ptr& new_edge); void ReplaceSuccEdges(const std::shared_ptr& op, const std::shared_ptr& new_edge); - std::vector GetOutputTypeLengths() const { return GetOperatorCost()->outputs_type_lengths(); } + std::vector GetOutputTypeLengths() const { return cost()->outputs_type_lengths(); } void SetSelectedStrategyAndCost(const StrategyPtr& s_strategy, const CostPtr& cost) { selected_strategy_ = s_strategy; selected_cost_ = cost; @@ -221,6 +223,9 @@ class OperatorInfo { 
std::string refkey_parameter_name_; CNodePtr cnode_; int32_t used_devices_ = -1; + + private: + OperatorCostPtr cost_; }; Shape GetSliceShape(const Shape& tensor_shape, const Dimensions& strategy); diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.h b/mindspore/ccsrc/parallel/ops_info/prelu_info.h index d491ecb331..bdfb11550b 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.h +++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.h @@ -35,15 +35,12 @@ class PReLUInfo : public OperatorInfo { public: PReLUInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - prelucost_ptr = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~PReLUInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; - OperatorCostPtr GetOperatorCost() const override { return prelucost_ptr; } Status SetCostUnderStrategy(const StrategyPtr& strategy) override; protected: @@ -59,7 +56,6 @@ class PReLUInfo : public OperatorInfo { private: Dimensions input_strategy_; - PReLUCostPtr prelucost_ptr; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc index 5b07f8d0a9..aa64e72d05 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc @@ -109,8 +109,12 @@ Status ReduceMethod::GetAttrs() { } cross_batch_ = cross_batch_iter->second->cast()->value(); } - reducemethodcost_ptr_->set_cross_batch(cross_batch_); - + auto reducemethodcost = std::dynamic_pointer_cast(cost()); + if (reducemethodcost == nullptr) { + MS_LOG(ERROR) << "Cost cast to ReduceMethodCostPtr failed!"; + return FAILED; + } 
+ reducemethodcost->set_cross_batch(cross_batch_); return SUCCESS; } diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h index 8e2e17af99..c2ddbc87ce 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h +++ b/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h @@ -34,9 +34,7 @@ class ReduceMethod : public OperatorInfo { public: ReduceMethod(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - reducemethodcost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~ReduceMethod() override = default; Status Init(const StrategyPtr &strategy) override; @@ -44,13 +42,11 @@ class ReduceMethod : public OperatorInfo { Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr &strategy) override; - OperatorCostPtr GetOperatorCost() const override { return reducemethodcost_ptr_; } protected: std::string reduce_method_; bool keepdims_ = false; bool cross_batch_ = false; - ReduceMethodCostPtr reducemethodcost_ptr_; Status CheckStrategy(const StrategyPtr &strategy) override; Status GetAttrs() override; Dimensions InferOutputStrategy(); @@ -110,7 +106,7 @@ class ReduceMeanInfo : public ReduceMethod { ReduceMeanInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) : ReduceMethod(name, inputs_shape, outputs_shape, attrs) { - reducemethodcost_ptr_ = std::make_shared(); + set_cost(std::make_shared()); } ~ReduceMeanInfo() override = default; diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.h b/mindspore/ccsrc/parallel/ops_info/reshape_info.h index 1d6a14b1f6..38192a5d01 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.h +++ b/mindspore/ccsrc/parallel/ops_info/reshape_info.h @@ -36,12 +36,10 @@ 
class ReshapeInfo : public OperatorInfo { public: ReshapeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs), + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), dev_num_(0), input_layout_set_flag_(false), - output_layout_set_flag_(false) { - reshape_cost_ptr_ = std::make_shared(); - } + output_layout_set_flag_(false) {} ~ReshapeInfo() override = default; Status Init(const StrategyPtr& strategy) override; void SetInputLayout(const TensorLayout& input_layout) { @@ -55,7 +53,6 @@ class ReshapeInfo : public OperatorInfo { Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return reshape_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -67,7 +64,6 @@ class ReshapeInfo : public OperatorInfo { Status InferTensorLayout(TensorLayouts* inputs_layout, TensorLayouts* outputs_layout); Status GetAttrs() override; Strategys GetOutputsStrategy(); - ReshapeCostPtr reshape_cost_ptr_; private: Status GetParameterInput(); diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h b/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h index 6df5856e0c..cf850683a6 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h +++ b/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h @@ -34,9 +34,7 @@ class TmpIdentityInfo : public OperatorInfo { public: TmpIdentityInfo(const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs, const std::string& name = IDENTITY_INFO) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - id_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~TmpIdentityInfo() override = 
default; Status Init(const StrategyPtr& strategy) override; @@ -44,7 +42,6 @@ class TmpIdentityInfo : public OperatorInfo { Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return id_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -54,9 +51,6 @@ class TmpIdentityInfo : public OperatorInfo { Status InferTensorInfo() override; Status InferDevMatrixShape() override; Status InferTensorMap() override; - - private: - TmpIdentityCostPtr id_cost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.h b/mindspore/ccsrc/parallel/ops_info/transpose_info.h index 4f6f6bb695..2714b352b6 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.h +++ b/mindspore/ccsrc/parallel/ops_info/transpose_info.h @@ -35,15 +35,12 @@ class TransposeInfo : public OperatorInfo { public: TransposeInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - transpose_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~TransposeInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return transpose_cost_ptr_; } protected: Status CheckStrategy(const StrategyPtr& strategy) override; @@ -60,7 +57,6 @@ class TransposeInfo : public OperatorInfo { Status ComputeAxis(); std::vector axis_v_; Dimensions input_strategy_; - ActivationCostPtr transpose_cost_ptr_; }; } // namespace parallel } // namespace mindspore diff --git 
a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h index d0278f27d9..b958adeabe 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h +++ b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h @@ -32,16 +32,13 @@ class VirtualDatasetInfo : public OperatorInfo { public: VirtualDatasetInfo(const std::string& name, const Shapes& inputs_shape, const Shapes& outputs_shape, const PrimitiveAttrs& attrs) - : OperatorInfo(name, inputs_shape, outputs_shape, attrs) { - vd_cost_ptr_ = std::make_shared(); - } + : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()) {} ~VirtualDatasetInfo() override = default; Status Init(const StrategyPtr& strategy) override; Status InitForCostModel(const StrategyPtr& strategy) override; Status GenerateStrategies(int32_t stage_id) override; Status SetCostUnderStrategy(const StrategyPtr& strategy) override; - OperatorCostPtr GetOperatorCost() const override { return vd_cost_ptr_; } void ReComputeBatchSplitFlagList() override; protected: @@ -53,9 +50,6 @@ class VirtualDatasetInfo : public OperatorInfo { Status InferTensorMap() override; Status GetAttrs() override; Status InferAsLossDivisor() override; - - private: - VirtualDatasetCostPtr vd_cost_ptr_; }; } // namespace parallel diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 5d18c5372f..a8f8425ae9 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -84,9 +84,9 @@ TEST_F(TestActivation, test_activation_strategies) { act_ptr_->InitForCostModel(sp); std::vector inputs_info = act_ptr_->inputs_tensor_info(); std::vector outputs_info = act_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(act_ptr_->cost()->GetComputationCost(inputs_info, 
outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(act_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(act_ptr_->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } @@ -109,9 +109,9 @@ TEST_F(TestActivation, test_softmax_strategies) { soft_ptr_->InitForCostModel(sp); std::vector inputs_info = soft_ptr_->inputs_tensor_info(); std::vector outputs_info = soft_ptr_->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(soft_ptr_->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(soft_ptr_->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(soft_ptr_->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index 99ca9f8e0e..2fece098e8 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -569,7 +569,7 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies1) { matmul1->InitForCostModel(sp); std::vector inputs_info = matmul1->inputs_tensor_info(); std::vector outputs_info = matmul1->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(matmul1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(matmul1->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); break; } @@ -599,7 +599,7 @@ TEST_F(TestMatmulInfo, test_GenerateStrategies2) { TensorInfo replica_input1_info(tly, input1_shape, input1_slice_shape); replica_inputs_info.push_back(replica_input1_info); - 
ASSERT_DOUBLE_EQ(matmul3->GetOperatorCost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(matmul3->cost()->GetComputationCost(replica_inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); break; } diff --git a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index 6cb9739b1c..8c956328a7 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -188,11 +188,11 @@ TEST_F(TestTensorAddInfo, GenerateStrategies) { tensor_add->InitForCostModel(sp); std::vector inputs_info = tensor_add->inputs_tensor_info(); std::vector outputs_info = tensor_add->outputs_tensor_info(); - double memory_cost0 = tensor_add->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost0 = tensor_add->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; - double comm_cost0 = tensor_add->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); + double comm_cost0 = tensor_add->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); double comm_cost1 = cost.communication_cost_; bool comm = comm_cost0 - comm_cost1 <= 1.0; @@ -210,11 +210,11 @@ TEST_F(TestTensorAddInfo, GenerateStrategies1) { tensor_add1->InitForCostModel(sp); std::vector inputs_info = tensor_add1->inputs_tensor_info(); std::vector outputs_info = tensor_add1->outputs_tensor_info(); - double memory_cost0 = tensor_add1->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); + double memory_cost0 = tensor_add1->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()); double memory_cost1 = cost.computation_cost_; bool memory = memory_cost0 - memory_cost1 <= 1.0; - double comm_cost0 = 
tensor_add1->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); + double comm_cost0 = tensor_add1->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()); double comm_cost1 = cost.communication_cost_; bool comm = comm_cost0 - comm_cost1 <= 1.0; diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index 043746498f..3971a2b471 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -145,9 +145,9 @@ TEST_F(TestTmpIdentityInfo, test_generate_strategies) { identity_ptr->Init(sp); std::vector inputs_info = identity_ptr->inputs_tensor_info(); std::vector outputs_info = identity_ptr->outputs_tensor_info(); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(identity_ptr->cost()->GetComputationCost(inputs_info, outputs_info, sp->GetInputStage()), cost.computation_cost_); - ASSERT_DOUBLE_EQ(identity_ptr->GetOperatorCost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), + ASSERT_DOUBLE_EQ(identity_ptr->cost()->GetCommCost(inputs_info, outputs_info, sp->GetInputStage()), cost.communication_cost_); } } From 5e9cfaf6effad2cf6121e63ca434a31bd00f7d4d Mon Sep 17 00:00:00 2001 From: chang zherui <760161589@qq.com> Date: Fri, 10 Apr 2020 19:20:37 +0800 Subject: [PATCH 58/58] syn-code1 --- mindspore/ccsrc/pipeline/pipeline_ge.cc | 2 +- mindspore/ccsrc/utils/callbacks.h | 2 +- mindspore/nn/optim/rmsprop.py | 2 +- tests/st/networks/test_network_main.py | 55 +------------------------ 4 files changed, 5 insertions(+), 56 deletions(-) diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index ee67d46cf7..6ce0ea5316 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -533,4 +533,4 @@ void ExportDFGraph(const std::string& 
file_name, const std::string& phase) { MS_LOG(DEBUG) << "ExportGraph End"; } } // namespace pipeline -} // namespace mindspore \ No newline at end of file +} // namespace mindspore diff --git a/mindspore/ccsrc/utils/callbacks.h b/mindspore/ccsrc/utils/callbacks.h index 6f099ef4ca..a1e4e75d5b 100644 --- a/mindspore/ccsrc/utils/callbacks.h +++ b/mindspore/ccsrc/utils/callbacks.h @@ -40,7 +40,7 @@ const int kCallbackOk = 0; const int kCallbackFalied = 1; bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name, - const std::shared_ptr>& shape) + const std::shared_ptr>& shape); uint32_t SummarySaveCallback(uint32_t, const std::map&); } // namespace callbacks diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index e252f89f2f..b17a101708 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -194,4 +194,4 @@ class RMSProp(Optimizer): else: success = self.hyper_map(F.partial(rmsprop_opt, self.opt, lr, self.decay, self.epsilon, self.momentum), params, self.ms, self.moment, gradients) - return success \ No newline at end of file + return success diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py index 730602c0ae..4689adee54 100644 --- a/tests/st/networks/test_network_main.py +++ b/tests/st/networks/test_network_main.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -<<<<<<< HEAD:tests/st/networks/test_network_main.py """ -Function: +Function: test network -Usage: +Usage: python test_network_main.py --net lenet --target Ascend """ import os @@ -32,47 +31,6 @@ from models.lenet import LeNet from models.resnetv1_5 import resnet50 from models.alexnet import AlexNet context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") -======= -import pytest -from mindspore.nn import TrainOneStepCell, WithLossCell -import mindspore.context as context -from mindspore.nn.optim import Momentum -import numpy as np -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore import Tensor - -class LeNet(nn.Cell): - def __init__(self): - super(LeNet, self).__init__() - self.relu = P.ReLU() - self.batch_size = 32 - - self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') - self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, has_bias=False, pad_mode='valid') - self.pool = nn.MaxPool2d(kernel_size=2, stride=2) - self.reshape = P.Reshape() - self.fc1 = nn.Dense(400, 120) - self.fc2 = nn.Dense(120, 84) - self.fc3 = nn.Dense(84, 10) - - def construct(self, input_x): - output = self.conv1(input_x) - output = self.relu(output) - output = self.pool(output) - output = self.conv2(output) - output = self.relu(output) - output = self.pool(output) - output = self.reshape(output, (self.batch_size, -1)) - output = self.fc1(output) - output = self.relu(output) - output = self.fc2(output) - output = self.relu(output) - output = self.fc3(output) - return output - -context.set_context(mode=context.GRAPH_MODE, device_target="CPU") ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def train(net, data, label): learning_rate = 0.01 @@ -89,24 +47,17 @@ def train(net, data, label): print("+++++++++++++++++++++++++++") assert res -<<<<<<< HEAD:tests/st/networks/test_network_main.py def 
test_resnet50(): data = Tensor(np.ones([32, 3 ,224, 224]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = resnet50(32, 10) train(net, data, label) -======= -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py def test_lenet(): data = Tensor(np.ones([32, 1 ,32, 32]).astype(np.float32) * 0.01) label = Tensor(np.ones([32]).astype(np.int32)) net = LeNet() train(net, data, label) -<<<<<<< HEAD:tests/st/networks/test_network_main.py def test_alexnet(): data = Tensor(np.ones([32, 3 ,227, 227]).astype(np.float32) * 0.01) @@ -128,5 +79,3 @@ if __name__ == "__main__": test_alexnet() else: print("Please add net name like --net lenet") -======= ->>>>>>> add cpu st lenet:tests/st/networks/test_cpu_lenet.py