Merge pull request !1348 from yihuaijie/master
| @@ -1,154 +1,154 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| import mindspore.context as context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| device_num = 2 | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| rank_id = 0 | |||
| def setup_module(): | |||
| global device_num | |||
| global rank_id | |||
| np.random.seed(0) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") | |||
| context.set_context(device_id=device_id) | |||
| distributedTool.init() | |||
| device_num = distributedTool.get_group_size() | |||
| rank_id = distributedTool.get_rank() | |||
| context.set_auto_parallel_context(device_num=device_num, | |||
| global_rank=rank_id) | |||
| def teardown_module(): | |||
| distributedTool.release() | |||
| class Onehot(Cell): | |||
| def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): | |||
| super(Onehot, self).__init__() | |||
| trans_stra = None | |||
| if strategy: | |||
| trans_stra = (strategy[0],) | |||
| self.onehot = P.OneHot().set_strategy(strategy=strategy) | |||
| self.depth = depth | |||
| self.on_value = Tensor(on_value, ms.float32) | |||
| self.off_value = Tensor(off_value, ms.float32) | |||
| self.transpose = P.Transpose().set_strategy(strategy=trans_stra) | |||
| self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) | |||
| def construct(self, input, indices): | |||
| x = self.onehot(indices, self.depth, self.on_value, self.off_value) | |||
| x = self.transpose(x, (1, 0)) | |||
| x = self.sub(input, x) | |||
| return x | |||
| class DataGenerator(): | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def generate_data(self, shape): | |||
| data = np.random.rand(*shape) | |||
| return data | |||
| def input_data(self, shape): | |||
| data = (self.generate_data(shape) * 2).astype(np.float32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| def label_data(self, shape, classes): | |||
| data = (self.generate_data(shape) * (classes - 1)).astype(np.int32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| class OneHotFactory: | |||
| def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): | |||
| dataGen = DataGenerator() | |||
| self.input_full, self.input_part = dataGen.input_data((classes, batch_size)) | |||
| self.label_full, self.label_part = dataGen.label_data((batch_size,), classes) | |||
| self.depth = classes | |||
| self.on_value = on_value | |||
| self.off_value = off_value | |||
| self.axis = axis | |||
| self.strategy = strategy | |||
| def forward_mindspore_single_impl(self): | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value) | |||
| out = net(self.input_full, self.label_full) | |||
| return out | |||
| def forward_mindspore_parallel_impl(self): | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value, strategy=self.strategy) | |||
| out = net.compile_and_run(self.input_full, self.label_full) | |||
| return out | |||
| def forward_cmp(self): | |||
| out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() | |||
| context.reset_auto_parallel_context() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() | |||
| context.reset_auto_parallel_context() | |||
| assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) | |||
| def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): | |||
| fact = OneHotFactory(batch_size=128, | |||
| classes=1024, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| strategy=((1, device_num), (), ())) | |||
| fact.forward_cmp() | |||
| def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): | |||
| fact = OneHotFactory(batch_size=1024, | |||
| classes=128, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| strategy=((1, device_num), (), ())) | |||
| fact.forward_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| import mindspore.context as context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| device_num = 2 | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| rank_id = 0 | |||
| def setup_module(): | |||
| global device_num | |||
| global rank_id | |||
| np.random.seed(0) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") | |||
| context.set_context(device_id=device_id) | |||
| distributedTool.init() | |||
| device_num = distributedTool.get_group_size() | |||
| rank_id = distributedTool.get_rank() | |||
| context.set_auto_parallel_context(device_num=device_num, | |||
| global_rank=rank_id) | |||
| def teardown_module(): | |||
| distributedTool.release() | |||
| class Onehot(Cell): | |||
| def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): | |||
| super(Onehot, self).__init__() | |||
| trans_stra = None | |||
| if strategy: | |||
| trans_stra = (strategy[0],) | |||
| self.onehot = P.OneHot().set_strategy(strategy=strategy) | |||
| self.depth = depth | |||
| self.on_value = Tensor(on_value, ms.float32) | |||
| self.off_value = Tensor(off_value, ms.float32) | |||
| self.transpose = P.Transpose().set_strategy(strategy=trans_stra) | |||
| self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) | |||
| self.axis = axis | |||
| def construct(self, input_, indices): | |||
| x = self.onehot(indices, self.depth, self.on_value, self.off_value) | |||
| x = self.transpose(x, (1, 0)) | |||
| x = self.sub(input_, x) | |||
| return x | |||
| class DataGenerator(): | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def generate_data(self, shape): | |||
| data = np.random.rand(*shape) | |||
| return data | |||
| def input_data(self, shape): | |||
| data = (self.generate_data(shape) * 2).astype(np.float32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| def label_data(self, shape, classes): | |||
| data = (self.generate_data(shape) * (classes - 1)).astype(np.int32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| class OneHotFactory: | |||
| def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None): | |||
| data_gen = DataGenerator() | |||
| self.input_full, self.input_part = data_gen.input_data((classes, batch_size)) | |||
| self.label_full, self.label_part = data_gen.label_data((batch_size,), classes) | |||
| self.depth = classes | |||
| self.on_value = on_value | |||
| self.off_value = off_value | |||
| self.axis = axis | |||
| self.strategy = strategy | |||
| def forward_mindspore_single_impl(self): | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value) | |||
| out = net(self.input_full, self.label_full) | |||
| return out | |||
| def forward_mindspore_parallel_impl(self): | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value, strategy=self.strategy) | |||
| out = net.compile_and_run(self.input_full, self.label_full) | |||
| return out | |||
| def forward_cmp(self): | |||
| out_mindspore_single = self.forward_mindspore_single_impl().asnumpy() | |||
| context.reset_auto_parallel_context() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy() | |||
| context.reset_auto_parallel_context() | |||
| assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001) | |||
| def test_reid_onehot_forward_int32_128_depth1024_model_parallel(): | |||
| fact = OneHotFactory(batch_size=128, | |||
| classes=1024, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| strategy=((1, device_num), (), ())) | |||
| fact.forward_cmp() | |||
| def test_reid_onehot_forward_int32_1024_depth128_model_parallel(): | |||
| fact = OneHotFactory(batch_size=1024, | |||
| classes=128, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| strategy=((1, device_num), (), ())) | |||
| fact.forward_cmp() | |||
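The DataGenerator.get_parallel_blocks helper above is what hands each rank its slice of the full batch: the tensor is split along axis 0 into device_num blocks and rank_id indexes the block for the local device. A minimal, self-contained NumPy sketch of the same splitting (the 4x3 array and the [2, 1] strategy here are illustrative, not values from this change) makes the block ordering easy to inspect without Ascend hardware:

    import numpy as np

    def get_parallel_blocks(input_, strategy):
        # Mirrors DataGenerator.get_parallel_blocks from the test above:
        # split along axis 0 into strategy[0] parts, then axis 1 into strategy[1] parts, ...
        blocks = [input_]
        for axis, parts in enumerate(strategy):
            temp = []
            while blocks:
                temp.extend(np.split(blocks.pop(0), parts, axis=axis))
            blocks = temp
        return blocks

    full = np.arange(12, dtype=np.float32).reshape(4, 3)   # stand-in for the full batch
    for rank_id, block in enumerate(get_parallel_blocks(full, [2, 1])):
        print(f"rank {rank_id} block shape {block.shape}:\n{block}")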
| @@ -1,275 +1,276 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.parameter import ParameterTuple, Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| from mindspore.train import Model, ParallelMode | |||
| from mindspore.train.callback import Callback | |||
| np.set_printoptions(threshold=np.inf) | |||
| device_num = 2 | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| rank_id = 0 | |||
| embed = 128 | |||
| classes = 32 | |||
| batch_size = 32 * 2 | |||
| MatmulParamShape = (classes, embed) | |||
| def setup_module(): | |||
| global device_num | |||
| global rank_id | |||
| np.random.seed(0) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") | |||
| context.set_context(device_id=device_id) | |||
| distributedTool.init() | |||
| rank_id = distributedTool.get_rank() | |||
| device_num = distributedTool.get_group_size() | |||
| context.set_auto_parallel_context(device_num=device_num, | |||
| global_rank=device_id) | |||
| def teardown_module(): | |||
| distributedTool.release() | |||
| class DataGenerator(): | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def generate_data(self, shape): | |||
| size = np.cumprod(shape)[-1] | |||
| num_range = min(size, 1000) | |||
| data = (np.arange(0, size) % num_range) / num_range | |||
| data = np.reshape(data, shape) | |||
| return data | |||
| def input_data(self, shape): | |||
| data = (self.generate_data(shape) * 0.1).astype(np.float32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| def label_data(self, shape, embed): | |||
| data = (self.generate_data(shape) * (embed - 1)).astype(np.int32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| class Dataset(): | |||
| def __init__(self, predict, label, length=1, input_num=2): | |||
| self.predict = predict | |||
| self.label = label | |||
| self.index = 0 | |||
| self.length = length | |||
| self.input_num = input_num | |||
| def __iter__(self): | |||
| return self | |||
| def __next__(self): | |||
| if self.index >= self.length: | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return self.predict, self.label | |||
| else: | |||
| return self.predict, | |||
| def reset(self): | |||
| self.index = 0 | |||
| def get_dataset_size(self): | |||
| return self.length | |||
| def get_repeat_count(self): | |||
| return self.length | |||
| class ModelCallback(Callback): | |||
| def __init__(self): | |||
| super(ModelCallback, self).__init__() | |||
| self.loss_list = [] | |||
| def epoch_end(self, run_context, *args): | |||
| cb_params = run_context.original_args() | |||
| result = cb_params.net_outputs | |||
| self.loss_list.append(result.asnumpy().mean()) | |||
| class SoftmaxCrossEntropyExpand(Cell): | |||
| def __init__(self, sparse=False, stra_list=[]): | |||
| super(SoftmaxCrossEntropyExpand, self).__init__() | |||
| if len(stra_list) < 11: | |||
| stra_list = [None] * 11 | |||
| self.exp = P.Exp() | |||
| self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) | |||
| self.onehot = P.OneHot().set_strategy(strategy=stra_list[2]) | |||
| self.on_value = Tensor(1.0, mstype.float32) | |||
| self.off_value = Tensor(0.0, mstype.float32) | |||
| self.div = P.Div().set_strategy(strategy=stra_list[3]) | |||
| self.log = P.Log().set_strategy(strategy=stra_list[4]) | |||
| self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) | |||
| self.mul = P.Mul().set_strategy(strategy=stra_list[6]) | |||
| self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) | |||
| self.cast = P.Cast() | |||
| self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) | |||
| self.sparse = sparse | |||
| self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) | |||
| self.sub = P.Sub().set_strategy(strategy=stra_list[10]) | |||
| def construct(self, logit, label): | |||
| logit_max = self.reduce_max(logit, -1) | |||
| exp = self.exp(self.sub(logit, logit_max)) | |||
| exp_sum = self.reduce_sum(exp, -1) | |||
| softmax_result = self.div(exp, exp_sum) | |||
| if self.sparse: | |||
| label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) | |||
| softmax_result_log = self.log(softmax_result) | |||
| loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) | |||
| loss = self.mul2(F.scalar_to_array(-1.0), loss) | |||
| loss = self.reduce_mean(loss, -1) | |||
| return loss | |||
| class MatmulNet(Cell): | |||
| def __init__(self, matmul_stra=None, loss_stra_list=[]): | |||
| super(MatmulNet, self).__init__() | |||
| self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) | |||
| self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) | |||
| self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") | |||
| def construct(self, x, label): | |||
| loss_input = self.matmul(x, self.weight) | |||
| out = self.loss(loss_input, label) | |||
| return out | |||
| class LossFactory(): | |||
| def __init__(self): | |||
| dataGen = DataGenerator() | |||
| self.input_full, self.input_part = dataGen.input_data((batch_size, embed)) | |||
| self.label_full, self.label_part = dataGen.label_data((batch_size,), embed) | |||
| def single_matmul_trains(self): | |||
| single_callback = ModelCallback() | |||
| net = MatmulNet() | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_full, self.label_full) | |||
| model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(single_callback.loss_list) | |||
| return loss_value | |||
| def data_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net = MatmulNet() | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def model_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| matmul_stra = ((1, 1), (device_num, 1)) | |||
| reduce_max_stra = ((1, device_num),) | |||
| sub_stra = ((1, device_num), (1, 1)) | |||
| exp_stra = ((1, device_num),) | |||
| reduce_sum_stra = ((1, device_num),) | |||
| div_stra = ((1, device_num), (1, 1)) | |||
| log_stra = ((1, device_num),) | |||
| mul_stra = ((1, device_num), (1, device_num)) | |||
| sum_cross_entropy_stra = ((1, device_num),) | |||
| mul2_stra = ((), (device_num,)) | |||
| reduce_mean_stra = ((device_num,),) | |||
| onehot_stra = ((1, device_num), (), ()) | |||
| loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, | |||
| sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def mix_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| matmul_stra = ((device_num, 1), (1, 1)) | |||
| reduce_max_stra = ((1, device_num),) | |||
| sub_stra = ((device_num, 1), (device_num, 1)) | |||
| exp_stra = ((1, device_num),) | |||
| reduce_sum_stra = ((1, device_num),) | |||
| div_stra = ((1, device_num), (1, 1)) | |||
| log_stra = ((1, device_num),) | |||
| mul_stra = ((1, device_num), (1, device_num)) | |||
| sum_cross_entropy_stra = ((1, device_num),) | |||
| mul2_stra = ((), (device_num,)) | |||
| reduce_mean_stra = ((device_num,),) | |||
| onehot_stra = ((1, device_num), (), ()) | |||
| loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, | |||
| sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def test_all_trains(): | |||
| loss_factory = LossFactory() | |||
| context.reset_auto_parallel_context() | |||
| single_loss = loss_factory.single_matmul_trains() | |||
| model_parallel_loss = loss_factory.model_parallel_matmul_trains() | |||
| mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() | |||
| assert allclose(single_loss, model_parallel_loss) | |||
| assert allclose(single_loss, mix_parallel_loss) | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| from mindspore.train import Model | |||
| from mindspore.train.callback import Callback | |||
| np.set_printoptions(threshold=np.inf) | |||
| device_num = 2 | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| rank_id = 0 | |||
| embed = 128 | |||
| classes = 32 | |||
| batch_size = 32 * 2 | |||
| MatmulParamShape = (classes, embed) | |||
| def setup_module(): | |||
| global device_num | |||
| global rank_id | |||
| np.random.seed(0) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") | |||
| context.set_context(device_id=device_id) | |||
| distributedTool.init() | |||
| rank_id = distributedTool.get_rank() | |||
| device_num = distributedTool.get_group_size() | |||
| context.set_auto_parallel_context(device_num=device_num, | |||
| global_rank=device_id) | |||
| def teardown_module(): | |||
| distributedTool.release() | |||
| class DataGenerator(): | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def generate_data(self, shape): | |||
| size = np.cumprod(shape)[-1] | |||
| num_range = min(size, 1000) | |||
| data = (np.arange(0, size) % num_range) / num_range | |||
| data = np.reshape(data, shape) | |||
| return data | |||
| def input_data(self, shape): | |||
| data = (self.generate_data(shape) * 0.1).astype(np.float32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| def label_data(self, shape, embed_): | |||
| data = (self.generate_data(shape) * (embed_ - 1)).astype(np.int32) | |||
| stra = [1] * len(shape) | |||
| stra[0] = device_num | |||
| datas = self.get_parallel_blocks(data, stra) | |||
| return Tensor(data), Tensor(datas[rank_id]) | |||
| class Dataset(): | |||
| def __init__(self, predict, label, length=1, input_num=2): | |||
| self.predict = predict | |||
| self.label = label | |||
| self.index = 0 | |||
| self.length = length | |||
| self.input_num = input_num | |||
| def __iter__(self): | |||
| return self | |||
| def __next__(self): | |||
| if self.index >= self.length: | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return (self.predict, self.label) | |||
| return (self.predict,) | |||
| def reset(self): | |||
| self.index = 0 | |||
| def get_dataset_size(self): | |||
| return self.length | |||
| def get_repeat_count(self): | |||
| return self.length | |||
| class ModelCallback(Callback): | |||
| def __init__(self): | |||
| super(ModelCallback, self).__init__() | |||
| self.loss_list = [] | |||
| def epoch_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| result = cb_params.net_outputs | |||
| self.loss_list.append(result.asnumpy().mean()) | |||
| class SoftmaxCrossEntropyExpand(Cell): | |||
| def __init__(self, sparse=False, stra_list=None): | |||
| super(SoftmaxCrossEntropyExpand, self).__init__() | |||
| if stra_list is None: | |||
| stra_list = [] | |||
| if len(stra_list) < 11: | |||
| stra_list = [None] * 11 | |||
| self.exp = P.Exp() | |||
| self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) | |||
| self.onehot = P.OneHot().set_strategy(strategy=stra_list[2]) | |||
| self.on_value = Tensor(1.0, mstype.float32) | |||
| self.off_value = Tensor(0.0, mstype.float32) | |||
| self.div = P.Div().set_strategy(strategy=stra_list[3]) | |||
| self.log = P.Log().set_strategy(strategy=stra_list[4]) | |||
| self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) | |||
| self.mul = P.Mul().set_strategy(strategy=stra_list[6]) | |||
| self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) | |||
| self.cast = P.Cast() | |||
| self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) | |||
| self.sparse = sparse | |||
| self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) | |||
| self.sub = P.Sub().set_strategy(strategy=stra_list[10]) | |||
| def construct(self, logit, label): | |||
| logit_max = self.reduce_max(logit, -1) | |||
| exp = self.exp(self.sub(logit, logit_max)) | |||
| exp_sum = self.reduce_sum(exp, -1) | |||
| softmax_result = self.div(exp, exp_sum) | |||
| if self.sparse: | |||
| label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) | |||
| softmax_result_log = self.log(softmax_result) | |||
| loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) | |||
| loss = self.mul2(F.scalar_to_array(-1.0), loss) | |||
| loss = self.reduce_mean(loss, -1) | |||
| return loss | |||
| class MatmulNet(Cell): | |||
| def __init__(self, matmul_stra=None, loss_stra_list=None): | |||
| super(MatmulNet, self).__init__() | |||
| if loss_stra_list is None: | |||
| loss_stra_list = [] | |||
| self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) | |||
| self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) | |||
| self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") | |||
| def construct(self, x, label): | |||
| loss_input = self.matmul(x, self.weight) | |||
| out = self.loss(loss_input, label) | |||
| return out | |||
| class LossFactory(): | |||
| def __init__(self): | |||
| data_gen = DataGenerator() | |||
| self.input_full, self.input_part = data_gen.input_data((batch_size, embed)) | |||
| self.label_full, self.label_part = data_gen.label_data((batch_size,), embed) | |||
| def single_matmul_trains(self): | |||
| single_callback = ModelCallback() | |||
| net = MatmulNet() | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_full, self.label_full) | |||
| model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(single_callback.loss_list) | |||
| return loss_value | |||
| def data_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net = MatmulNet() | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def model_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| matmul_stra = ((1, 1), (device_num, 1)) | |||
| reduce_max_stra = ((1, device_num),) | |||
| sub_stra = ((1, device_num), (1, 1)) | |||
| exp_stra = ((1, device_num),) | |||
| reduce_sum_stra = ((1, device_num),) | |||
| div_stra = ((1, device_num), (1, 1)) | |||
| log_stra = ((1, device_num),) | |||
| mul_stra = ((1, device_num), (1, device_num)) | |||
| sum_cross_entropy_stra = ((1, device_num),) | |||
| mul2_stra = ((), (device_num,)) | |||
| reduce_mean_stra = ((device_num,),) | |||
| onehot_stra = ((1, device_num), (), ()) | |||
| loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, | |||
| sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def mix_parallel_matmul_trains(self): | |||
| parallel_callback = ModelCallback() | |||
| matmul_stra = ((device_num, 1), (1, 1)) | |||
| reduce_max_stra = ((1, device_num),) | |||
| sub_stra = ((device_num, 1), (device_num, 1)) | |||
| exp_stra = ((1, device_num),) | |||
| reduce_sum_stra = ((1, device_num),) | |||
| div_stra = ((1, device_num), (1, 1)) | |||
| log_stra = ((1, device_num),) | |||
| mul_stra = ((1, device_num), (1, device_num)) | |||
| sum_cross_entropy_stra = ((1, device_num),) | |||
| mul2_stra = ((), (device_num,)) | |||
| reduce_mean_stra = ((device_num,),) | |||
| onehot_stra = ((1, device_num), (), ()) | |||
| loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra, | |||
| sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra] | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list) | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| model = Model(net, optimizer=optimizer) | |||
| epoch_size = 6 | |||
| dataset = Dataset(self.input_part, self.label_part) | |||
| model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False) | |||
| loss_value = np.array(parallel_callback.loss_list) | |||
| return loss_value | |||
| def test_all_trains(): | |||
| loss_factory = LossFactory() | |||
| context.reset_auto_parallel_context() | |||
| single_loss = loss_factory.single_matmul_trains() | |||
| model_parallel_loss = loss_factory.model_parallel_matmul_trains() | |||
| mix_parallel_loss = loss_factory.mix_parallel_matmul_trains() | |||
| assert allclose(single_loss, model_parallel_loss) | |||
| assert allclose(single_loss, mix_parallel_loss) | |||
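SoftmaxCrossEntropyExpand above rebuilds softmax cross-entropy from primitive operators precisely so that each operator can be assigned its own sharding strategy through stra_list. As a reference for the arithmetic that the single-device and sharded runs must agree on, here is the same computation in plain NumPy (a sketch only; the batch of 4 logits over 8 classes is illustrative):

    import numpy as np

    def softmax_cross_entropy_expand(logit, label_idx):
        # NumPy mirror of SoftmaxCrossEntropyExpand.construct with sparse=True:
        # numerically stabilised softmax, one-hot the label, mean of -sum(log(p) * y).
        logit_max = logit.max(axis=-1, keepdims=True)           # ReduceMax
        exp = np.exp(logit - logit_max)                         # Sub + Exp
        softmax = exp / exp.sum(axis=-1, keepdims=True)         # ReduceSum + Div
        onehot = np.eye(logit.shape[1])[label_idx]              # OneHot(on_value=1.0, off_value=0.0)
        per_sample = -(np.log(softmax) * onehot).sum(axis=-1)   # Log, Mul, ReduceSum, Mul(-1.0)
        return per_sample.mean()                                # ReduceMean

    logits = np.random.rand(4, 8).astype(np.float32)
    labels = np.array([1, 0, 7, 3])
    print(softmax_cross_entropy_expand(logits, labels))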
| @@ -1,26 +1,26 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import pytest | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.env_single | |||
| def test_expand_loss(): | |||
| sh_path = os.path.split(os.path.realpath(__file__))[0] | |||
| ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") | |||
| assert (ret == 0) | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import pytest | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.env_single | |||
| def test_expand_loss(): | |||
| sh_path = os.path.split(os.path.realpath(__file__))[0] | |||
| ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh") | |||
| assert ret == 0 | |||
| @@ -1,22 +1,21 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import pytest | |||
| def test_expand_loss(): | |||
| ret = os.system("sh run_onehot_model_parallel.sh") | |||
| assert (ret == 0) | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| def test_expand_loss(): | |||
| ret = os.system("sh run_onehot_model_parallel.sh") | |||
| assert ret == 0 | |||
| @@ -13,8 +13,8 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import numpy as np | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| @@ -37,31 +37,29 @@ init() | |||
| context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL) | |||
| def weight_variable(shape, factor=0.1): | |||
| def weight_variable(): | |||
| return One() | |||
| def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): | |||
| init_value = weight_variable((out_channels, in_channels, 3, 3)) | |||
| init_value = weight_variable() | |||
| return nn.Conv2d(in_channels, out_channels, | |||
| kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) | |||
| def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): | |||
| init_value = weight_variable((out_channels, in_channels, 1, 1)) | |||
| init_value = weight_variable() | |||
| return nn.Conv2d(in_channels, out_channels, | |||
| kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) | |||
| def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): | |||
| init_value = weight_variable((out_channels, in_channels, 7, 7)) | |||
| init_value = weight_variable() | |||
| return nn.Conv2d(in_channels, out_channels, | |||
| kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) | |||
| def _fused_bn(channels, momentum=0.9): | |||
| init_weight = weight_variable((channels,)) | |||
| init_bias = weight_variable((channels,)) | |||
| return nn.BatchNorm2d(channels, momentum=momentum) | |||
| @@ -210,8 +208,8 @@ class ResNet(nn.Cell): | |||
| self.mean = P.ReduceMean(keep_dims=True) | |||
| self.end_point = nn.Dense(2048, num_classes, has_bias=True, | |||
| weight_init=weight_variable((num_classes, 2048)), | |||
| bias_init=weight_variable((num_classes,))) | |||
| weight_init=weight_variable(), | |||
| bias_init=weight_variable()) | |||
| self.squeeze = P.Squeeze() | |||
| self.cast = P.Cast() | |||
| @@ -345,9 +343,8 @@ class Dataset(): | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return self.predict, self.label | |||
| else: | |||
| return self.predict, | |||
| return (self.predict, self.label) | |||
| return (self.predict,) | |||
| def reset(self): | |||
| self.index = 0 | |||
| @@ -364,7 +361,7 @@ class ModelCallback(Callback): | |||
| super(ModelCallback, self).__init__() | |||
| self.loss_list = [] | |||
| def epoch_end(self, run_context, *args): | |||
| def epoch_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| result = cb_params.net_outputs | |||
| self.loss_list.append(result.asnumpy().mean()) | |||
| @@ -376,9 +373,9 @@ class ModelCallback(Callback): | |||
| def test_train_feed(num_classes=8192): | |||
| set_algo_parameters(elementwise_op_strategy_follow=True) | |||
| parallel_callback = ModelCallback() | |||
| dataGen = DataGenerator() | |||
| input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) | |||
| label_full, label_part = dataGen.label_data((32 * 2,)) | |||
| data_gen = DataGenerator() | |||
| _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) | |||
| _, label_part = data_gen.label_data((32 * 2,)) | |||
| dataset = Dataset(input_part, label_part) | |||
| net = resnet50(num_classes) | |||
| loss = SoftmaxCrossEntropyExpand(sparse=True) | |||
| @@ -396,9 +393,9 @@ def test_train_feed(num_classes=8192): | |||
| def test_train_feed2(num_classes=1001): | |||
| set_algo_parameters(elementwise_op_strategy_follow=True) | |||
| parallel_callback = ModelCallback() | |||
| dataGen = DataGenerator() | |||
| input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224)) | |||
| label_full, label_part = dataGen.label_data((32 * 2,)) | |||
| data_gen = DataGenerator() | |||
| _, input_part = data_gen.input_data((32 * 2, 3, 224, 224)) | |||
| _, label_part = data_gen.label_data((32 * 2,)) | |||
| dataset = Dataset(input_part, label_part) | |||
| net = resnet50(num_classes) | |||
| loss = SoftmaxCrossEntropyExpand(sparse=True) | |||
| @@ -1,17 +1,17 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import sys | |||
| sys.path.append("../../..") | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import sys | |||
| sys.path.append("../../..") | |||
| @@ -25,7 +25,6 @@ from mindspore.nn import Dense | |||
| from mindspore.nn import Momentum | |||
| from mindspore.nn import ReLU | |||
| from mindspore.nn import TrainOneStepCell, WithLossCell | |||
| from mindspore.ops.operations import Split | |||
| from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter | |||
| from mindspore.ops.operations.comm_ops import Broadcast | |||
| @@ -16,8 +16,8 @@ | |||
| @File : test_data_parallel_lenet.py | |||
| @Desc : test data parallel lenet | |||
| """ | |||
| import numpy as np | |||
| import os | |||
| import numpy as np | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| @@ -80,7 +80,6 @@ def test_lenet5_train_step_training_pynative(): | |||
| context.reset_auto_parallel_context() | |||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| device_num=8, mirror_mean=True) | |||
| size = 3 | |||
| predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) | |||
| label = Tensor(np.zeros([1, 10]).astype(np.float32)) | |||
| DatasetLenet(predict, label, 2) | |||
| @@ -19,7 +19,7 @@ from mindspore.parallel._utils import _reset_op_id | |||
| from mindspore.parallel.algo_parameter_config import reset_algo_parameters | |||
| def setup_module(module): | |||
| def setup_module(): | |||
| auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) | |||
| reset_cost_model_context() | |||
| @@ -1,178 +1,178 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.add(x, z) | |||
| return self.relu(out) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = AddRelu() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.add(x, z) | |||
| return self.relu(out) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = AddRelu() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| _ = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| _ = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
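The splitting convention used throughout these tests is the same in every factory below: get_parallel_blocks cuts the full array along axis 0, then axis 1, and so on, according to the per-axis counts in the strategy tuple, and each rank picks the block indexed by its id modulo the number of blocks. A minimal NumPy-only restatement of that logic (an illustrative sketch; the shapes and the rank value are made up, and no devices or MindSpore are required):

import numpy as np

def get_parallel_blocks(input_, strategy):
    # Cut along axis 0 first, then axis 1, ... one pass per entry in the strategy tuple.
    blocks = [input_]
    for axis, cuts in enumerate(strategy):
        next_blocks = []
        for block in blocks:
            next_blocks.extend(np.split(block, cuts, axis=axis))
        blocks = next_blocks
    return blocks

full = np.arange(24, dtype=np.float32).reshape(4, 6)
blocks = get_parallel_blocks(full, (2, 2))   # 2 cuts on axis 0, 2 on axis 1 -> 4 blocks of shape (2, 3)
rank_id = 3                                  # hypothetical rank
local = blocks[rank_id % len(blocks)]        # the shard this rank would feed to the parallel net
assert local.shape == (2, 3)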
| @@ -1,356 +1,356 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore._checkparam import check_bool, twice | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class _Conv(Cell): | |||
| r"""Applies a N-D convolution over an input signal composed of several input | |||
| planes. | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init): | |||
| super(_Conv, self).__init__() | |||
| self.in_channels = in_channels | |||
| self.out_channels = out_channels | |||
| self.kernel_size = kernel_size | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.has_bias = has_bias | |||
| if not (isinstance(in_channels, int) and in_channels > 0): | |||
| raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' | |||
| + str(in_channels) + ', should be an int and greater than 0.') | |||
| if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ | |||
| (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ | |||
| kernel_size[0] < 1 or kernel_size[1] < 1: | |||
| raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' | |||
| + str(self.kernel_size) + ', should be an int or a tuple and equal to or greater than 1.') | |||
| if in_channels % group != 0: | |||
| raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| if out_channels % group != 0: | |||
| raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| self.weight = Parameter(initializer( | |||
| weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') | |||
| if check_bool(has_bias): | |||
| self.bias = Parameter(initializer( | |||
| bias_init, [out_channels]), name='bias') | |||
| else: | |||
| if bias_init != 'zeros': | |||
| print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") | |||
| self.bias = None | |||
| def construct(self, *inputs): | |||
| raise NotImplementedError | |||
| class Conv2d(_Conv): | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride=1, | |||
| pad_mode='same', | |||
| padding=0, | |||
| dilation=1, | |||
| group=1, | |||
| has_bias=False, | |||
| weight_init='normal', | |||
| bias_init='zeros', | |||
| strategy=None): | |||
| kernel_size = twice(kernel_size) | |||
| super(Conv2d, self).__init__( | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init) | |||
| self.add = P.TensorAdd(strategy) | |||
| self.conv2d = P.Conv2D(out_channel=self.out_channels, | |||
| kernel_size=self.kernel_size, | |||
| mode=1, | |||
| pad_mode=self.pad_mode, | |||
| pad=self.padding, | |||
| stride=self.stride, | |||
| dilation=self.dilation, | |||
| group=self.group, | |||
| strategy=None) | |||
| self.bias_add = P.BiasAdd() | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| if self.has_bias: | |||
| return self.bias_add(self.conv2d(x, self.weight), | |||
| self.bias) | |||
| return self.conv2d(x, self.weight) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Conv2dFactory: | |||
| def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): | |||
| self.in_n, self.in_c, self.in_h, self.in_w = input_shape | |||
| self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) | |||
| prefix = "" | |||
| input_size = 1 | |||
| filter_size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| self.prefix = prefix | |||
| for s in filter_shape: | |||
| filter_size = filter_size * s | |||
| number_range1 = min(10, input_size) | |||
| number_range2 = min(10, filter_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( | |||
| np.float16) | |||
| self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( | |||
| np.float16) | |||
| self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( | |||
| np.float16) | |||
| self.has_bias = has_bias | |||
| if self.has_bias: | |||
| self.bias_np = np.arange(0, self.out_c).astype(np.float16) | |||
| self.out_shape = (128, 64, 56, 56) | |||
| out_size = 1 | |||
| for s in self.out_shape: | |||
| out_size = out_size * s | |||
| number_range3 = min(10, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, | |||
| self.out_shape).astype(np.float16) | |||
| self.x_id = device_id % 4 | |||
| self.y_id = device_id % 4 | |||
| self.out_strategy = self.strategy0[1] | |||
| self.out_id = device_id % 4 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_conv2d_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_conv2d_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, ) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| out_grad = grad_net(x, y, output_grad) | |||
| return out_grad | |||
| def grad_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| output_grad1 = Tensor(output_grads[self.out_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return out_grad | |||
| def forward_conv2d_cmp(self): | |||
| out_mindspore = self.forward_conv2d_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_conv2d_cmp(self): | |||
| input_grad_mindspore = self.grad_conv2d_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) | |||
| assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) | |||
| assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) | |||
| def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false(): | |||
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.forward_conv2d_cmp() | |||
| def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false(): | |||
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.grad_conv2d_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore._checkparam import check_bool, twice | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class _Conv(Cell): | |||
| r"""Applies a N-D convolution over an input signal composed of several input | |||
| planes. | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init): | |||
| super(_Conv, self).__init__() | |||
| self.in_channels = in_channels | |||
| self.out_channels = out_channels | |||
| self.kernel_size = kernel_size | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.has_bias = has_bias | |||
| if not (isinstance(in_channels, int) and in_channels > 0): | |||
| raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' | |||
| + str(in_channels) + ', should be an int and greater than 0.') | |||
| if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ | |||
| (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ | |||
| kernel_size[0] < 1 or kernel_size[1] < 1: | |||
| raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' | |||
| + str(self.kernel_size) + ', should be an int or a tuple and equal to or greater than 1.') | |||
| if in_channels % group != 0: | |||
| raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| if out_channels % group != 0: | |||
| raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| self.weight = Parameter(initializer( | |||
| weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') | |||
| if check_bool(has_bias): | |||
| self.bias = Parameter(initializer( | |||
| bias_init, [out_channels]), name='bias') | |||
| else: | |||
| if bias_init != 'zeros': | |||
| print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") | |||
| self.bias = None | |||
| def construct(self, *inputs): | |||
| raise NotImplementedError | |||
| class Conv2d(_Conv): | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride=1, | |||
| pad_mode='same', | |||
| padding=0, | |||
| dilation=1, | |||
| group=1, | |||
| has_bias=False, | |||
| weight_init='normal', | |||
| bias_init='zeros', | |||
| strategy=None): | |||
| kernel_size = twice(kernel_size) | |||
| super(Conv2d, self).__init__( | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init) | |||
| self.add = P.TensorAdd(strategy) | |||
| self.conv2d = P.Conv2D(out_channel=self.out_channels, | |||
| kernel_size=self.kernel_size, | |||
| mode=1, | |||
| pad_mode=self.pad_mode, | |||
| pad=self.padding, | |||
| stride=self.stride, | |||
| dilation=self.dilation, | |||
| group=self.group, | |||
| strategy=None) | |||
| self.bias_add = P.BiasAdd() | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| if self.has_bias: | |||
| return self.bias_add(self.conv2d(x, self.weight), | |||
| self.bias) | |||
| return self.conv2d(x, self.weight) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Conv2dFactory: | |||
| def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): | |||
| self.in_n, self.in_c, self.in_h, self.in_w = input_shape | |||
| self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) | |||
| prefix = "" | |||
| input_size = 1 | |||
| filter_size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| self.prefix = prefix | |||
| for s in filter_shape: | |||
| filter_size = filter_size * s | |||
| number_range1 = min(10, input_size) | |||
| number_range2 = min(10, filter_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( | |||
| np.float16) | |||
| self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( | |||
| np.float16) | |||
| self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( | |||
| np.float16) | |||
| self.has_bias = has_bias | |||
| if self.has_bias: | |||
| self.bias_np = np.arange(0, self.out_c).astype(np.float16) | |||
| self.out_shape = (128, 64, 56, 56) | |||
| out_size = 1 | |||
| for s in self.out_shape: | |||
| out_size = out_size * s | |||
| number_range3 = min(10, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, | |||
| self.out_shape).astype(np.float16) | |||
| self.x_id = device_id % 4 | |||
| self.y_id = device_id % 4 | |||
| self.out_strategy = self.strategy0[1] | |||
| self.out_id = device_id % 4 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_conv2d_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_conv2d_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias,) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| out_grad = grad_net(x, y, output_grad) | |||
| return out_grad | |||
| def grad_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| output_grad1 = Tensor(output_grads[self.out_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return out_grad | |||
| def forward_conv2d_cmp(self): | |||
| out_mindspore = self.forward_conv2d_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_conv2d_cmp(self): | |||
| input_grad_mindspore = self.grad_conv2d_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) | |||
| assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) | |||
| assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) | |||
| def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false(): | |||
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.forward_conv2d_cmp() | |||
| def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false(): | |||
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.grad_conv2d_cmp() | |||
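A note on the layout used by Conv2dFactory above: strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) cuts only the batch axis of the NCHW input into four blocks, so the single-device output is split the same way and each rank is compared against one batch slice. A NumPy-only sketch of that selection (shapes and the RANK_ID value here are hypothetical, chosen small for illustration):

import numpy as np

full_out = np.random.rand(8, 3, 4, 4).astype(np.float32)  # stand-in for the single-device forward result
blocks = np.split(full_out, 4, axis=0)                     # strategy (4, 1, 1, 1): cut the batch axis only
device_id = 2                                              # hypothetical RANK_ID
expected_local = blocks[device_id % 4]                     # what forward_conv2d_cmp checks the parallel output against
assert expected_local.shape == (2, 3, 4, 4)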
| @@ -1,120 +1,120 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.nn import Dropout | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, keep_prob, seed0, seed1, strategy=None): | |||
| super(Net, self).__init__() | |||
| self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) | |||
| def construct(self, input): | |||
| x = self.drop(input) | |||
| return x | |||
| # pylint: disable=comparison-with-itself | |||
| class DropoutFactory: | |||
| def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) | |||
| self.keep_prob = keep_prob | |||
| self.seed0 = seed0 | |||
| self.seed1 = seed1 | |||
| self.strategy0 = strategy0 | |||
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def d4_tensor_compare(self, input, out_me): | |||
| [a, b, c, d] = input.shape | |||
| for i in range(a): | |||
| for j in range(b): | |||
| for k in range(c): | |||
| for e in range(d): | |||
| if out_me[i, j, k, e] == 0: | |||
| assert True == True | |||
| else: | |||
| assert np.allclose(out_me[i, j, k, e], input[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| net = Net(0.4, 0, 0, strategy=self.strategy0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.nn import Dropout | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, keep_prob, seed0, seed1, strategy=None): | |||
| super(Net, self).__init__() | |||
| self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) | |||
| def construct(self, input_): | |||
| x = self.drop(input_) | |||
| return x | |||
| # pylint: disable=comparison-with-itself | |||
| class DropoutFactory: | |||
| def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) | |||
| self.keep_prob = keep_prob | |||
| self.seed0 = seed0 | |||
| self.seed1 = seed1 | |||
| self.strategy0 = strategy0 | |||
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def d4_tensor_compare(self, input_, out_me): | |||
| [a, b, c, d] = input_.shape | |||
| for i in range(a): | |||
| for j in range(b): | |||
| for k in range(c): | |||
| for e in range(d): | |||
| if out_me[i, j, k, e] == 0: | |||
| assert True | |||
| else: | |||
| assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| net = Net(0.4, 0, 0, strategy=self.strategy0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
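The dropout test above does not compare against a reference run; it only asserts the invariant that a surviving element equals the input scaled by 1/keep_prob and a dropped element is exactly zero. The same invariant can be sanity-checked with plain NumPy (the mask here is generated locally and is only a stand-in for the operator's own mask; keep_prob=0.4 as in the tests):

import numpy as np

keep_prob = 0.4
x = np.arange(1, 25, dtype=np.float32).reshape(2, 3, 4, 1)  # avoid zeros so the two branches stay distinct
mask = np.random.rand(*x.shape) < keep_prob                 # locally generated stand-in for the dropout mask
out = np.where(mask, x / keep_prob, 0.0)

kept = out != 0
assert np.all(out[~kept] == 0)
assert np.allclose(out[kept], x[kept] * (1 / keep_prob), 0.0001, 0.0001)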
| @@ -1,154 +1,154 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y): | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
| class MatmulAllgather(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulAllgather, self).__init__() | |||
| self.allgather = P.AllGather(group=group) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce = P.AllReduce(group=group) | |||
| def construct(self, x, y): | |||
| x = self.allgather(x) | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, sens): | |||
| return grad_all_with_sens(self.network)(x, y, sens) | |||
| class MatmulAllgatherFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): | |||
| self.inputx = self.GenValue(inputx_shape, 10) | |||
| self.inputy = self.GenValue(inputy_shape, 20) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def GenValue(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulAllgather("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (64, 32) | |||
| inputy_shape = (32, 64) | |||
| fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y): | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
| class MatmulAllgather(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulAllgather, self).__init__() | |||
| self.allgather = P.AllGather(group=group) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce = P.AllReduce(group=group) | |||
| def construct(self, x, y): | |||
| x = self.allgather(x) | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, sens): | |||
| return grad_all_with_sens(self.network)(x, y, sens) | |||
| class MatmulAllgatherFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): | |||
| self.inputx = self.gen_value(inputx_shape, 10) | |||
| self.inputy = self.gen_value(inputy_shape, 20) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def gen_value(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulAllgather("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (64, 32) | |||
| inputy_shape = (32, 64) | |||
| fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
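The intent of the MatmulAllgather test above can be seen without any devices: x is sharded row-wise (strategy (4, 1)) and y column-wise ((1, 4)); once AllGather restores the full x, each rank's matmul yields one column block of the full product. A NumPy sketch with concatenation standing in for AllGather (the shapes match the test; the rank loop merely simulates the four devices):

import numpy as np

device_num = 4
x = np.random.rand(64, 32).astype(np.float32)
y = np.random.rand(32, 64).astype(np.float32)

x_shards = np.split(x, device_num, axis=0)   # strategy (4, 1): each rank holds a row block of x
y_shards = np.split(y, device_num, axis=1)   # strategy (1, 4): each rank holds a column block of y
full = x @ y

for rank in range(device_num):
    gathered_x = np.concatenate(x_shards, axis=0)   # what AllGather rebuilds on every rank
    local_out = gathered_x @ y_shards[rank]          # one column block of the full product
    assert np.allclose(local_out, np.split(full, device_num, axis=1)[rank], 1e-4, 1e-4)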
| @@ -1,175 +1,175 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
| class MatmulReduce(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulReduce, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.allreduce1 = P.AllReduce(group=group) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce2 = P.AllReduce(group=group) | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.allreduce1(out) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce2(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, z, sens): | |||
| return grad_all_with_sens(self.network)(x, y, z, sens) | |||
| class MatmulReduceFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): | |||
| self.inputx = self.GenValue(inputx_shape, 10) | |||
| self.inputy = self.GenValue(inputy_shape, 20) | |||
| self.inputz = self.GenValue(inputz_shape, 30) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| self.z_stra = z_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def GenValue(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| z = Tensor(self.inputz) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
| z = Tensor(inputzs[device_id % self.stra_size]) | |||
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulReduce("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] | |||
| reduce_result2 = reduce_results[2].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
| def test_reduce_grad_repeat(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
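| # MatmulReduce runs the same computation on per-device shards and uses AllReduce to accumulate | |||
| # the partial results after the first matmul and again after the final reduce_sum. | |||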
| class MatmulReduce(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulReduce, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.allreduce1 = P.AllReduce(group=group) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce2 = P.AllReduce(group=group) | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.allreduce1(out) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce2(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, z, sens): | |||
| return grad_all_with_sens(self.network)(x, y, z, sens) | |||
| class MatmulReduceFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): | |||
| self.inputx = self.gen_value(inputx_shape, 10) | |||
| self.inputy = self.gen_value(inputy_shape, 20) | |||
| self.inputz = self.gen_value(inputz_shape, 30) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| self.z_stra = z_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def gen_value(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| z = Tensor(self.inputz) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
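| # Distributed gradient: each device takes the shard selected by device_id % stra_size, and sens is | |||
| # scaled by 1/v to offset the accumulation introduced by the AllReduce ops, keeping the result | |||
| # comparable with the single-device gradient. | |||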
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
| z = Tensor(inputzs[device_id % self.stra_size]) | |||
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulReduce("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
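| # Compare this device's block of the single-device gradients against the distributed gradients. | |||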
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] | |||
| reduce_result2 = reduce_results[2].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
| def test_reduce_grad_repeat(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) | |||
| fact.grad_cmp() | |||
| @@ -1,207 +1,206 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class L2normalize(Cell): | |||
| def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): | |||
| super(L2normalize, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.l2norm = P.L2Normalize(axis, epsilon, strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.l2norm(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class L2normalizeFactory: | |||
| def __init__(self, input_shape, axis, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.axis = axis | |||
| self.epsilon = 1e-4 | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_l2normalize_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.grad_cmp() | |||
| def test_reid_l2normalize_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class L2normalize(Cell): | |||
| def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): | |||
| super(L2normalize, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.l2norm = P.L2Normalize(axis, epsilon, strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.l2norm(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
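| # The factory builds deterministic inputs and output grads, derives which block this rank owns from | |||
| # the strategies, and checks single-device results against semi_auto_parallel results. | |||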
| class L2normalizeFactory: | |||
| def __init__(self, input_shape, axis, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.axis = axis | |||
| self.epsilon = 1e-4 | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
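| # Parallel forward: compile with the full tensors, run with this rank's blocks. | |||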
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_l2normalize_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.grad_cmp() | |||
| def test_reid_l2normalize_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.grad_cmp() | |||
| @@ -1,196 +1,195 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.relu(out) | |||
| return out | |||
| class NetWithLoss(Cell): | |||
| def __init__(self, network, strategy2=None): | |||
| super(NetWithLoss, self).__init__() | |||
| self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| predict = self.network(x, y) | |||
| return self.loss(predict, b)[0] | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| return grad_all(self.network)(x, y, b) | |||
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1, strategy2): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(10, target_size) | |||
| self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( | |||
| np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.strategy2 = strategy2 | |||
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = AddRelu() | |||
| net_with_loss = NetWithLoss(net) | |||
| grad_net = Grad(net_with_loss) | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| net_with_loss = NetWithLoss(net, strategy2=self.strategy2) | |||
| grad_net = Grad(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
| def grad_cmp(self): | |||
| input_grad_mindspores = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() | |||
| for i in range(0, len(input_grad_mindspores)): | |||
| input_grad_mindspore = input_grad_mindspores[i] | |||
| input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", | |||
| input_grad_blocks_0[self.x_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", | |||
| input_grad_blocks_1[self.y_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", | |||
| input_grad_mindspore_parallel0) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", | |||
| input_grad_mindspore_parallel1) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_l2normalize_grad_input_128_512(): | |||
| input_shape = (128, 512) | |||
| fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)), | |||
| strategy2=(0, (4, 1), (4, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512_stridesplit(): | |||
| input_shape = (128, 512) | |||
| fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), | |||
| strategy2=(0, (4, 1), (4, 1))) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.relu(out) | |||
| return out | |||
| class NetWithLoss(Cell): | |||
| def __init__(self, network, strategy2=None): | |||
| super(NetWithLoss, self).__init__() | |||
| self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| predict = self.network(x, y) | |||
| return self.loss(predict, b)[0] | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| return grad_all(self.network)(x, y, b) | |||
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1, strategy2): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(10, target_size) | |||
| self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( | |||
| np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.strategy2 = strategy2 | |||
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
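| # Single-device gradients, computed three times so grad_cmp can compare every repetition. | |||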
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = AddRelu() | |||
| net_with_loss = NetWithLoss(net) | |||
| grad_net = Grad(net_with_loss) | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| net_with_loss = NetWithLoss(net, strategy2=self.strategy2) | |||
| grad_net = Grad(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
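| # For each repetition, dump the expected blocks and the parallel results to ./output/ and assert they match. | |||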
| def grad_cmp(self): | |||
| input_grad_mindspores = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() | |||
| for i in range(0, len(input_grad_mindspores)): | |||
| input_grad_mindspore = input_grad_mindspores[i] | |||
| input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", | |||
| input_grad_blocks_0[self.x_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", | |||
| input_grad_blocks_1[self.y_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", | |||
| input_grad_mindspore_parallel0) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", | |||
| input_grad_mindspore_parallel1) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_l2normalize_grad_input_128_512(): | |||
| input_shape = (128, 512) | |||
| fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)), | |||
| strategy2=(0, (4, 1), (4, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512_stridesplit(): | |||
| input_shape = (128, 512) | |||
| fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), | |||
| strategy2=(0, (4, 1), (4, 1))) | |||
| fact.grad_cmp() | |||
| @@ -1,329 +1,329 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Matmul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(Matmul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.matmul(out, w) | |||
| class BatchMatMul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(BatchMatMul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.batchmatmul(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, inputa, inputb, inputz, output_grad): | |||
| gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) | |||
| return gout | |||
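| # The factory derives the output strategy and device matrix from the input strategies (swapping the | |||
| # last two dimensions when an input is transposed) and maps this rank to its x/y/out blocks. | |||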
| class BatchmatmulFactory: | |||
| def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): | |||
| self.strategy = strategy | |||
| self.strategy_ = strategy_ | |||
| inputa_size = 1 | |||
| inputb_size = 1 | |||
| prefix = "" | |||
| for s in inputa_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputa_size = inputa_size * s | |||
| prefix = prefix + "and" | |||
| for s in inputb_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputb_size = inputb_size * s | |||
| number_rangea = min(1000, inputa_size) | |||
| number_rangeb = min(1000, inputb_size) | |||
| self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype( | |||
| np.float32) | |||
| self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( | |||
| np.float32) | |||
| self.inputz = np.zeros(self.inputa.shape).astype(np.float32) | |||
| self.transpose_a = transpose_a | |||
| self.transpose_b = transpose_b | |||
| out_shape = [] | |||
| device_matrix = [] | |||
| out_strategy = [] | |||
| if transpose_a: | |||
| temp = inputa_shape[-1] | |||
| inputa_shape[-1] = inputa_shape[-2] | |||
| inputa_shape[-2] = temp | |||
| if transpose_b: | |||
| temp = inputb_shape[-1] | |||
| inputb_shape[-1] = inputb_shape[-2] | |||
| inputb_shape[-2] = temp | |||
| if (len(inputa_shape) >= len(inputb_shape)): | |||
| out_shape = list(inputa_shape) | |||
| out_shape[-1] = inputb_shape[-1] | |||
| else: | |||
| out_shape = list(inputb_shape) | |||
| out_shape[-2] = inputa_shape[-2] | |||
| strategy1 = list(self.strategy[1]) | |||
| strategy2 = list(self.strategy[2]) | |||
| if transpose_a: | |||
| temp = strategy1[-1] | |||
| strategy1[-1] = strategy1[-2] | |||
| strategy1[-2] = temp | |||
| if transpose_b: | |||
| temp = strategy2[-1] | |||
| strategy2[-1] = strategy2[-2] | |||
| strategy2[-2] = temp | |||
| if (len(strategy1) >= len(strategy2)): | |||
| out_strategy = strategy1.copy() | |||
| out_strategy[-1] = strategy2[-1] | |||
| else: | |||
| out_strategy = strategy2.copy() | |||
| out_strategy[-2] = strategy1[-2] | |||
| device_matrix = out_strategy.copy() | |||
| device_matrix.insert(-1, strategy1[-1]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| for s in device_matrix: | |||
| need_dev_num = need_dev_num * s | |||
| self.need_dev_num = need_dev_num | |||
| self.device_matrix = device_matrix | |||
| out_size = 1 | |||
| for s in out_shape: | |||
| out_size = out_size * s | |||
| number_range = min(1000, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( | |||
| np.float32) | |||
| device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) | |||
| x_index = device_index[:-1].copy() | |||
| if transpose_a: | |||
| temp = x_index[-1] | |||
| x_index[-1] = x_index[-2] | |||
| x_index[-2] = temp | |||
| y_index = device_index[:-3].copy() | |||
| y_index.append(device_index[-2]) | |||
| y_index.append(device_index[-1]) | |||
| if transpose_b: | |||
| temp = y_index[-1] | |||
| y_index[-1] = y_index[-2] | |||
| y_index[-2] = temp | |||
| out_index = device_index[:-2].copy() | |||
| out_index.append(device_index[-1]) | |||
| print(device_matrix) | |||
| print(device_index) | |||
| need_dev_num_ = 1 | |||
| for s in strategy_[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num_ | |||
| self.y_id = self.list_to_id(y_index, self.strategy[2]) | |||
| self.out_id = self.list_to_id(out_index, self.out_strategy) | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| """ | |||
| shape:每一维的上限,如(2,4,8) | |||
| """ | |||
| def id_to_list(self, id, shape): | |||
| result = [] | |||
| r = id | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| matmul.set_train() | |||
| out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) | |||
| return out_me.asnumpy() | |||
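| # Parallel forward: pick Matmul or BatchMatMul depending on whether inputa has more than two | |||
| # dimensions, compile with the full tensors, and run with this rank's x/y/z blocks under semi_auto_parallel. | |||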
| def forward_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) # needs to be derived from the device matrix | |||
| z1 = Tensor(zs[self.x_id]) | |||
| matmul.set_train() | |||
| matmul.set_auto_parallel() | |||
| out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) | |||
| return out_me.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| net_me = Grad(matmul) | |||
| net_me.set_train() | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| out_grad = net_me(x, y, z, out_grad_me) | |||
| return out_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
| out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(xs[self.x_id]) # needs to be derived from the device matrix | |||
| y1 = Tensor(ys[self.y_id]) | |||
| z1 = Tensor(zs[self.x_id]) | |||
| out_grad1 = Tensor(out_grads[self.out_id]) | |||
| net_me = Grad(matmul) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net_me.set_auto_parallel() | |||
| net_me.set_train() | |||
| out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], | |||
| parallel_inputs_run=[x1, y1, z1, out_grad1]) | |||
| return out_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) | |||
| input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) | |||
| input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) | |||
| assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Matmul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(Matmul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.matmul(out, w) | |||
| class BatchMatMul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(BatchMatMul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.batchmatmul(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, inputa, inputb, inputz, output_grad): | |||
| gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) | |||
| return gout | |||
| class BatchmatmulFactory: | |||
| def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): | |||
| self.strategy = strategy | |||
| self.strategy_ = strategy_ | |||
| inputa_size = 1 | |||
| inputb_size = 1 | |||
| prefix = "" | |||
| for s in inputa_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputa_size = inputa_size * s | |||
| prefix = prefix + "and" | |||
| for s in inputb_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputb_size = inputb_size * s | |||
| number_rangea = min(1000, inputa_size) | |||
| number_rangeb = min(1000, inputb_size) | |||
| self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype( | |||
| np.float32) | |||
| self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( | |||
| np.float32) | |||
| self.inputz = np.zeros(self.inputa.shape).astype(np.float32) | |||
| self.transpose_a = transpose_a | |||
| self.transpose_b = transpose_b | |||
| out_shape = [] | |||
| device_matrix = [] | |||
| out_strategy = [] | |||
| if transpose_a: | |||
| temp = inputa_shape[-1] | |||
| inputa_shape[-1] = inputa_shape[-2] | |||
| inputa_shape[-2] = temp | |||
| if transpose_b: | |||
| temp = inputb_shape[-1] | |||
| inputb_shape[-1] = inputb_shape[-2] | |||
| inputb_shape[-2] = temp | |||
| if len(inputa_shape) >= len(inputb_shape): | |||
| out_shape = list(inputa_shape) | |||
| out_shape[-1] = inputb_shape[-1] | |||
| else: | |||
| out_shape = list(inputb_shape) | |||
| out_shape[-2] = inputa_shape[-2] | |||
| strategy1 = list(self.strategy[1]) | |||
| strategy2 = list(self.strategy[2]) | |||
| if transpose_a: | |||
| temp = strategy1[-1] | |||
| strategy1[-1] = strategy1[-2] | |||
| strategy1[-2] = temp | |||
| if transpose_b: | |||
| temp = strategy2[-1] | |||
| strategy2[-1] = strategy2[-2] | |||
| strategy2[-2] = temp | |||
| if len(strategy1) >= len(strategy2): | |||
| out_strategy = strategy1.copy() | |||
| out_strategy[-1] = strategy2[-1] | |||
| else: | |||
| out_strategy = strategy2.copy() | |||
| out_strategy[-2] = strategy1[-2] | |||
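# Build the device matrix: the output strategy with the split of the contracted (K) axis,
# strategy1[-1], inserted before the last element, i.e. splits laid out as [batch..., M, K, N].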
| device_matrix = out_strategy.copy() | |||
| device_matrix.insert(-1, strategy1[-1]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| for s in device_matrix: | |||
| need_dev_num = need_dev_num * s | |||
| self.need_dev_num = need_dev_num | |||
| self.device_matrix = device_matrix | |||
| out_size = 1 | |||
| for s in out_shape: | |||
| out_size = out_size * s | |||
| number_range = min(1000, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( | |||
| np.float32) | |||
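# Map this device's rank onto the device matrix, then work out which block of each
# operand (and of the output) this rank owns.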
| device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) | |||
| x_index = device_index[:-1].copy() | |||
| if transpose_a: | |||
| temp = x_index[-1] | |||
| x_index[-1] = x_index[-2] | |||
| x_index[-2] = temp | |||
| y_index = device_index[:-3].copy() | |||
| y_index.append(device_index[-2]) | |||
| y_index.append(device_index[-1]) | |||
| if transpose_b: | |||
| temp = y_index[-1] | |||
| y_index[-1] = y_index[-2] | |||
| y_index[-2] = temp | |||
| out_index = device_index[:-2].copy() | |||
| out_index.append(device_index[-1]) | |||
| print(device_matrix) | |||
| print(device_index) | |||
| need_dev_num_ = 1 | |||
| for s in strategy_[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num_ | |||
| self.y_id = self.list_to_id(y_index, self.strategy[2]) | |||
| self.out_id = self.list_to_id(out_index, self.out_strategy) | |||
| def get_parallel_blocks(self, input_, strategy): | |||
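"""Split input_ into per-device blocks: along each axis i, cut it into strategy[i] equal parts."""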
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def id_to_list(self, id_, shape): | |||
| """ | |||
| shape:每一维的上限,如(2,4,8) | |||
| """ | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
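"""Inverse of id_to_list: flatten per-dimension coordinates back into a single flat id."""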
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| matmul.set_train() | |||
| out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) | |||
| return out_me.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id]) # needs to be derived from the device matrix
| z1 = Tensor(zs[self.x_id]) | |||
| matmul.set_train() | |||
| matmul.set_auto_parallel() | |||
| out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) | |||
| return out_me.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| net_me = Grad(matmul) | |||
| net_me.set_train() | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| out_grad = net_me(x, y, z, out_grad_me) | |||
| return out_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
| out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
x1 = Tensor(xs[self.x_id]) # needs to be derived from the device matrix
y1 = Tensor(ys[self.y_id])
| z1 = Tensor(zs[self.x_id]) | |||
| out_grad1 = Tensor(out_grads[self.out_id]) | |||
| net_me = Grad(matmul) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net_me.set_auto_parallel() | |||
| net_me.set_train() | |||
| out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], | |||
| parallel_inputs_run=[x1, y1, z1, out_grad1]) | |||
| return out_grad | |||
| def forward_cmp(self): | |||
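# Slice the single-device result by the output strategy and compare this rank's block
# with the semi-auto-parallel result.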
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) | |||
| input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) | |||
| input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) | |||
| assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| @@ -1,214 +1,213 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Max(Cell): | |||
| def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): | |||
| super(Max, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, input1, input2): | |||
| out = self.add(input1, input2) | |||
| return self.reduce_max(out, self.axis) | |||
| class MaxFactory: | |||
| def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.axis = axis | |||
| self.keep_dims = keep_dims | |||
| input_size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| number_range = min(1000, input_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = self.input_np1.copy() | |||
| self.out_grad_np = None | |||
| out_shape = list(input_shape) | |||
| out_shape.pop(axis) | |||
out_size = input_size // input_shape[axis]  # integer element count of the reduced output
| number_range_ = min(1000, out_size) | |||
| self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( | |||
| np.float32) | |||
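# ReduceMax drops the reduced axis, so the output strategy is the input strategy with
# that axis removed.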
| out_strategy = list(strategy1[1]) | |||
| out_strategy.pop(axis) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in out_strategy: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.out_grad_np) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(input1, input2, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) | |||
| out_grad = Tensor(output_grads[self.out_id]) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], | |||
| parallel_inputs_run=[x1, y1, out_grad]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| print(out_mindspore) | |||
| print(out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
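# The first input's grad is sliced by strategy0[1] and the second's by strategy0[2],
# matching the two-input strategy of TensorAdd.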
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_max_forward_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Max(Cell): | |||
| def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): | |||
| super(Max, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, input1, input2): | |||
| out = self.add(input1, input2) | |||
| return self.reduce_max(out, self.axis) | |||
| class MaxFactory: | |||
| def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.axis = axis | |||
| self.keep_dims = keep_dims | |||
| input_size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| number_range = min(1000, input_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = self.input_np1.copy() | |||
| self.out_grad_np = None | |||
| out_shape = list(input_shape) | |||
| out_shape.pop(axis) | |||
out_size = input_size // input_shape[axis]  # integer element count of the reduced output
| number_range_ = min(1000, out_size) | |||
| self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( | |||
| np.float32) | |||
| out_strategy = list(strategy1[1]) | |||
| out_strategy.pop(axis) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in out_strategy: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.out_grad_np) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(input1, input2, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) | |||
| out_grad = Tensor(output_grads[self.out_id]) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], | |||
| parallel_inputs_run=[x1, y1, out_grad]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| print(out_mindspore) | |||
| print(out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_max_forward_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_cmp() | |||
| @@ -1,201 +1,200 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MulSoftmax(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None, axis=0): | |||
| super(MulSoftmax, self).__init__() | |||
| self.mul = P.Mul(strategy=strategy0) | |||
| self.softmax = P.Softmax(axis=axis, strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.mul(x, z) | |||
| return self.softmax(out) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class MulSoftmaxFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
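# Each rank works on the block indexed by its id modulo the number of devices the
# strategy actually requires, so a strategy may occupy fewer devices than are available.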
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = MulSoftmax() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0) | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1) | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, | |||
self.strategy0[1])  # of TensorMul's two inputs, X1 is not broadcast while X2 is, so only X1's grad is sliced here
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MulSoftmax(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None, axis=0): | |||
| super(MulSoftmax, self).__init__() | |||
| self.mul = P.Mul(strategy=strategy0) | |||
| self.softmax = P.Softmax(axis=axis, strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.mul(x, z) | |||
| return self.softmax(out) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class MulSoftmaxFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = MulSoftmax() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0) | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1) | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, | |||
self.strategy0[1])  # of TensorMul's two inputs, X1 is not broadcast while X2 is, so only X1's grad is sliced here
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @@ -1,149 +1,147 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Onehot(Cell): | |||
| def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): | |||
| super(Onehot, self).__init__() | |||
| self.onehot = P.OneHot(axis, strategy=strategy) | |||
| self.depth = depth | |||
| self.on_value = Tensor(on_value, ms.float32) | |||
| self.off_value = Tensor(off_value, ms.float32) | |||
| def construct(self, indices): | |||
| return self.onehot(indices, self.depth, self.on_value, self.off_value) | |||
| class OneHotFactory: | |||
| def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) | |||
| self.depth = depth | |||
| self.on_value = on_value | |||
| self.off_value = off_value | |||
| self.axis = axis | |||
| self.dtype = dtype | |||
| self.strategy0 = strategy0 | |||
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
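# Only the indices axis is split here, so this rank's input block id and output block id coincide.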
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
def grad_mindspore_impl(self):
# NOTE: unused by the tests below; it refers to AddRelu, Grad and
# input_np1/input_np2/output_grad_np, none of which are defined in this file.
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def forward_mindspore_impl(self): | |||
| indices = Tensor(self.input_np) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value) | |||
| out = net(indices) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value, strategy=self.strategy0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
def test_reid_onehot_forward_int32_128_depth131072():
| fact = OneHotFactory(input_shape=(128,), | |||
| depth=131072, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (2,))) | |||
| fact.forward_cmp() | |||
| def test_reid_onehot_forward_int32_131072_depth127(): | |||
| fact = OneHotFactory(input_shape=(131072,), | |||
| depth=127, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (4,))) | |||
| fact.forward_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Onehot(Cell): | |||
| def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): | |||
| super(Onehot, self).__init__() | |||
| self.onehot = P.OneHot(axis, strategy=strategy) | |||
| self.depth = depth | |||
| self.on_value = Tensor(on_value, ms.float32) | |||
| self.off_value = Tensor(off_value, ms.float32) | |||
| def construct(self, indices): | |||
| return self.onehot(indices, self.depth, self.on_value, self.off_value) | |||
| class OneHotFactory: | |||
| def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) | |||
| self.depth = depth | |||
| self.on_value = on_value | |||
| self.off_value = off_value | |||
| self.axis = axis | |||
| self.dtype = dtype | |||
| self.strategy0 = strategy0 | |||
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
def grad_mindspore_impl(self):
# NOTE: unused by the tests below; it refers to AddRelu, Grad and
# input_np1/input_np2/output_grad_np, none of which are defined in this file.
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def forward_mindspore_impl(self): | |||
| indices = Tensor(self.input_np) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value) | |||
| out = net(indices) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value, strategy=self.strategy0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
def test_reid_onehot_forward_int32_128_depth131072():
| fact = OneHotFactory(input_shape=(128,), | |||
| depth=131072, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (2,))) | |||
| fact.forward_cmp() | |||
| def test_reid_onehot_forward_int32_131072_depth127(): | |||
| fact = OneHotFactory(input_shape=(131072,), | |||
| depth=127, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (4,))) | |||
| fact.forward_cmp() | |||
| @@ -1,206 +1,206 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class PReLU(Cell): | |||
| def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): | |||
| super(PReLU, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1_) | |||
| self.prelu = P.PReLU(strategy=strategy_) | |||
| def construct(self, x, z, w): | |||
| out = self.add(x, z) | |||
| return self.prelu(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input, z, w, output_grad): | |||
| return grad_all_with_sens(self.network)(input, z, w, output_grad) | |||
| class PReLUFactory: | |||
| def __init__(self, input_shape, strategy): | |||
| n, c = input_shape[:2] | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.channel = c | |||
| self.weight = np.array([np.float32(0.25)] * c) | |||
| self.strategy = strategy | |||
| def forward_mindspore_impl(self): | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| out = net(x, z, w) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| block_id = device_id % len(inputs) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) | |||
| return out.asnumpy() | |||
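| # In this parallel path the network is compiled against the full-shape | |||
| # tensors (parallel_inputs_compile) while each device actually runs on its | |||
| # own slice (parallel_inputs_run), so the value returned here is one block | |||
| # of the logical result. | |||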
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, z, w, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) | |||
| block_id = device_id % len(output_grads) | |||
| output_grad = Tensor(output_grads[block_id]) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], | |||
| parallel_inputs_run=[x1, z1, w1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
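| # get_parallel_blocks slices the full array along each axis in turn, using | |||
| # the per-axis split counts in `strategy`. A minimal sketch with made-up | |||
| # shapes (not taken from the tests below): | |||
| #   arr = np.arange(24).reshape(4, 6) | |||
| #   blocks = self.get_parallel_blocks(arr, (2, 3)) | |||
| #   # -> 6 blocks of shape (2, 2), ordered row-major over the split grid | |||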
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) | |||
| block_id = device_id % len(out_blocks) | |||
| assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() | |||
| input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) | |||
| input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) | |||
| block_id = device_id % len(input_grad_blocks) | |||
| assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) | |||
| assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
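| # grad_cmp checks, per device, that the parallel gradients match the | |||
| # corresponding block of the single-device gradients: the x and z gradients | |||
| # are sliced with strategy[1], while the weight gradient (index 2) is | |||
| # compared in full without slicing. | |||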
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_repeat(): | |||
| stra = (0, (1, 1, 2, 1), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_repeat(): | |||
| stra = (0, (1, 1, 2, 1), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_mix(): | |||
| stra = (0, (2, 1, 1, 2), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_mix(): | |||
| stra = (0, (2, 1, 1, 2), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class PReLU(Cell): | |||
| def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): | |||
| super(PReLU, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1_) | |||
| self.prelu = P.PReLU(strategy=strategy_) | |||
| self.channel = channel | |||
| def construct(self, x, z, w): | |||
| out = self.add(x, z) | |||
| return self.prelu(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input_, z, w, output_grad): | |||
| return grad_all_with_sens(self.network)(input_, z, w, output_grad) | |||
| class PReLUFactory: | |||
| def __init__(self, input_shape, strategy): | |||
| n, c = input_shape[:2] | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.channel = c | |||
| self.weight = np.array([np.float32(0.25)] * c) | |||
| self.strategy = strategy | |||
| def forward_mindspore_impl(self): | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| out = net(x, z, w) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| block_id = device_id % len(inputs) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, z, w, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) | |||
| block_id = device_id % len(output_grads) | |||
| output_grad = Tensor(output_grads[block_id]) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], | |||
| parallel_inputs_run=[x1, z1, w1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) | |||
| block_id = device_id % len(out_blocks) | |||
| assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() | |||
| input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) | |||
| input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) | |||
| block_id = device_id % len(input_grad_blocks) | |||
| assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) | |||
| assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_repeat(): | |||
| stra = (0, (1, 1, 2, 1), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_repeat(): | |||
| stra = (0, (1, 1, 2, 1), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_mix(): | |||
| stra = (0, (2, 1, 1, 2), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_mix(): | |||
| stra = (0, (2, 1, 1, 2), (1,)) | |||
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| @@ -1,253 +1,252 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class GradScalar(Cell): | |||
| def __init__(self, network): | |||
| super(GradScalar, self).__init__() | |||
| self.network = network | |||
| self.sens = Tensor([1.0], dtype=ms.float32) | |||
| def construct(self, x, y): | |||
| return grad_all_with_sens(self.network)(x, y, self.sens) | |||
| class ReduceMean(Cell): | |||
| def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): | |||
| super(ReduceMean, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.reduce_mean(out, self.axis) | |||
| class ReduceMeanFactory: | |||
| def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| self.keep_dims = keep_dims | |||
| self.axis = axis | |||
| target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.array([1.0], dtype=np.float32) | |||
| if len(target_shape) > 0: | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( | |||
| np.float32) + 1.0 | |||
| self.shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| axis_ = list(axis) | |||
| if axis_[0] == -1: | |||
| axis_[0] = len(input_shape) - 1 | |||
| for i in range(0, len(input_shape)): | |||
| if i in axis_: | |||
| if keep_dims: | |||
| out_strategy.append(1) | |||
| else: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| block_id = device_id % need_dev_num0 | |||
| device_index = self.id_to_list(block_id, self.strategy1[1]) | |||
| print(device_index) | |||
| for i in axis: | |||
| device_index[i] = 0 | |||
| print(device_index) | |||
| self.out_id = self.list_to_id(device_index, self.out_strategy) | |||
| print(self.out_id) | |||
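| # The block ids above map this device onto the input and output layouts: | |||
| # x_id/y_id index the input blocks produced by strategy0, while out_id is | |||
| # derived by decoding the device id into grid coordinates, zeroing the | |||
| # reduced axes, and re-encoding the coordinates against out_strategy. | |||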
| def id_to_list(self, id, shape): | |||
| result = [] | |||
| r = id | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
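| # id_to_list/list_to_id convert between a flat block id and its coordinates | |||
| # in the device grid (row-major). A small sketch, not tied to any test here: | |||
| #   self.id_to_list(5, [2, 4])      # -> [1, 1] | |||
| #   self.list_to_id([1, 1], [2, 4]) # -> 5 | |||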
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.output_grad_np) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.grad_cmp() | |||
| def test_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class GradScalar(Cell): | |||
| def __init__(self, network): | |||
| super(GradScalar, self).__init__() | |||
| self.network = network | |||
| self.sens = Tensor([1.0], dtype=ms.float32) | |||
| def construct(self, x, y): | |||
| return grad_all_with_sens(self.network)(x, y, self.sens) | |||
| class ReduceMean(Cell): | |||
| def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): | |||
| super(ReduceMean, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.reduce_mean(out, self.axis) | |||
| class ReduceMeanFactory: | |||
| def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| self.keep_dims = keep_dims | |||
| self.axis = axis | |||
| target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.array([1.0], dtype=np.float32) | |||
| if len(target_shape) > 0: | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( | |||
| np.float32) + 1.0 | |||
| self.shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| axis_ = list(axis) | |||
| if axis_[0] == -1: | |||
| axis_[0] = len(input_shape) - 1 | |||
| for i in range(0, len(input_shape)): | |||
| if i in axis_: | |||
| if keep_dims: | |||
| out_strategy.append(1) | |||
| else: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| block_id = device_id % need_dev_num0 | |||
| device_index = self.id_to_list(block_id, self.strategy1[1]) | |||
| print(device_index) | |||
| for i in axis: | |||
| device_index[i] = 0 | |||
| print(device_index) | |||
| self.out_id = self.list_to_id(device_index, self.out_strategy) | |||
| print(self.out_id) | |||
| def id_to_list(self, id_, shape): | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.output_grad_np) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.grad_cmp() | |||
| def test_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| @@ -1,206 +1,206 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class Reshape(Cell): | |||
| def __init__(self, target_shape, strategy0=None, strategy1=None): | |||
| super(Reshape, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reshape = P.Reshape(strategy=strategy1) | |||
| self.shape = tuple(target_shape) | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| return self.reshape(x, self.shape) | |||
| class ReshapeFactory: | |||
| def __init__(self, input_shape, target_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.target_shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [1] * len(target_shape) | |||
| out_strategy[0] = strategy1[1][0] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
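| # For reshape only the leading dimension keeps its split, so out_strategy is | |||
| # all ones except out_strategy[0] = strategy1[1][0]; x_id/y_id pick this | |||
| # device's input blocks and out_id picks its block of the reshaped output. | |||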
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Reshape(self.target_shape) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Reshape(self.target_shape) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_reshape_cmp(self): | |||
| out_mindspore = self.forward_reshape_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_reshape_cmp(self): | |||
| input_grad_mindspore = self.grad_reshape_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.forward_reshape_cmp() | |||
| def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.grad_reshape_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_reshape_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_reshape_cmp() | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class Reshape(Cell): | |||
| def __init__(self, target_shape, strategy0=None, strategy1=None): | |||
| super(Reshape, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reshape = P.Reshape(strategy=strategy1) | |||
| self.shape = tuple(target_shape) | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| return self.reshape(x, self.shape) | |||
| class ReshapeFactory: | |||
| def __init__(self, input_shape, target_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.target_shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [1] * len(target_shape) | |||
| out_strategy[0] = strategy1[1][0] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Reshape(self.target_shape) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Reshape(self.target_shape) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_reshape_cmp(self): | |||
| out_mindspore = self.forward_reshape_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_reshape_cmp(self): | |||
| input_grad_mindspore = self.grad_reshape_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.forward_reshape_cmp() | |||
| def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.grad_reshape_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_reshape_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_reshape_cmp() | |||
| @@ -1,236 +1,235 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import os | |||
| import pytest | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, perm_in, strategy0=None, strategy1=None): | |||
| super(Net, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.transpose = P.Transpose(strategy=strategy1) | |||
| self.perm_in = perm_in | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.transpose(out, self.perm_in) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class TransposeFactory: | |||
| def __init__(self, input_shape, perm_in, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = self.input_np1.transpose(perm_in).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.target_shape = target_shape | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.perm_in = perm_in | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| for i in perm_in: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| device_index = self.id_to_list(device_id % need_dev_num1, | |||
| self.strategy1[1]) # decode the device id into the index before transpose | |||
| device_index_transpose = [] | |||
| for i in perm_in: | |||
| device_index_transpose.append(device_index[i]) | |||
| self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) | |||
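| # For transpose the output layout is the input layout permuted by perm_in, | |||
| # so the grid coordinates decoded from strategy1 are reordered with perm_in | |||
| # and re-encoded against out_strategy to obtain this device's out_id. | |||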
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def id_to_list(self, id, shape): | |||
| result = [] | |||
| r = id | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Net(self.perm_in) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Net(self.perm_in) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_transpose_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_transpose_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.grad_transpose_cmp() | |||
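# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the PR): the comparison idea used by
# forward_transpose_cmp above, reproduced with plain NumPy on a single host.
# The shapes and the (2, 2) strategy below are made-up example values.
import numpy as np


def _split_blocks(array, strategy):
    # Row-major block decomposition, mirroring get_parallel_blocks above.
    blocks = [array]
    for axis, parts in enumerate(strategy):
        blocks = [piece for block in blocks for piece in np.split(block, parts, axis=axis)]
    return blocks


_full = (np.arange(16.0).reshape(4, 4) + np.ones((4, 4))).T  # "single-device" add + transpose
_blocks = _split_blocks(_full, (2, 2))                       # out_strategy (2, 2) -> 4 blocks
assert np.allclose(_blocks[2], _full[2:4, 0:2])              # block 2 = rows 2..3, cols 0..1
# ---------------------------------------------------------------------------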
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, perm_in, strategy0=None, strategy1=None): | |||
| super(Net, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.transpose = P.Transpose(strategy=strategy1) | |||
| self.perm_in = perm_in | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.transpose(out, self.perm_in) | |||
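# Added note (not part of the original file): Net computes transpose(add(x, y), perm_in);
# strategy0 shards the TensorAdd and strategy1 shards the Transpose when they are given.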
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
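# Added note (not part of the original file): grad_all_with_sens differentiates the
# wrapped network with respect to all of its inputs, using output_grad as the output
# sensitivity, which is why the grad_*_impl methods below feed output_grad_np in.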
| class TransposeFactory: | |||
| def __init__(self, input_shape, perm_in, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = self.input_np1.transpose(perm_in).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.target_shape = target_shape | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.perm_in = perm_in | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| for i in perm_in: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| device_index = self.id_to_list(device_id % need_dev_num1, | |||
self.strategy1[1])  # decode the flat device id into its per-dimension block index in the pre-transpose layout
| device_index_transpose = [] | |||
| for i in perm_in: | |||
| device_index_transpose.append(device_index[i]) | |||
| self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
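# Worked example (added comment, not in the original file): splitting a 4x4 array with
# strategy (2, 2) first halves axis 0 and then axis 1, yielding four 2x2 blocks ordered
# top-left, top-right, bottom-left, bottom-right; x_id, y_id and out_id index into that order.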
| def id_to_list(self, id_, shape): | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Net(self.perm_in) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Net(self.perm_in) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_transpose_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_transpose_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.grad_transpose_cmp() | |||
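# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the PR): how TransposeFactory.__init__ derives a
# device's output block id. _id_to_list/_list_to_id mirror the methods above; the
# (2, 2) strategy and the perm (1, 0) are example values.
def _id_to_list(rank, shape):
    index = []
    for i in range(len(shape)):
        radix = 1
        for s in shape[i + 1:]:
            radix *= s
        index.append(rank // radix)
        rank %= radix
    return index


def _list_to_id(index, shape):
    rank = 0
    for i, v in enumerate(index):
        radix = 1
        for s in shape[i + 1:]:
            radix *= s
        rank += v * radix
    return rank


_perm = (1, 0)
_stra_in = (2, 2)
_stra_out = tuple(_stra_in[p] for p in _perm)
_out_ids = [_list_to_id([_id_to_list(r, _stra_in)[p] for p in _perm], _stra_out)
            for r in range(4)]
assert _out_ids == [0, 2, 1, 3]  # transposing a 2x2 block mesh swaps the off-diagonal ranks
# ---------------------------------------------------------------------------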
| @@ -54,7 +54,7 @@ class Grad(nn.Cell): | |||
| return C.grad_all(self.network)(x, y) | |||
| def compile(net, x, y): | |||
| def compile_net(net, x, y): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y) | |||
| @@ -69,7 +69,7 @@ def test_add_relu_stride_slice(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| def test_add_relu_all_gather(): | |||
| @@ -82,4 +82,4 @@ def test_add_relu_all_gather(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| @@ -17,7 +17,6 @@ import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor, context | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| @@ -131,56 +130,56 @@ def test_allreduce_fusion_parameters(): | |||
| cost_model_context.reset_cost_model_context() | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2) | |||
| algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') | |||
| assert (algorithm == 2) | |||
| assert algorithm == 2 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1) | |||
| algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') | |||
| assert (algorithm == 1) | |||
| assert algorithm == 1 | |||
| cost_model_context.reset_cost_model_context() | |||
| algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm') | |||
| assert (algorithm == 0) | |||
| assert algorithm == 0 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2) | |||
| fusion_times = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_times') | |||
| assert (fusion_times == 2) | |||
| assert fusion_times == 2 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.2) | |||
| tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') | |||
| assert (tail_percent == 0.2) | |||
| assert tail_percent == 0.2 | |||
| cost_model_context.reset_cost_model_context() | |||
| tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent') | |||
| assert (tail_percent == 0.1) | |||
| assert tail_percent == 0.1 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.2) | |||
| tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') | |||
| assert (tail_time == 0.2) | |||
| assert tail_time == 0.2 | |||
| cost_model_context.reset_cost_model_context() | |||
| tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time') | |||
| assert (tail_time == 0.1) | |||
| assert tail_time == 0.1 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.2) | |||
| allreduce_inherent_time = cost_model_context.get_cost_model_context( | |||
| 'costmodel_allreduce_fusion_allreduce_inherent_time') | |||
| assert (allreduce_inherent_time == 0.2) | |||
| assert allreduce_inherent_time == 0.2 | |||
| cost_model_context.reset_cost_model_context() | |||
| allreduce_inherent_time = cost_model_context.get_cost_model_context( | |||
| 'costmodel_allreduce_fusion_allreduce_inherent_time') | |||
| assert (allreduce_inherent_time == 0.1) | |||
| assert allreduce_inherent_time == 0.1 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.2) | |||
| allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') | |||
| assert (allreduce_bandwidth == 0.2) | |||
| assert allreduce_bandwidth == 0.2 | |||
| cost_model_context.reset_cost_model_context() | |||
| allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth') | |||
| assert (allreduce_bandwidth == 0.1) | |||
| assert allreduce_bandwidth == 0.1 | |||
| cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.2) | |||
| computation_time_parameter = cost_model_context.get_cost_model_context( | |||
| 'costmodel_allreduce_fusion_computation_time_parameter') | |||
| assert (computation_time_parameter == 0.2) | |||
| assert computation_time_parameter == 0.2 | |||
| cost_model_context.reset_cost_model_context() | |||
| computation_time_parameter = cost_model_context.get_cost_model_context( | |||
| 'costmodel_allreduce_fusion_computation_time_parameter') | |||
| assert (computation_time_parameter == 0.1) | |||
| assert computation_time_parameter == 0.1 | |||
| def test_allreduce_fusion1(): | |||
| @@ -201,7 +200,7 @@ def test_allreduce_fusion1(): | |||
| 'backbone2.fc2.weight': 1, | |||
| 'backbone2.fc1.weight': 1, | |||
| 'backbone1.fc1.weight': 1} | |||
| assert (allreduce_fusion_dict == expect_dict) | |||
| assert allreduce_fusion_dict == expect_dict | |||
| cost_model_context.reset_cost_model_context() | |||
| @@ -214,7 +213,7 @@ def test_allreduce_fusion2(): | |||
| net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None)) | |||
| allreduce_fusion_dict = train_common(net) | |||
| expect_dict = {} | |||
| assert (allreduce_fusion_dict == expect_dict) | |||
| assert allreduce_fusion_dict == expect_dict | |||
| cost_model_context.reset_cost_model_context() | |||
| @@ -240,7 +239,7 @@ def test_allreduce_fusion3(): | |||
| 'backbone1.fc2.weight': 2, | |||
| 'backbone1.fc1.bias': 2, | |||
| 'backbone1.fc1.weight': 2} | |||
| assert (allreduce_fusion_dict == expect_dict) | |||
| assert allreduce_fusion_dict == expect_dict | |||
| cost_model_context.reset_cost_model_context() | |||
| @@ -267,7 +266,7 @@ def test_allreduce_fusion4(): | |||
| 'backbone1.fc2.weight': 1, | |||
| 'backbone1.fc1.weight': 1} | |||
| assert (allreduce_fusion_dict == expect_dict) | |||
| assert allreduce_fusion_dict == expect_dict | |||
| cost_model_context.reset_cost_model_context() | |||
| @@ -295,7 +294,7 @@ def test_allreduce_fusion5(): | |||
| 'backbone1.fc4.weight': 2, | |||
| 'backbone1.fc3.weight': 2, | |||
| 'backbone1.fc2.weight': 1, | |||
| 'backbone1.fc1.weight': 1, } | |||
| 'backbone1.fc1.weight': 1,} | |||
| assert (allreduce_fusion_dict == expect_dict) | |||
| assert allreduce_fusion_dict == expect_dict | |||
| cost_model_context.reset_cost_model_context() | |||
| @@ -67,7 +67,6 @@ def all_to_all_net(strategy1): | |||
| def all_to_all_common(strategy1): | |||
| batch_size = 32 | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| @@ -104,7 +103,7 @@ def test_all_to_all(): | |||
| [8, 1]], | |||
| 'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [ | |||
| [1, 1], [1, 8]]} | |||
| assert (strategys == expect_dict) | |||
| assert strategys == expect_dict | |||
| context.set_context(save_graphs=False) | |||
| @@ -43,7 +43,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -69,7 +69,7 @@ def test_matmul_sub(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_add(): | |||
| @@ -93,7 +93,7 @@ def test_matmul_add(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_mul(): | |||
| @@ -117,7 +117,7 @@ def test_matmul_mul(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_div(): | |||
| @@ -141,7 +141,7 @@ def test_matmul_div(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_greater(): | |||
| @@ -165,7 +165,7 @@ def test_matmul_greater(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_add_broadcast(): | |||
| @@ -189,7 +189,7 @@ def test_matmul_add_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_add_broadcast2(): | |||
| @@ -213,7 +213,7 @@ def test_matmul_add_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_sub_broadcast(): | |||
| @@ -237,7 +237,7 @@ def test_matmul_sub_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_sub_broadcast2(): | |||
| @@ -261,7 +261,7 @@ def test_matmul_sub_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_mul_broadcast(): | |||
| @@ -285,7 +285,7 @@ def test_matmul_mul_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_mul_broadcast2(): | |||
| @@ -309,7 +309,7 @@ def test_matmul_mul_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_div_broadcast(): | |||
| @@ -333,7 +333,7 @@ def test_matmul_div_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_div_broadcast2(): | |||
| @@ -357,7 +357,7 @@ def test_matmul_div_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_greater_broadcast(): | |||
| @@ -381,7 +381,7 @@ def test_matmul_greater_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_greater_broadcast2(): | |||
| @@ -405,7 +405,7 @@ def test_matmul_greater_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_floordiv(): | |||
| @@ -429,7 +429,7 @@ def test_matmul_floordiv(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_floordiv_broadcast(): | |||
| @@ -453,7 +453,7 @@ def test_matmul_floordiv_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_floordiv_broadcast2(): | |||
| @@ -477,7 +477,7 @@ def test_matmul_floordiv_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_assign_sub(): | |||
| @@ -504,4 +504,4 @@ def test_assign_sub(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| z = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, z) | |||
| compile_net(net, x, y, z) | |||
| @@ -20,7 +20,6 @@ from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| @@ -18,7 +18,6 @@ import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| @@ -48,7 +47,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b, phase): | |||
| def compile_net(net, x, y, b, phase): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b, phase=phase) | |||
| @@ -73,7 +72,7 @@ def test_auto_parallel_arithmetic(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 128]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 128]), dtype=ms.float32) | |||
| compile(net, x, y, b, phase='train') | |||
| compile_net(net, x, y, b, phase='train') | |||
| strategies = _executor._get_strategy(net) | |||
| expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]], | |||
| 'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]} | |||
| @@ -100,7 +99,7 @@ def test_auto_parallel_arithmetic_broadcast_both(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b, phase='train') | |||
| compile_net(net, x, y, b, phase='train') | |||
| strategies = _executor._get_strategy(net) | |||
| expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]], | |||
| 'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]} | |||
| @@ -127,7 +126,7 @@ def test_auto_parallel_arithmetic_broadcast_right(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32]), dtype=ms.float32) | |||
| compile(net, x, y, b, phase='train') | |||
| compile_net(net, x, y, b, phase='train') | |||
| strategies = _executor._get_strategy(net) | |||
| expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]], | |||
| 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} | |||
| @@ -154,7 +153,7 @@ def test_auto_parallel_arithmetic_broadcast_left(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b, phase="train") | |||
| compile_net(net, x, y, b, phase="train") | |||
| strategies = _executor._get_strategy(net) | |||
| expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]], | |||
| 'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]} | |||
| @@ -12,8 +12,8 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import re | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| @@ -21,7 +21,6 @@ from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from mindspore.parallel._utils import _reset_op_id as reset_op_id | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| @@ -10,7 +10,6 @@ from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| from mindspore.parallel import _cost_model_context as cost_model_context | |||
| from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters | |||
| from mindspore.parallel._utils import _reset_op_id as reset_op_id | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, z, w, b) | |||
| def compile(net, x, y, z, w, b): | |||
| def compile_net(net, x, y, z, w, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, z, w, b) | |||
| @@ -77,7 +77,7 @@ def test_four_matmul_linear(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, z, w, b) | |||
| compile_net(net, x, y, z, w, b) | |||
| def test_four_matmul1(): | |||
| @@ -103,7 +103,7 @@ def test_four_matmul1(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, z, w, b) | |||
| compile_net(net, x, y, z, w, b) | |||
| def test_four_matmul2(): | |||
| @@ -130,4 +130,4 @@ def test_four_matmul2(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, z, w, b) | |||
| compile_net(net, x, y, z, w, b) | |||
| @@ -36,4 +36,4 @@ def test_inference_phase(): | |||
| train_network.set_train() | |||
| train_network.set_auto_parallel() | |||
| output = train_network(predict, label) | |||
| _ = train_network(predict, label) | |||
| @@ -12,8 +12,8 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import re | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| @@ -16,7 +16,6 @@ import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import Tensor, Parameter | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -71,7 +71,7 @@ def test_sum_mul(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul2(): | |||
| @@ -95,7 +95,7 @@ def test_sum_mul2(): | |||
| x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul3(): | |||
| @@ -119,4 +119,4 @@ def test_sum_mul3(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -215,7 +215,7 @@ def test_reshape_auto_5(): | |||
| size = 8 | |||
| context.set_auto_parallel_context(device_num=size, global_rank=0) | |||
| x = Tensor(np.ones([4, 1024 * size, 1]), dtype=ms.float32) | |||
| y = Tensor(np.ones([4, 1024 * size, ]), dtype=ms.float32) | |||
| y = Tensor(np.ones([4, 1024 * size,]), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| @@ -263,7 +263,7 @@ def test_reshape_auto_6(): | |||
| size = 8 | |||
| context.set_auto_parallel_context(device_num=size, global_rank=0) | |||
| x = Tensor(np.ones([4, 1024, 1]), dtype=ms.float32) | |||
| y = Tensor(np.ones([4, 1024, ]), dtype=ms.float32) | |||
| y = Tensor(np.ones([4, 1024,]), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -72,7 +72,7 @@ def test_rhombus1(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_rhombus2(): | |||
| @@ -103,7 +103,7 @@ def test_rhombus2(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_rhombus3(): | |||
| @@ -134,4 +134,4 @@ def test_rhombus3(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net, x, y, z) | |||
| compile_net(net, x, y, z) | |||
| @@ -21,7 +21,6 @@ from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class NetWithLoss(nn.Cell): | |||
| @@ -105,8 +105,8 @@ def test_dmnet_train_step(): | |||
| size = 8 | |||
| context.set_auto_parallel_context(device_num=size, global_rank=0) | |||
| input = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01) | |||
| net = GradWrap(NetWithLoss(MultiTransformer())) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, input) | |||
| _executor.compile(net, input_) | |||
| @@ -1,5 +1,19 @@ | |||
| import numpy as np | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import re | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| @@ -33,7 +47,7 @@ class Blockcell(nn.Cell): | |||
| return out | |||
| def getBlock(): | |||
| def get_block(): | |||
| return Blockcell() | |||
| @@ -41,8 +55,8 @@ def test_two_bn(): | |||
| class Net(nn.Cell): | |||
| def __init__(self): | |||
| super().__init__() | |||
| self.block1 = getBlock() | |||
| self.block2 = getBlock() | |||
| self.block1 = get_block() | |||
| self.block2 = get_block() | |||
| self.relu = P.ReLU() | |||
| self.add = P.TensorAdd() | |||
| self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| @@ -104,23 +104,23 @@ def test_two_matmul(): | |||
| set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32, | |||
| fully_use_devices=False, elementwise_op_strategy_follow=False) | |||
| para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") | |||
| assert para_slice_align_enable == False | |||
| assert not para_slice_align_enable | |||
| para_slice_align_size = get_algo_parameters("tensor_slice_align_size") | |||
| assert para_slice_align_size == 32 | |||
| fully_use_devices = get_algo_parameters("fully_use_devices") | |||
| assert fully_use_devices == False | |||
| assert not fully_use_devices | |||
| elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") | |||
| assert elementwise_op_strategy_follow == False | |||
| assert not elementwise_op_strategy_follow | |||
| reset_algo_parameters() | |||
| para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") | |||
| assert para_slice_align_enable == False | |||
| assert not para_slice_align_enable | |||
| para_slice_align_size = get_algo_parameters("tensor_slice_align_size") | |||
| assert para_slice_align_size == 16 | |||
| fully_use_devices = get_algo_parameters("fully_use_devices") | |||
| assert fully_use_devices == True | |||
| assert fully_use_devices | |||
| elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow") | |||
| assert elementwise_op_strategy_follow == False | |||
| assert not elementwise_op_strategy_follow | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| @@ -11,9 +11,8 @@ | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import math | |||
| import numpy as np | |||
| import os | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| @@ -21,10 +20,8 @@ from mindspore import Tensor, Parameter | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.api import _executor | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| @@ -41,7 +41,7 @@ _w2 = Tensor(np.ones([128, 32, 32]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -54,7 +54,7 @@ def test_batch_matmul_data_parallel(): | |||
| strategy1 = ((16, 1, 1), (16, 1, 1)) | |||
| strategy2 = ((16, 1, 1), (16, 1, 1)) | |||
| net = Net(_w1, _w2, False, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_batch_matmul_model_parallel(): | |||
| @@ -62,7 +62,7 @@ def test_batch_matmul_model_parallel(): | |||
| strategy1 = ((1, 1, 1), (1, 1, 1)) | |||
| strategy2 = ((1, 1, 1), (1, 1, 16)) | |||
| net = Net(_w1, _w2, False, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_batch_matmul_hybrid_parallel(): | |||
| @@ -70,13 +70,13 @@ def test_batch_matmul_hybrid_parallel(): | |||
| strategy1 = ((2, 2, 2), (2, 2, 2)) | |||
| strategy2 = ((2, 2, 2), (2, 2, 2)) | |||
| net = Net(_w1, _w2, False, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_batch_matmul_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1, _w2, False) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_batch_matmul_repeat_calc(): | |||
| @@ -84,7 +84,7 @@ def test_batch_matmul_repeat_calc(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((1, 2, 2), (1, 2, 2)) | |||
| net = Net(_w1, _w2, False, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_batch_matmul_transpose_b(): | |||
| @@ -92,4 +92,4 @@ def test_batch_matmul_transpose_b(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((1, 2, 2), (1, 2, 2)) | |||
| net = Net(_w1, _w2, True, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -30,7 +30,6 @@ from mindspore.train import Model, ParallelMode | |||
| from tests.dataset_mock import MindData | |||
| dev_num = 8 | |||
| strategy_no_weight = ((dev_num, 1, 1, 1),) | |||
| strategy_weight = ((dev_num, 1, 1, 1), (1, 1, 1, 1)) | |||
| strategy_bn = ((dev_num, 1, 1, 1), (1,), (1,)) | |||
| strategy_fc_weight_bias = ((dev_num, 1), (1, 1), (1,)) | |||
| @@ -62,7 +61,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0): | |||
| weight_shape = (out_channels, in_channels, 7, 7) | |||
| weight = Tensor(np.ones(weight_shape).astype(np.float32)) | |||
| conv = Conv2d(in_channels, out_channels, | |||
| kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False, | |||
| kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, | |||
| pad_mode="same") | |||
| conv.conv2d.set_strategy(strategy_weight) | |||
| return conv | |||
| @@ -95,7 +94,7 @@ class ResNet(Cell): | |||
| def __init__(self, num_classes=100): | |||
| super(ResNet, self).__init__() | |||
| strategy_no_weight = ((dev_num, 1, 1, 1),) | |||
| self.conv1 = conv7x7(3, 64, stride=2, padding=3) | |||
| self.conv1 = conv7x7(3, 64, stride=2, padding=0) | |||
| self.bn1 = bn_with_initialize(64) | |||
| self.relu = ReLU() | |||
| self.relu.relu.set_strategy(strategy_no_weight) | |||
| @@ -124,7 +123,6 @@ def test_batchnorm_batch_parallel(): | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| rank_size = 0 | |||
| predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32) | |||
| label = Tensor(np.ones([batch_size]), dtype=ms.int32) | |||
| @@ -171,7 +171,7 @@ class PReLU(nn.Cell): | |||
| if not isinstance(w, Tensor): | |||
| w = Tensor(w) | |||
| self.w = Parameter(initializer(w, [channel, ]), name='a') | |||
| self.w = Parameter(initializer(w, [channel,]), name='a') | |||
| self.prelu = P.PReLU() | |||
| self.relu = P.ReLU().set_strategy(((1))) | |||
| @@ -181,7 +181,7 @@ class PReLU(nn.Cell): | |||
| class BNNet(nn.Cell): | |||
| def __init__(self, strategy0, strategy1, strategy2): | |||
| def __init__(self): | |||
| super(BNNet, self).__init__() | |||
| self.bn = FusedBatchNorm(512) | |||
| self.prelu = PReLU(512) | |||
| @@ -192,13 +192,12 @@ class BNNet(nn.Cell): | |||
| return x | |||
| def bn_net(strategy0, strategy1, strategy2): | |||
| return BNNet(strategy0=strategy0, strategy1=strategy1, strategy2=strategy2) | |||
| def bn_net(): | |||
| return BNNet() | |||
| def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strategy2=None, strategy_loss=None): | |||
| def bn_common(parallel_mode, train_flag, strategy_loss=None): | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| batch_size = 32 | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| @@ -207,7 +206,7 @@ def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strateg | |||
| predict = Tensor(np.ones([32, 512]), dtype=ms.float32) | |||
| label = Tensor(np.ones([32]), dtype=ms.int32) | |||
| dataset = Dataset(predict, label, 2) | |||
| net = bn_net(strategy0, strategy1, strategy2) | |||
| net = bn_net() | |||
| loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) | |||
| loss.softmax_cross_entropy.set_strategy(strategy_loss) | |||
| @@ -21,7 +21,7 @@ from mindspore import context | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.nn.optim import Momentum | |||
| from mindspore.ops import operations as P | |||
| from mindspore.train import Model, ParallelMode | |||
| from mindspore.train import Model | |||
| from tests.dataset_mock import MindData | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| @@ -54,7 +54,7 @@ def test_param_broadcast(): | |||
| network.set_train() | |||
| predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) | |||
| out = network(predict) | |||
| _ = network(predict) | |||
| context.reset_auto_parallel_context() | |||
| @@ -67,5 +67,5 @@ def test_param_not_broadcast(): | |||
| network.set_train() | |||
| predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01) | |||
| out = network(predict) | |||
| _ = network(predict) | |||
| context.reset_auto_parallel_context() | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -69,7 +69,7 @@ def test_matmul_equal(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_not_equal(): | |||
| @@ -92,7 +92,7 @@ def test_matmul_not_equal(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_not_equal_repeated_calculation(): | |||
| @@ -115,7 +115,7 @@ def test_matmul_not_equal_repeated_calculation(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_maximum(): | |||
| @@ -138,7 +138,7 @@ def test_matmul_maximum(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_maximum_broadcast(): | |||
| @@ -161,7 +161,7 @@ def test_matmul_maximum_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_maximum_broadcast2(): | |||
| @@ -184,7 +184,7 @@ def test_matmul_maximum_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_minimum(): | |||
| @@ -207,7 +207,7 @@ def test_matmul_minimum(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_minimum_broadcast(): | |||
| @@ -230,7 +230,7 @@ def test_matmul_minimum_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_minimum_broadcast2(): | |||
| @@ -253,7 +253,7 @@ def test_matmul_minimum_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_minimum_auto_parallel(): | |||
| @@ -274,4 +274,4 @@ def test_matmul_minimum_auto_parallel(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -12,8 +12,6 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import mindspore as ms | |||
| from mindspore import Tensor | |||
| from mindspore.train._utils import _to_full_shapes, _to_full_tensor | |||
| @@ -35,7 +33,7 @@ def test_to_full_tensor_1(): | |||
| expect = ([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 2, 3], [4, 5, 6], [0, 0, 0], [0, 0, 0]]) | |||
| expect_tensor = Tensor(expect, dtype=ms.float32) | |||
| assert (full_tensor[0] == expect_tensor) | |||
| assert full_tensor[0] == expect_tensor | |||
| def test_to_full_tensor_2(): | |||
| @@ -52,7 +50,7 @@ def test_to_full_tensor_2(): | |||
| expect_tensor1 = Tensor(expect1, dtype=ms.int32) | |||
| expect_tensors = (expect_tensor0, expect_tensor1) | |||
| assert (full_tensor == expect_tensors) | |||
| assert full_tensor == expect_tensors | |||
| def test_to_full_tensor_sens_2(): | |||
| @@ -70,4 +68,4 @@ def test_to_full_tensor_sens_2(): | |||
| expect_tensor_sens = Tensor(0.1, dtype=ms.float32) | |||
| expect_tensors = (expect_tensor0, expect_tensor1, expect_tensor_sens) | |||
| assert (full_tensor == expect_tensors) | |||
| assert full_tensor == expect_tensors | |||
| @@ -47,8 +47,8 @@ class DenseMutMulNet(nn.Cell): | |||
| def test_dmnet_train_step(): | |||
| context.reset_auto_parallel_context() | |||
| input = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) | |||
| label = Tensor(np.zeros([32, 768]).astype(np.float32)) | |||
| net = DenseMutMulNet() | |||
| net = train_step_with_loss_warp(DenseMutMulNet()) | |||
| _executor.compile(net, input, label) | |||
| _executor.compile(net, input_, label) | |||
| @@ -32,7 +32,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, bias) | |||
| def compile(net, x, y, bias): | |||
| def compile_net(net, x, y, bias): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, bias) | |||
| @@ -58,7 +58,7 @@ def test_sum_as_loss_float16(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float16) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float16) | |||
| bias = Tensor(np.ones([64]), dtype=ms.float16) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| def test_sum_as_loss_float32(): | |||
| @@ -82,7 +82,7 @@ def test_sum_as_loss_float32(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| bias = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| def test_sum_as_loss_int32(): | |||
| @@ -106,4 +106,4 @@ def test_sum_as_loss_int32(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.int32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.int32) | |||
| bias = Tensor(np.ones([64]), dtype=ms.int32) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| @@ -50,7 +50,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -63,7 +63,7 @@ def test_dropout_do_mask_data_parallel(): | |||
| strategy1 = ((16, 1), (16, 1)) | |||
| strategy2 = ((16, 1),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_dropout_do_mask_model_parallel(): | |||
| @@ -71,7 +71,7 @@ def test_dropout_do_mask_model_parallel(): | |||
| strategy1 = ((1, 16), (1, 16)) | |||
| strategy2 = ((1, 16),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_dropout_do_mask_hybrid_parallel(): | |||
| @@ -79,13 +79,13 @@ def test_dropout_do_mask_hybrid_parallel(): | |||
| strategy1 = ((4, 4), (4, 4)) | |||
| strategy2 = ((4, 4),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_dropout_do_mask_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_dropout_do_mask_repeat_calc(): | |||
| @@ -93,4 +93,4 @@ def test_dropout_do_mask_repeat_calc(): | |||
| strategy1 = ((4, 4), (4, 4)) | |||
| strategy2 = ((2, 4),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -72,7 +72,7 @@ def test_matmul_pow(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_exp(): | |||
| @@ -98,7 +98,7 @@ def test_matmul_exp(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_log(): | |||
| @@ -124,7 +124,7 @@ def test_matmul_log(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_logical_not(): | |||
| @@ -151,7 +151,7 @@ def test_matmul_logical_not(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_cast(): | |||
| @@ -178,7 +178,7 @@ def test_matmul_cast(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.int32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cast_before_mirror(): | |||
| @@ -202,7 +202,7 @@ def test_cast_before_mirror(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float16) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cast_before_mirror1(): | |||
| @@ -226,7 +226,7 @@ def test_cast_before_mirror1(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float16) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float16) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cast_before_mirror2(): | |||
| @@ -250,7 +250,7 @@ def test_cast_before_mirror2(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float16) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float16) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cast_before_mirror3(): | |||
| @@ -274,7 +274,7 @@ def test_cast_before_mirror3(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float16) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float16) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_mul_two_cast(): | |||
| @@ -303,4 +303,4 @@ def test_mul_two_cast(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -54,7 +54,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -68,7 +68,7 @@ def test_expand_dims_data_parallel(): | |||
| strategy2 = ((16, 1, 1),) | |||
| strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) | |||
| net = Net(_w1, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_expand_dims_model_parallel(): | |||
| @@ -77,7 +77,7 @@ def test_expand_dims_model_parallel(): | |||
| strategy2 = ((1, 1, 16),) | |||
| strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1)) | |||
| net = Net(_w1, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_expand_dims_hybrid_parallel(): | |||
| @@ -86,13 +86,13 @@ def test_expand_dims_hybrid_parallel(): | |||
| strategy2 = ((2, 2, 4),) | |||
| strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) | |||
| net = Net(_w1, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_expand_dims_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_expand_dims_repeat_calc(): | |||
| @@ -101,7 +101,7 @@ def test_expand_dims_repeat_calc(): | |||
| strategy2 = ((1, 2, 2),) | |||
| strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) | |||
| net = Net(_w1, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_expand_dims_parameter(): | |||
| @@ -109,4 +109,4 @@ def test_expand_dims_parameter(): | |||
| strategy1 = ((1, 2, 2),) | |||
| strategy2 = ((2, 2, 4, 1), (2, 2, 4, 1)) | |||
| net = Net2(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, _x, _b) | |||
| context.reset_auto_parallel_context() | |||
| @@ -50,7 +50,7 @@ def test_forward_graph_data_parallel(): | |||
| strategy1 = ((16, 1, 1), (16, 1, 1)) | |||
| strategy2 = ((16, 1, 1),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_forward_graph_model_parallel(): | |||
| @@ -58,7 +58,7 @@ def test_forward_graph_model_parallel(): | |||
| strategy1 = ((1, 1, 16), (1, 1, 16)) | |||
| strategy2 = ((1, 1, 16),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_forward_graph_hybrid_parallel(): | |||
| @@ -66,13 +66,13 @@ def test_forward_graph_hybrid_parallel(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((2, 2, 4),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_forward_graph_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_forward_graph_repeat_calc(): | |||
| @@ -80,4 +80,4 @@ def test_forward_graph_repeat_calc(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((1, 2, 2),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -18,7 +18,6 @@ import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| @@ -120,7 +120,7 @@ class TrainOneStepCell(Cell): | |||
| return F.depend(loss, self.optimizer(grads)) | |||
| def net_trains(gather_v2_strategy, criterion, rank): | |||
| def net_trains(criterion, rank): | |||
| init() | |||
| lr = 0.1 | |||
| momentum = 0.9 | |||
| @@ -151,42 +151,42 @@ def test_auto_batch_parallel(): | |||
| gather_v2_strategy = None | |||
| criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| rank = 2 | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_2d_index_auto_batch_parallel(): | |||
| gather_v2_strategy = None | |||
| criterion = GatherV2(2, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| rank = 2 | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_batch_parallel(): | |||
| gather_v2_strategy = ((device_number, 1),) | |||
| criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| rank = 2 | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_strategy1(): | |||
| gather_v2_strategy = ((16, 2),) | |||
| rank = 2 | |||
| criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_strategy2(): | |||
| gather_v2_strategy = ((1, device_number),) | |||
| rank = 2 | |||
| criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_strategy3(): | |||
| gather_v2_strategy = ((8, 1),) | |||
| rank = 2 | |||
| criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number) | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| class GatherV2Axis1(_Loss): | |||
| @@ -217,18 +217,18 @@ def test_axis1_auto_batch_parallel(): | |||
| gather_v2_strategy = None | |||
| criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) | |||
| rank = 2 | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_axis1_batch_parallel(): | |||
| gather_v2_strategy = ((device_number, 1),) | |||
| criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) | |||
| rank = 2 | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| def test_axis1_strategy1(): | |||
| gather_v2_strategy = ((16, 2),) | |||
| rank = 17 | |||
| criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512) | |||
| net_trains(gather_v2_strategy, criterion, rank) | |||
| net_trains(criterion, rank) | |||
| @@ -12,8 +12,6 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| @@ -23,8 +21,6 @@ from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter, ParameterTuple | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations.comm_ops import _VirtualDataset | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| @@ -56,7 +52,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_by_list(self.network, self.weights)() | |||
| def compile(net): | |||
| def compile_net(net): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net) | |||
| @@ -67,7 +63,7 @@ def test_get_next_single(): | |||
| super().__init__() | |||
| self.norm = P.L2Normalize(axis=1) | |||
| self.prelu = P.PReLU() | |||
| self.w = Parameter(initializer(w, [channel, ]), name='w') | |||
| self.w = Parameter(initializer(w, [channel,]), name='w') | |||
| def construct(self, data): | |||
| x = self.norm(data) | |||
| @@ -84,7 +80,7 @@ def test_get_next_semi_auto_parallel(): | |||
| super().__init__() | |||
| self.norm = P.L2Normalize().set_strategy(strategy1) | |||
| self.prelu = P.PReLU().set_strategy(strategy2) | |||
| self.w = Parameter(initializer(w, [channel, ]), name='w') | |||
| self.w = Parameter(initializer(w, [channel,]), name='w') | |||
| def construct(self, data): | |||
| x = self.norm(data) | |||
| @@ -99,7 +95,7 @@ def test_get_next_semi_auto_parallel(): | |||
| strategy4=strategy4) | |||
| net = GradWrap(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_get_next_semi_auto_parallel1(): | |||
| @@ -108,7 +104,7 @@ def test_get_next_semi_auto_parallel1(): | |||
| super().__init__() | |||
| self.norm = P.L2Normalize().set_strategy(strategy1) | |||
| self.prelu = P.PReLU().set_strategy(strategy2) | |||
| self.w = Parameter(initializer(w, [channel, ]), name='w') | |||
| self.w = Parameter(initializer(w, [channel,]), name='w') | |||
| def construct(self, data): | |||
| x = self.norm(data) | |||
| @@ -123,7 +119,7 @@ def test_get_next_semi_auto_parallel1(): | |||
| strategy4=strategy4) | |||
| net = GradWrap(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_get_next_auto_parallel(): | |||
| @@ -132,7 +128,7 @@ def test_get_next_auto_parallel(): | |||
| super().__init__() | |||
| self.norm = P.L2Normalize().set_strategy(strategy1) | |||
| self.prelu = P.PReLU().set_strategy(strategy2) | |||
| self.w = Parameter(initializer(w, [channel, ]), name='w') | |||
| self.w = Parameter(initializer(w, [channel,]), name='w') | |||
| def construct(self, data): | |||
| x = self.norm(data) | |||
| @@ -144,7 +140,7 @@ def test_get_next_auto_parallel(): | |||
| net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2) | |||
| net = GradWrap(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_only_one_get_next(): | |||
| @@ -159,4 +155,4 @@ def test_only_one_get_next(): | |||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||
| net = Net() | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -52,8 +52,8 @@ def test_get_parameter_layout(): | |||
| x_layout = [[2, 4], [1, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [1, -1] | |||
| weight_layout = [[2, 4], [0, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [0, -1] | |||
| expect_dict = {'x': x_layout, 'w1': weight_layout} | |||
| # to be resolved: static local variable count_p is used in step_parallel.cc; it needs to be reset between each ut | |||
| assert (net.parameter_layout_dict == expect_dict) | |||
| # to be resolved: static local variable count_p is used in step_parallel.cc; it needs to be reset between each ut | |||
| assert net.parameter_layout_dict == expect_dict | |||
| if __name__ == '__main__': | |||
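For readers unfamiliar with the layout dictionary checked in this test, here is a hedged reading of one entry. Only the first two elements are documented by the test's own comments (device_arrangement and tensor_map), so the meaning of the third element is an assumption.

```python
# Hedged reading of one parameter_layout_dict entry, as checked by the test.
x_layout = [
    [2, 4],     # device_arrangement: devices viewed as a 2 x 4 mesh
    [1, -1],    # tensor_map: -1 marks a replicated dimension; non-negative
                # values index a mesh axis (interpretation assumed)
    [16, 32],   # assumed: shape information for parameter 'x'
]
assert x_layout == [[2, 4], [1, -1], [16, 32]]
```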
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -72,7 +72,7 @@ def test_matmul_tanh(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_activation(): | |||
| @@ -98,7 +98,7 @@ def test_matmul_activation(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_softmax(): | |||
| @@ -124,7 +124,7 @@ def test_matmul_softmax(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_matmul_logsoftmax(): | |||
| @@ -150,7 +150,7 @@ def test_matmul_logsoftmax(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_activations(): | |||
| @@ -179,7 +179,7 @@ def test_activations(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_activations_repeated_calculation(): | |||
| @@ -211,7 +211,7 @@ def test_activations_repeated_calculation(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_activations_axis_tuple(): | |||
| @@ -243,4 +243,4 @@ def test_activations_axis_tuple(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -48,7 +48,7 @@ _w = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -62,7 +62,7 @@ def test_layer_norm_data_parallel(): | |||
| strategy2 = ((16, 1, 1, 1), (1, 1, 1), (1, 1, 1)) | |||
| strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1)) | |||
| net = Net(_w, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_layer_norm_model_parallel(): | |||
| @@ -71,7 +71,7 @@ def test_layer_norm_model_parallel(): | |||
| strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1)) | |||
| strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1)) | |||
| net = Net(_w, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_layer_norm_hybrid_parallel(): | |||
| @@ -80,13 +80,13 @@ def test_layer_norm_hybrid_parallel(): | |||
| strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1)) | |||
| strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1)) | |||
| net = Net(_w, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_layer_norm_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_layer_norm_repeat_calc(): | |||
| @@ -95,7 +95,7 @@ def test_layer_norm_repeat_calc(): | |||
| strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1)) | |||
| strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) | |||
| net = Net(_w, strategy1, strategy2, strategy3) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_layer_norm_wrong_strategy(): | |||
| @@ -105,4 +105,4 @@ def test_layer_norm_wrong_strategy(): | |||
| strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) | |||
| net = Net(_w, strategy1, strategy2, strategy3) | |||
| with pytest.raises(RuntimeError): | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -21,7 +21,6 @@ from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class NetWithLoss(nn.Cell): | |||
| @@ -19,9 +19,8 @@ import mindspore.nn as nn | |||
| from mindspore import Tensor, Parameter | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.nn import TrainOneStepCell, WithLossCell | |||
| from mindspore.nn import TrainOneStepCell | |||
| from mindspore.nn.optim import Momentum, LARS | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| @@ -36,7 +35,7 @@ class NetWithLoss(nn.Cell): | |||
| return self.loss(predict, b)[0] | |||
| def compile(net, x, b): | |||
| def compile_net(net, x, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, b) | |||
| @@ -72,7 +71,7 @@ def test_momentum(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| def test_momentum_with_loss_scale(): | |||
| @@ -106,7 +105,7 @@ def test_momentum_with_loss_scale(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| def test_momentum_with_dynamic_lr(): | |||
| @@ -141,7 +140,7 @@ def test_momentum_with_dynamic_lr(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| def test_momentum_with_loss_scale_and_dynamic_lr(): | |||
| @@ -177,7 +176,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| def test_lars(): | |||
| @@ -205,11 +204,11 @@ def test_lars(): | |||
| net = Net(strategy1, strategy2, weight) | |||
| lr = Tensor(np.ones([6]), dtype=ms.float32) | |||
| SGD = Momentum(net.trainable_params(), lr, 0.9) | |||
| optimizer = LARS(SGD, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, | |||
| sgd = Momentum(net.trainable_params(), lr, 0.9) | |||
| optimizer = LARS(sgd, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name, | |||
| lars_filter=lambda x: 'bn' not in x.name) | |||
| net_with_loss = NetWithLoss(net, strategy3) | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| @@ -46,7 +46,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y) | |||
| def compile(net, x, y): | |||
| def compile_net(net, x, y): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y) | |||
| @@ -79,7 +79,7 @@ def test_two_matmul(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 128]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| def test_matmul_mul_broadcast2(): | |||
| @@ -103,7 +103,7 @@ def test_matmul_mul_broadcast2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 1]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| def test_two_matmul1(): | |||
| @@ -133,7 +133,7 @@ def test_two_matmul1(): | |||
| x = Tensor(np.ones([128, 128]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 128]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| def test_matmul_add_tensor(): | |||
| @@ -158,4 +158,4 @@ def test_matmul_add_tensor(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -52,7 +52,7 @@ def test_neg_data_parallel(): | |||
| strategy1 = ((16, 1, 1), (16, 1, 1)) | |||
| strategy2 = ((16, 1, 1),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_neg_model_parallel(): | |||
| @@ -60,7 +60,7 @@ def test_neg_model_parallel(): | |||
| strategy1 = ((1, 1, 16), (1, 1, 16)) | |||
| strategy2 = ((1, 1, 16),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_neg_hybrid_parallel(): | |||
| @@ -68,13 +68,13 @@ def test_neg_hybrid_parallel(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((2, 2, 4),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_neg_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_neg_repeat_calc(): | |||
| @@ -82,4 +82,4 @@ def test_neg_repeat_calc(): | |||
| strategy1 = ((2, 2, 4), (2, 2, 4)) | |||
| strategy2 = ((1, 2, 2),) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| @@ -12,8 +12,8 @@ | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import numpy as np | |||
| import re | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| @@ -159,8 +159,8 @@ class SemiAutoOneHotNet(Cell): | |||
| weight_np = np.zeros(weight_shape, np.float32) | |||
| self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight') | |||
| def construct(self, input, label): | |||
| input_n = self.normalize(input) | |||
| def construct(self, input_, label): | |||
| input_n = self.normalize(input_) | |||
| w = self.normalize2(self.weight) | |||
| fc_o = self.fc(input_n, w) | |||
| fc_o_shape = F.shape(fc_o) | |||
| @@ -209,9 +209,8 @@ class Dataset(MindData): | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return self.predict, self.label | |||
| else: | |||
| return self.predict, | |||
| return (self.predict, self.label) | |||
| return (self.predict,) | |||
| def reset(self): | |||
| self.index = 0 | |||
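The same `__next__` cleanup recurs in several of these dataset stubs: the if/else becomes early returns with explicit tuples. A self-contained sketch follows; the `length` attribute name is an assumption, since the guard before `StopIteration` sits outside the hunk.

```python
class Dataset:
    """Sketch of the simplified iterator protocol used by these test datasets."""

    def __init__(self, predict, label, length, input_num=2):
        self.predict = predict
        self.label = label
        self.length = length          # assumed attribute; the real guard is not shown
        self.input_num = input_num
        self.index = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.index >= self.length:
            raise StopIteration
        self.index += 1
        if self.input_num == 2:
            return (self.predict, self.label)
        return (self.predict,)

    def reset(self):
        self.index = 0
```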
| @@ -268,20 +267,20 @@ def test_bn_reshape_dense_bn_train_loss(): | |||
| batch_size = 16 | |||
| device_num = 16 | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=0) | |||
| input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) | |||
| label = Tensor(np.ones([batch_size]), dtype=ms.int32) | |||
| net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, input, label) | |||
| _executor.compile(net, input_, label) | |||
| def test_semi_one_hot_net_batch(): | |||
| batch_size = 16 | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=0) | |||
| input = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01) | |||
| label = Tensor(np.ones([batch_size]), dtype=ms.int32) | |||
| net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch()) | |||
| @@ -289,7 +288,7 @@ def test_semi_one_hot_net_batch(): | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, input, label) | |||
| _executor.compile(net, input_, label) | |||
| def test_semi_one_hot_net_model(): | |||
| @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| @@ -126,15 +126,6 @@ def test_onehot_auto(): | |||
| compile_graph(strategy1, strategy2, strategy3, strategy4, auto=True) | |||
| def test_onehot_model_parallel(): | |||
| context.set_auto_parallel_context(device_num=16, global_rank=0) | |||
| strategy1 = ((2, 4), (4, 2)) | |||
| strategy2 = ((2, 8),) | |||
| strategy3 = ((1, 16), (), ()) | |||
| strategy4 = ((16, 1), (16, 1)) | |||
| compile_graph(strategy1, strategy2, strategy3, strategy4) | |||
| def test_onehot_batch_parallel_axis0(): | |||
| context.set_auto_parallel_context(device_num=16, global_rank=0) | |||
| strategy1 = ((2, 4), (4, 2)) | |||
| @@ -21,8 +21,6 @@ from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn.cell import Cell | |||
| from mindspore.nn.layer.activation import ReLU | |||
| from mindspore.nn.layer.basic import Dense | |||
| from mindspore.nn.layer.basic import Flatten | |||
| from mindspore.nn.layer.conv import Conv2d | |||
| from mindspore.nn.layer.normalization import BatchNorm2d | |||
| @@ -61,8 +59,7 @@ class DenseWrap(Cell): | |||
| self.has_bias = has_bias | |||
| self.weight = Parameter(initializer( | |||
| weight_init, [output_channels, input_channels]), | |||
| name="weight") | |||
| weight_init, [output_channels, input_channels]), name="weight") | |||
| if self.has_bias: | |||
| self.bias = Parameter(initializer( | |||
| @@ -103,7 +100,7 @@ class DatasetLenet(MindData): | |||
| self.index = 0 | |||
| def conv3x3(in_channels, out_channels, stride=1, padding=1): | |||
| def conv3x3(in_channels, out_channels, stride=1): | |||
| """3x3 convolution """ | |||
| weight_shape = (out_channels, in_channels, 3, 3) | |||
| weight = Tensor(np.ones(weight_shape).astype(np.float32)) | |||
| @@ -114,7 +111,7 @@ def conv3x3(in_channels, out_channels, stride=1, padding=1): | |||
| return conv | |||
| def conv1x1(in_channels, out_channels, stride=1, padding=0): | |||
| def conv1x1(in_channels, out_channels, stride=1): | |||
| """1x1 convolution""" | |||
| weight_shape = (out_channels, in_channels, 1, 1) | |||
| weight = Tensor(np.ones(weight_shape).astype(np.float32)) | |||
| @@ -125,7 +122,7 @@ def conv1x1(in_channels, out_channels, stride=1, padding=0): | |||
| return conv | |||
| def conv7x7(in_channels, out_channels, stride=1, padding=0): | |||
| def conv7x7(in_channels, out_channels, stride=1): | |||
| """1x1 convolution""" | |||
| weight_shape = (out_channels, in_channels, 7, 7) | |||
| weight = Tensor(np.ones(weight_shape).astype(np.float32)) | |||
| @@ -186,18 +183,17 @@ class ResidualBlock(Cell): | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| stride=1, | |||
| down_sample=False): | |||
| stride=1): | |||
| super(ResidualBlock, self).__init__() | |||
| out_chls = out_channels // self.expansion | |||
| self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) | |||
| self.conv1 = conv1x1(in_channels, out_chls, stride=1) | |||
| self.bn1 = bn_with_initialize(out_chls) | |||
| self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) | |||
| self.conv2 = conv3x3(out_chls, out_chls, stride=stride) | |||
| self.bn2 = bn_with_initialize(out_chls) | |||
| self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) | |||
| self.conv3 = conv1x1(out_chls, out_channels, stride=1) | |||
| self.bn3 = bn_with_initialize_last(out_channels) | |||
| self.relu1 = P.ReLU().set_strategy(strategy_no_weight) | |||
| @@ -236,21 +232,21 @@ class ResidualBlockWithDown(Cell): | |||
| super(ResidualBlockWithDown, self).__init__() | |||
| out_chls = out_channels // self.expansion | |||
| self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0) | |||
| self.conv1 = conv1x1(in_channels, out_chls, stride=1) | |||
| self.bn1 = bn_with_initialize(out_chls) | |||
| self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0) | |||
| self.conv2 = conv3x3(out_chls, out_chls, stride=stride) | |||
| self.bn2 = bn_with_initialize(out_chls) | |||
| self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) | |||
| self.conv3 = conv1x1(out_chls, out_channels, stride=1) | |||
| self.bn3 = bn_with_initialize_last(out_channels) | |||
| self.relu1 = P.ReLU().set_strategy(strategy_no_weight) | |||
| self.relu2 = P.ReLU().set_strategy(strategy_no_weight) | |||
| self.relu3 = P.ReLU().set_strategy(strategy_no_weight) | |||
| self.downSample = down_sample | |||
| self.down_sample = down_sample | |||
| self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) | |||
| self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride) | |||
| self.bn_down_sample = bn_with_initialize(out_channels) | |||
| self.add = TensorAdd().set_strategy(strategy_add) | |||
| @@ -279,7 +275,7 @@ class ResidualBlockWithDown(Cell): | |||
| class MakeLayer0(Cell): | |||
| def __init__(self, block, layer_num, in_channels, out_channels, stride): | |||
| def __init__(self, block, in_channels, out_channels, stride): | |||
| super(MakeLayer0, self).__init__() | |||
| self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True) | |||
| self.b = block(out_channels, out_channels, stride=stride) | |||
| @@ -295,14 +291,14 @@ class MakeLayer0(Cell): | |||
| class ResNet(Cell): | |||
| def __init__(self, block, layer_num, num_classes=100): | |||
| def __init__(self, block, num_classes=100): | |||
| super(ResNet, self).__init__() | |||
| self.conv1 = conv7x7(3, 64, stride=2, padding=3) | |||
| self.conv1 = conv7x7(3, 64, stride=2) | |||
| self.bn1 = bn_with_initialize(64) | |||
| self.relu = P.ReLU().set_strategy(strategy_no_weight) | |||
| self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") | |||
| self.layer1 = MakeLayer0( | |||
| block, layer_num[0], in_channels=64, out_channels=256, stride=1) | |||
| block, in_channels=64, out_channels=256, stride=1) | |||
| self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) | |||
| self.fc = fc_with_initialize(64 * block.expansion, num_classes) | |||
| self.flatten = Flatten() | |||
| @@ -320,12 +316,12 @@ class ResNet(Cell): | |||
| class ResNetModelParallel(Cell): | |||
| def __init__(self, block, layer_num, num_classes=100): | |||
| def __init__(self, block, num_classes=100): | |||
| super(ResNetModelParallel, self).__init__() | |||
| self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),)) | |||
| self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") | |||
| self.layer1 = MakeLayer0( | |||
| block, layer_num[0], in_channels=64, out_channels=256, stride=1) | |||
| block, in_channels=64, out_channels=256, stride=1) | |||
| self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) | |||
| self.fc = fc_with_initialize(64 * block.expansion, num_classes) | |||
| self.flatten = Flatten() | |||
| @@ -341,11 +337,11 @@ class ResNetModelParallel(Cell): | |||
| def resnet_operator_net(num_classes): | |||
| return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes) | |||
| return ResNet(ResidualBlock, num_classes) | |||
| def resnet_model_parallel_net(num_classes): | |||
| return ResNetModelParallel(ResidualBlock, [3, 4, 6, 3], num_classes) | |||
| return ResNetModelParallel(ResidualBlock, num_classes) | |||
| def test_resnet_operator_batch_parallel(): | |||
| @@ -354,7 +350,6 @@ def test_resnet_operator_batch_parallel(): | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| rank_size = dev_num | |||
| context.reset_auto_parallel_context() | |||
| context.set_auto_parallel_context(device_num=dev_num, global_rank=0) | |||
| @@ -381,7 +376,6 @@ def test_resnet_model_parallel(): | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| rank_size = dev_num | |||
| context.reset_auto_parallel_context() | |||
| context.set_auto_parallel_context(device_num=dev_num, global_rank=0) | |||
| @@ -35,7 +35,7 @@ class NetWithLoss(nn.Cell): | |||
| return self.loss(predict, b)[0] | |||
| def compile(net, x, b): | |||
| def compile_net(net, x, b): | |||
| net.set_auto_parallel() | |||
| _Executor().compile(net, x, b) | |||
| @@ -72,7 +72,7 @@ def test_optimizer_clone_weight(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| def test_optimizer_clone_weight2(): | |||
| @@ -107,4 +107,4 @@ def test_optimizer_clone_weight2(): | |||
| train_net = TrainOneStepCell(net_with_loss, optimizer) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(train_net, x, b) | |||
| compile_net(train_net, x, b) | |||
| @@ -52,7 +52,7 @@ def test_parameter_init(): | |||
| weight = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| net = Net(strategy1, weight) | |||
| net(x, ) | |||
| net(x,) | |||
| if __name__ == '__main__': | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y) | |||
| def compile(net, x, y): | |||
| def compile_net(net, x, y): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y) | |||
| @@ -63,7 +63,7 @@ def test_prelu_single_success1(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) | |||
| w = Tensor(np.random.rand(33), ms.float32) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| def test_prelu_single_success2(): | |||
| @@ -80,7 +80,7 @@ def test_prelu_single_success2(): | |||
| net = GradWrap(NetWithLoss(Net())) | |||
| x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32) | |||
| w = Tensor([0.1], ms.float32) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| def test_prelu_parallel_success1(): | |||
| @@ -100,7 +100,7 @@ def test_prelu_parallel_success1(): | |||
| x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) | |||
| w = Tensor(np.random.rand(4), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| def test_prelu_parallel_success2(): | |||
| @@ -120,7 +120,7 @@ def test_prelu_parallel_success2(): | |||
| x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32) | |||
| w = Tensor(np.random.rand(4), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| def test_prelu_parallel_success3(): | |||
| @@ -183,7 +183,7 @@ def test_prelu_parallel_success4(): | |||
| x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) | |||
| w = Tensor(np.random.rand(16), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| def test_prelu_parallel_success5(): | |||
| @@ -203,4 +203,4 @@ def test_prelu_parallel_success5(): | |||
| x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32) | |||
| w = Tensor(np.random.rand(1), dtype=ms.float32) | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| compile(net, x, w) | |||
| compile_net(net, x, w) | |||
| @@ -47,9 +47,8 @@ class Dataset(MindData): | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return self.predict, self.label | |||
| else: | |||
| return self.predict, | |||
| return (self.predict, self.label) | |||
| return (self.predict,) | |||
| def reset(self): | |||
| self.index = 0 | |||
| @@ -68,7 +67,7 @@ class PReLU(nn.Cell): | |||
| if not isinstance(w, Tensor): | |||
| raise TypeError("w only support np.float32, float or Tensor type.") | |||
| self.w = Parameter(initializer(w, [channel, ]), name='a') | |||
| self.w = Parameter(initializer(w, [channel,]), name='a') | |||
| self.prelu = P.PReLU() | |||
| self.relu = P.ReLU().set_strategy(((1,),)) | |||
| self.sub = P.Sub().set_strategy(((1,), (1,))) | |||
| @@ -97,7 +96,6 @@ def prelu_net(): | |||
| def reshape_common(parallel_mode): | |||
| batch_size = 32 | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -74,7 +74,7 @@ def test_sum_mul(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul2(): | |||
| @@ -101,7 +101,7 @@ def test_sum_mul2(): | |||
| x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul3(): | |||
| @@ -128,7 +128,7 @@ def test_sum_mul3(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul4(): | |||
| @@ -155,7 +155,7 @@ def test_sum_mul4(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32, 1]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul5(): | |||
| @@ -179,7 +179,7 @@ def test_sum_mul5(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul6(): | |||
| @@ -203,7 +203,7 @@ def test_sum_mul6(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_sum_mul7(): | |||
| @@ -227,7 +227,7 @@ def test_sum_mul7(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([1, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_max_mul(): | |||
| @@ -254,7 +254,7 @@ def test_max_mul(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_min_mul(): | |||
| @@ -281,7 +281,7 @@ def test_min_mul(): | |||
| x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_reduce_mean_mul_float32(): | |||
| @@ -309,7 +309,7 @@ def test_reduce_mean_mul_float32(): | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| class ArgMaxWithValueNet(nn.Cell): | |||
| @@ -321,7 +321,7 @@ class ArgMaxWithValueNet(nn.Cell): | |||
| def construct(self, x, y, b): | |||
| out = self.mul1(x, y) | |||
| index, out = self.arg_max_with_value(out) | |||
| _, out = self.arg_max_with_value(out) | |||
| out = self.mul2(out, b) | |||
| return out | |||
| @@ -335,16 +335,16 @@ class ArgMinWithValueNet(nn.Cell): | |||
| def construct(self, x, y, b): | |||
| out = self.mul1(x, y) | |||
| index, out = self.arg_min_with_value(out) | |||
| _, out = self.arg_min_with_value(out) | |||
| out = self.mul2(out, b) | |||
| return out | |||
| def gen_inputs_and_compile(net): | |||
| def gen_inputs_and_compile_net(net): | |||
| x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): | |||
| @@ -354,7 +354,7 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): | |||
| strategy3 = ((2, 4), (2, 4)) | |||
| net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_max_with_value_mul_semi(): | |||
| @@ -364,7 +364,7 @@ def test_arg_max_with_value_mul_semi(): | |||
| strategy3 = ((2, 4), (2, 4)) | |||
| net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_max_with_value_mul_auto(): | |||
| @@ -374,7 +374,7 @@ def test_arg_max_with_value_mul_auto(): | |||
| strategy3 = None | |||
| net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_min_with_value_mul_semi_axis_parallel(): | |||
| @@ -384,7 +384,7 @@ def test_arg_min_with_value_mul_semi_axis_parallel(): | |||
| strategy3 = ((2, 4), (2, 4)) | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_min_with_value_mul_semi(): | |||
| @@ -394,7 +394,7 @@ def test_arg_min_with_value_mul_semi(): | |||
| strategy3 = ((2, 4), (2, 4)) | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_min_with_value_mul_auto(): | |||
| @@ -404,7 +404,7 @@ def test_arg_min_with_value_mul_auto(): | |||
| strategy3 = None | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| class ArgMinWithValueNet2(nn.Cell): | |||
| @@ -416,7 +416,7 @@ class ArgMinWithValueNet2(nn.Cell): | |||
| def construct(self, x, y, b): | |||
| out = self.mul1(x, y) | |||
| index, out = self.arg_min_with_value(out) | |||
| _, out = self.arg_min_with_value(out) | |||
| out = self.relu(out) | |||
| return out | |||
| @@ -428,7 +428,7 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2(): | |||
| strategy3 = ((2, 4, 1),) | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_min_with_value_mul_semi2(): | |||
| @@ -438,7 +438,7 @@ def test_arg_min_with_value_mul_semi2(): | |||
| strategy3 = ((2, 4, 1),) | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_arg_min_with_value_mul_auto2(): | |||
| @@ -448,7 +448,7 @@ def test_arg_min_with_value_mul_auto2(): | |||
| strategy3 = None | |||
| net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3))) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| gen_inputs_and_compile(net) | |||
| gen_inputs_and_compile_net(net) | |||
| def test_cross_batch(): | |||
| @@ -475,7 +475,7 @@ def test_cross_batch(): | |||
| x = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cross_batch2(): | |||
| @@ -502,7 +502,7 @@ def test_cross_batch2(): | |||
| x = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_cross_batch_auto(): | |||
| @@ -526,7 +526,7 @@ def test_cross_batch_auto(): | |||
| x = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_max_empty_tuple(): | |||
| @@ -554,4 +554,4 @@ def test_max_empty_tuple(): | |||
| y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -18,7 +18,6 @@ import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.api import _executor | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.parameter import ParameterTuple | |||
| @@ -54,9 +53,8 @@ class Dataset(MindData): | |||
| raise StopIteration | |||
| self.index += 1 | |||
| if self.input_num == 2: | |||
| return self.predict, self.label | |||
| else: | |||
| return self.predict, | |||
| return (self.predict, self.label) | |||
| return (self.predict,) | |||
| def reset(self): | |||
| self.index = 0 | |||
| @@ -82,7 +80,6 @@ def reshape_net(strategy0, strategy1, strategy2): | |||
| def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss): | |||
| batch_size = 32 | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| @@ -306,21 +303,21 @@ class ReshapeNet6(nn.Cell): | |||
| return matmul2_o | |||
| def compile(net, input): | |||
| def compile_net(net, input_): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, input) | |||
| _executor.compile(net, input_) | |||
| def reshape_net2(backbone): | |||
| batch_size = 16 | |||
| device_num = 16 | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=0) | |||
| input = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01) | |||
| net = GradWrap(NetWithLoss(backbone)) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(net, input) | |||
| compile_net(net, input_) | |||
| def test_reshape_net1_1(): | |||
| @@ -480,11 +477,11 @@ def test_batchnorm_reshape_train(): | |||
| device_num = 16 | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| input = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01) | |||
| net = GradWrap(NetWithLoss(BatchNormReshapeNet())) | |||
| compile(net, input) | |||
| compile_net(net, input_) | |||
| def bn_with_initialize(out_channels): | |||
| @@ -517,12 +514,12 @@ def test_bn_reshape_dense_bn_train(): | |||
| batch_size = 16 | |||
| device_num = 16 | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=0) | |||
| input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) | |||
| input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01) | |||
| net = GradWrap(NetWithLoss(BNReshapeDenseBNNet())) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| compile(net, input) | |||
| compile_net(net, input_) | |||
| class ParallelReduceMeanNet(nn.Cell): | |||
| @@ -58,7 +58,7 @@ class Net(nn.Cell): | |||
| return out | |||
| def compile(net, x, y): | |||
| def compile_net(net, x, y): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y) | |||
| @@ -69,7 +69,7 @@ def test_reshape_parameter_data_parallel(): | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| x = Tensor(np.ones([10000, 36]), dtype=ms.float32) | |||
| y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| def test_reshape_parameter_model_parallel(): | |||
| @@ -78,4 +78,4 @@ def test_reshape_parameter_model_parallel(): | |||
| net = GradWrap(NetWithLoss(Net(strategy))) | |||
| x = Tensor(np.ones([10000, 36]), dtype=ms.float32) | |||
| y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) | |||
| compile(net, x, y) | |||
| compile_net(net, x, y) | |||
| @@ -22,7 +22,6 @@ from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class GradWrap(nn.Cell): | |||
| @@ -30,10 +30,10 @@ def test_set_auto_parallel_context(): | |||
| parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") | |||
| assert device_num == 4 | |||
| assert global_rank == 3 | |||
| assert mirror_mean == True | |||
| assert cast_before_mirror == False | |||
| assert mirror_mean | |||
| assert not cast_before_mirror | |||
| assert parallel_mode == "auto_parallel" | |||
| assert parameter_broadcast == False | |||
| assert not parameter_broadcast | |||
| auto_parallel_context().set_communication_backend("hccl") | |||
| backend = auto_parallel_context().get_communication_backend() | |||
| @@ -43,7 +43,7 @@ def test_set_auto_parallel_context(): | |||
| device_num = auto_parallel_context().get_device_num() | |||
| device_num_is_set = auto_parallel_context().get_device_num_is_set() | |||
| assert device_num == 4 | |||
| assert device_num_is_set == True | |||
| assert device_num_is_set | |||
| auto_parallel_context().set_global_rank(4) | |||
| global_rank = auto_parallel_context().get_global_rank() | |||
| @@ -51,14 +51,14 @@ def test_set_auto_parallel_context(): | |||
| auto_parallel_context().set_mirror_mean(True) | |||
| mirror_mean = auto_parallel_context().get_mirror_mean() | |||
| assert mirror_mean == True | |||
| assert mirror_mean | |||
| auto_parallel_context().set_cast_before_mirror(False) | |||
| cast_before_mirror = auto_parallel_context().get_cast_before_mirror() | |||
| assert cast_before_mirror == False | |||
| assert not cast_before_mirror | |||
| parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() | |||
| assert parameter_broadcast_is_set == True | |||
| assert parameter_broadcast_is_set | |||
| with pytest.raises(ValueError): | |||
| context.set_auto_parallel_context(device_num=0) | |||
| @@ -94,9 +94,9 @@ def test_reset_auto_parallel_context(): | |||
| parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() | |||
| assert device_num == 1 | |||
| assert global_rank == 0 | |||
| assert mirror_mean == False | |||
| assert cast_before_mirror == True | |||
| assert not mirror_mean | |||
| assert cast_before_mirror | |||
| assert parallel_mode == "stand_alone" | |||
| assert parameter_broadcast == False | |||
| assert device_num_is_set == False | |||
| assert parameter_broadcast_is_set == False | |||
| assert not parameter_broadcast | |||
| assert not device_num_is_set | |||
| assert not parameter_broadcast_is_set | |||
| @@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([128, 64]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) | |||
| train_net = TrainOneStepCell(net, optimizer) | |||
| train_net.set_auto_parallel() | |||
| @@ -52,7 +52,7 @@ def test_sigmoid_cross_entropy_with_logits_data_parallel(): | |||
| strategy1 = ((16, 1), (16, 1)) | |||
| strategy2 = ((16, 1), (16, 1)) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_sigmoid_cross_entropy_with_logits_model_parallel(): | |||
| @@ -60,7 +60,7 @@ def test_sigmoid_cross_entropy_with_logits_model_parallel(): | |||
| strategy1 = ((1, 16), (1, 16)) | |||
| strategy2 = ((1, 16), (1, 16)) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): | |||
| @@ -68,13 +68,13 @@ def test_sigmoid_cross_entropy_with_logits_hybrid_parallel(): | |||
| strategy1 = ((2, 8), (2, 8)) | |||
| strategy2 = ((2, 8), (2, 8)) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_sigmoid_cross_entropy_with_logits_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net(_w1) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_sigmoid_cross_entropy_with_logits_repeat_calc(): | |||
| @@ -82,4 +82,4 @@ def test_sigmoid_cross_entropy_with_logits_repeat_calc(): | |||
| strategy1 = ((2, 8), (2, 8)) | |||
| strategy2 = ((2, 2), (2, 2)) | |||
| net = Net(_w1, strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
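The compile -> compile_net rename running through these test files avoids shadowing Python's builtin compile(), which pylint reports as redefined-builtin and which would break any later use of the builtin in the same module. A hedged sketch of the failure mode (names are illustrative, not taken from the diff):

    def compile(net):                      # shadows the builtin compile()
        return net

    # A later call to the builtin in this module now hits the helper instead, e.g.
    # compile("x = 1", "<string>", "exec") would raise TypeError here.

    def compile_net(net):                  # the renamed helper leaves the builtin intact
        return net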
| @@ -21,7 +21,6 @@ from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class NetWithLoss(nn.Cell): | |||
| @@ -44,7 +43,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -71,7 +70,7 @@ def test_softmax_cross_entropy_loss(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_softmax_cross_entropy_loss_repeated_calculation(): | |||
| @@ -96,7 +95,7 @@ def test_softmax_cross_entropy_loss_repeated_calculation(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_softmax_cross_entropy_loss_auto_batch_parallel(): | |||
| @@ -118,4 +117,4 @@ def test_softmax_cross_entropy_loss_auto_batch_parallel(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -22,7 +22,6 @@ from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class GradWrap(nn.Cell): | |||
| @@ -54,7 +53,7 @@ class GradWrap3(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, bias) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -81,7 +80,7 @@ def test_no_grad(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_grad_sens_parameter_type(): | |||
| @@ -135,7 +134,7 @@ def test_grad_sens_tensor_type(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_grad_sens_scalar_broadcast(): | |||
| @@ -159,4 +158,4 @@ def test_grad_sens_scalar_broadcast(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| bias = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| @@ -15,9 +15,9 @@ | |||
| import numpy as np | |||
| import mindspore as ms | |||
| from mindspore import context, Tensor, Parameter | |||
| from mindspore import context, Tensor | |||
| from mindspore.common.api import _executor | |||
| from mindspore.nn import Cell, TrainOneStepCell, Momentum | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| @@ -37,7 +37,7 @@ _x = Tensor(np.ones([64, 1, 32, 1]), dtype=ms.float32) | |||
| _b = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| def compile(net): | |||
| def compile_net(net): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, _x, _b) | |||
| context.reset_auto_parallel_context() | |||
| @@ -48,7 +48,7 @@ def test_squeeze_data_parallel(): | |||
| strategy1 = ((16, 1, 1, 1),) | |||
| strategy2 = ((16, 1), (16, 1)) | |||
| net = Net(strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_squeeze_model_parallel(): | |||
| @@ -56,7 +56,7 @@ def test_squeeze_model_parallel(): | |||
| strategy1 = ((1, 1, 16, 1),) | |||
| strategy2 = ((1, 16), (1, 16)) | |||
| net = Net(strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_squeeze_specified_axis(): | |||
| @@ -64,13 +64,13 @@ def test_squeeze_specified_axis(): | |||
| strategy1 = ((4, 1, 4, 1),) | |||
| strategy2 = ((8, 2), (8, 2)) | |||
| net = Net(strategy1, strategy2, (1, 3)) | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_squeeze_auto_parallel(): | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) | |||
| net = Net() | |||
| compile(net) | |||
| compile_net(net) | |||
| def test_squeeze_repeat_calc(): | |||
| @@ -78,4 +78,4 @@ def test_squeeze_repeat_calc(): | |||
| strategy1 = ((1, 1, 8, 1),) | |||
| strategy2 = ((2, 8), (2, 8)) | |||
| net = Net(strategy1, strategy2) | |||
| compile(net) | |||
| compile_net(net) | |||
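As a rough reading of the squeeze tests above, each set_strategy tuple lists how many slices to take along every dimension of an input, and the tests choose products that either equal the configured device_num (a full partition) or divide it (the repeat_calc case). This interpretation is an assumption drawn from the test names and values, not stated in the diff; a small sketch of the arithmetic:

    import functools
    import operator

    def splits_product(split_counts):
        # Multiply the per-dimension split counts of one input's strategy.
        return functools.reduce(operator.mul, split_counts, 1)

    device_num = 16
    assert splits_product((16, 1, 1, 1)) == device_num        # data-parallel strategy
    assert splits_product((4, 1, 4, 1)) == device_num         # specified-axis strategy
    assert device_num % splits_product((1, 1, 8, 1)) == 0     # repeat_calc: 8 divides 16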
| @@ -21,7 +21,6 @@ from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||
| class GradWrap(nn.Cell): | |||
| @@ -33,7 +32,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, bias) | |||
| def compile(net, x, y, bias): | |||
| def compile_net(net, x, y, bias): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, bias) | |||
| @@ -59,7 +58,7 @@ def test_sum_as_loss(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| bias = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| def test_sum_as_loss2(): | |||
| @@ -83,4 +82,4 @@ def test_sum_as_loss2(): | |||
| x = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([64, 32]), dtype=ms.float32) | |||
| bias = Tensor(np.ones([64]), dtype=ms.float32) | |||
| compile(net, x, y, bias) | |||
| compile_net(net, x, y, bias) | |||
| @@ -17,7 +17,6 @@ import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor, context | |||
| from mindspore import context | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.nn.optim.momentum import Momentum | |||
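The import hunk above appears to drop a now-redundant `from mindspore import context` line, since context is already pulled in by the combined `from mindspore import Tensor, context` import; a tiny illustrative sketch (not the exact file):

    from mindspore import Tensor, context   # combined import kept
    # from mindspore import context         # removed: context is already imported above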
| @@ -67,7 +66,6 @@ def transpose_net(strategy1, strategy2): | |||
| def transpose_common(strategy1, strategy2): | |||
| batch_size = 32 | |||
| learning_rate = 0.1 | |||
| momentum = 0.9 | |||
| epoch_size = 2 | |||
| @@ -44,7 +44,7 @@ class GradWrap(nn.Cell): | |||
| return C.grad_all(self.network)(x, y, b) | |||
| def compile(net, x, y, b): | |||
| def compile_net(net, x, y, b): | |||
| net.set_auto_parallel() | |||
| _executor.compile(net, x, y, b) | |||
| @@ -72,7 +72,7 @@ def test_two_matmul(): | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_two_matmul_repeated_calculation1(): | |||
| @@ -96,7 +96,7 @@ def test_two_matmul_repeated_calculation1(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| def test_two_matmul_repeated_calculation2(): | |||
| @@ -120,4 +120,4 @@ def test_two_matmul_repeated_calculation2(): | |||
| x = Tensor(np.ones([128, 32]), dtype=ms.float32) | |||
| y = Tensor(np.ones([32, 64]), dtype=ms.float32) | |||
| b = Tensor(np.ones([64, 64]), dtype=ms.float32) | |||
| compile(net, x, y, b) | |||
| compile_net(net, x, y, b) | |||
| @@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple | |||
| from mindspore import context | |||
| from mindspore.common.api import _executor | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import operations as P | |||
| @@ -78,7 +78,7 @@ def test_virtual_dataset_3_input(): | |||
| def test_virtualdataset_cell_3_inputs(): | |||
| class Net(nn.Cell): | |||
| def __init__(self, strategy0, strategy1, strategy2, strategy3): | |||
| def __init__(self, strategy1, strategy2, strategy3): | |||
| super().__init__() | |||
| self.matmul1 = P.MatMul().set_strategy(strategy1) | |||
| self.matmul2 = P.MatMul().set_strategy(strategy2) | |||
| @@ -89,7 +89,7 @@ def test_virtualdataset_cell_3_inputs(): | |||
| out = self.matmul2(out, b) | |||
| return out | |||
| net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None, None)))) | |||
| net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None)))) | |||
| context.set_context(save_graphs=True) | |||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||
| context.set_auto_parallel_context(device_num=8, global_rank=0) | |||
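The last hunk removes the unused strategy0 parameter from the test Net and shrinks the Net(None, None, None, None) call site in the same change, so the positional arguments stay aligned with the new signature. A hypothetical before/after sketch with plain classes (names illustrative only):

    class NetBefore:
        def __init__(self, strategy0, strategy1, strategy2, strategy3):   # strategy0 never used
            self.strategies = (strategy1, strategy2, strategy3)

    class NetAfter:
        def __init__(self, strategy1, strategy2, strategy3):
            self.strategies = (strategy1, strategy2, strategy3)

    # Call sites are updated together with the signature:
    net_before = NetBefore(None, None, None, None)
    net_after = NetAfter(None, None, None)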