| @@ -1,178 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
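# Each test process drives one of four devices; its rank comes from the RANK_ID
# environment variable exported by the launch scripts (dist_env_4p.sh / run_*_4p.sh).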
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.add(x, z) | |||
| return self.relu(out) | |||
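# Wraps a network and returns the gradients of all its inputs, given an explicit
# sens (gradient of the output) as the last argument.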
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
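# Test-factory pattern used throughout these cases: build full-size inputs and the
# per-device blocks implied by the parallel strategy, run the net once on a single
# device and once under semi-auto parallel, then compare the matching blocks.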
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
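        # The split counts in strategy[1] determine how many devices the tensor is
        # sharded across; each rank selects its block by device_id modulo that count.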
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = AddRelu() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
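        """Split input_ along each axis by the per-dimension counts in strategy and
        return the flat list of blocks; rank i uses blocks[i % len(blocks)]."""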
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| _ = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| _ = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_add_relu_input_256_64(): | |||
| stra0 = (0, (2, 2), ()) | |||
| stra1 = (0, (2, 2)) | |||
| fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
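# Launch one pytest process per device: each rank gets its own working directory
# (device$i) with an output dir, sources the 4-process distributed environment with
# its rank, and runs the test in the background, logging to ../../log.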
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_add_relu_parallel_4p.py>../../log/test_add_relu_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,356 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore._checkparam import check_bool, twice | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class _Conv(Cell): | |||
| r"""Applies a N-D convolution over an input signal composed of several input | |||
| planes. | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init): | |||
| super(_Conv, self).__init__() | |||
| self.in_channels = in_channels | |||
| self.out_channels = out_channels | |||
| self.kernel_size = kernel_size | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.has_bias = has_bias | |||
| if not (isinstance(in_channels, int) and in_channels > 0): | |||
            raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
                             + str(in_channels) + ', should be an int greater than 0.')
| if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ | |||
| (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ | |||
| kernel_size[0] < 1 or kernel_size[1] < 1: | |||
            raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
                             + str(self.kernel_size) + ', should be a tuple of two ints, each no less than 1.')
| if in_channels % group != 0: | |||
| raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| if out_channels % group != 0: | |||
| raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' | |||
| 'attr \'group\' of \'Conv2D\' Op.') | |||
| self.weight = Parameter(initializer( | |||
| weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') | |||
| if check_bool(has_bias): | |||
| self.bias = Parameter(initializer( | |||
| bias_init, [out_channels]), name='bias') | |||
| else: | |||
| if bias_init != 'zeros': | |||
| print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") | |||
| self.bias = None | |||
| def construct(self, *inputs): | |||
| raise NotImplementedError | |||
| class Conv2d(_Conv): | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride=1, | |||
| pad_mode='same', | |||
| padding=0, | |||
| dilation=1, | |||
| group=1, | |||
| has_bias=False, | |||
| weight_init='normal', | |||
| bias_init='zeros', | |||
| strategy=None): | |||
| kernel_size = twice(kernel_size) | |||
| super(Conv2d, self).__init__( | |||
| in_channels, | |||
| out_channels, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| has_bias, | |||
| weight_init, | |||
| bias_init) | |||
| self.add = P.TensorAdd(strategy) | |||
| self.conv2d = P.Conv2D(out_channel=self.out_channels, | |||
| kernel_size=self.kernel_size, | |||
| mode=1, | |||
| pad_mode=self.pad_mode, | |||
| pad=self.padding, | |||
| stride=self.stride, | |||
| dilation=self.dilation, | |||
| group=self.group, | |||
| strategy=None) | |||
| self.bias_add = P.BiasAdd() | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| if self.has_bias: | |||
| return self.bias_add(self.conv2d(x, self.weight), | |||
| self.bias) | |||
| return self.conv2d(x, self.weight) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Conv2dFactory: | |||
| def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): | |||
| self.in_n, self.in_c, self.in_h, self.in_w = input_shape | |||
| self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) | |||
| prefix = "" | |||
| input_size = 1 | |||
| filter_size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| self.prefix = prefix | |||
| for s in filter_shape: | |||
| filter_size = filter_size * s | |||
| number_range1 = min(10, input_size) | |||
| number_range2 = min(10, filter_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( | |||
| np.float16) | |||
| self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( | |||
| np.float16) | |||
| self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( | |||
| np.float16) | |||
| self.has_bias = has_bias | |||
        if self.has_bias:
| self.bias_np = np.arange(0, self.out_c).astype(np.float16) | |||
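        # Expected output shape for the single case exercised below: a 128x64x112x112
        # input with a 64x64x1x1 kernel and stride 2 gives 128x64x56x56.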
| self.out_shape = (128, 64, 56, 56) | |||
| out_size = 1 | |||
| for s in self.out_shape: | |||
| out_size = out_size * s | |||
| number_range3 = min(10, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, | |||
| self.out_shape).astype(np.float16) | |||
| self.x_id = device_id % 4 | |||
| self.y_id = device_id % 4 | |||
| self.out_strategy = self.strategy0[1] | |||
| self.out_id = device_id % 4 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_conv2d_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_conv2d_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias,) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| out_grad = grad_net(x, y, output_grad) | |||
| return out_grad | |||
| def grad_conv2d_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| weight = Tensor(self.weight_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| output_grad1 = Tensor(output_grads[self.out_id]) | |||
| if self.has_bias: | |||
| bias = Tensor(self.bias_np) | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=True, weight_init=weight, | |||
| bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| else: | |||
| net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, | |||
| kernel_size=(self.kernel_h, self.kernel_w), | |||
| stride=self.stride, pad_mode=self.pad_mode, | |||
| padding=self.padding, dilation=self.dilation, | |||
| group=self.group, has_bias=False, weight_init=weight, | |||
| strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return out_grad | |||
| def forward_conv2d_cmp(self): | |||
| out_mindspore = self.forward_conv2d_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_conv2d_cmp(self): | |||
| input_grad_mindspore = self.grad_conv2d_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) | |||
| assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) | |||
        assert allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.001, 0.001)
def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false():
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.forward_conv2d_cmp() | |||
def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false():
| fact = Conv2dFactory(input_shape=(128, 64, 112, 112), | |||
| filter_shape=(64, 64, 1, 1), | |||
| stride=2, pad_mode='valid', padding=0, | |||
| dilation=1, group=1, has_bias=False) | |||
| fact.grad_conv2d_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_conv2d_parallel_4p.py>../../log/test_conv2d_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,36 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
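# Distributed environment for one of four ranks: takes the rank as $1 and exports the
# RANK/DEVICE ids plus the HCCL and runtime settings needed by the parallel tests.
# Rank 0 additionally enables graph dumping for debugging.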
| export SLOG_PRINT_TO_STDOUT=1 | |||
| source /root/miniconda3/bin/activate ci3.6 | |||
| export RANK_SIZE=4 | |||
| export RANK_TABLE_FILE=../../rank_table_4p.json | |||
| export RANK_ID=$1 | |||
| export DEVICE_ID=$1 | |||
| export HCCL_FLAG=1 | |||
| export DEPLOY_MODE=0 | |||
| export AICPU_FLAG=1 | |||
| export DUMP_OP=1 | |||
| export PYTHONPATH=../../../../../../../../mindspore:/usr/local/HiAI/runtime/python3.6/site-packages/topi.egg/:/usr/local/HiAI/runtime/python3.6/site-packages/te.egg/:/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/ | |||
| export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/HiAI/runtime/lib64/libhccl.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so | |||
| export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64 | |||
| export FE_FLAG=1 | |||
| export PATH=/usr/local/HiAI/runtime/ccec_compiler/bin:$PATH | |||
| if [ $1 -eq 0 ]; | |||
| then | |||
| export DUMP_GE_GRAPH=true | |||
| export ME_DRAW_GRAPH=1 | |||
| fi | |||
| @@ -1,120 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.nn import Dropout | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, keep_prob, seed0, seed1, strategy=None): | |||
| super(Net, self).__init__() | |||
| self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) | |||
| def construct(self, input_): | |||
| x = self.drop(input_) | |||
| return x | |||
| # pylint: disable=comparison-with-itself | |||
| class DropoutFactory: | |||
| def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) | |||
| self.keep_prob = keep_prob | |||
| self.seed0 = seed0 | |||
| self.seed1 = seed1 | |||
| self.strategy0 = strategy0 | |||
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
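    # Element-wise check for inverted dropout: every kept element must equal the
    # input scaled by 1/keep_prob, and zeros are accepted as dropped elements.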
| def d4_tensor_compare(self, input_, out_me): | |||
| [a, b, c, d] = input_.shape | |||
| for i in range(a): | |||
| for j in range(b): | |||
| for k in range(c): | |||
| for e in range(d): | |||
                        if out_me[i, j, k, e] != 0:
                            assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / self.keep_prob),
                                               0.0001, 0.0001)
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
        net = Net(self.keep_prob, self.seed0, self.seed1, strategy=self.strategy0)
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): | |||
| fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) | |||
| fact.forward_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_dropout_parallel_4p.py>../../log/test_dropout_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,154 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y): | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
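# Parallel counterpart of MatmulSingle: each rank holds a row shard of x, AllGather
# reassembles the full x before the matmul, and the resulting loss is AllReduce-summed.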
| class MatmulAllgather(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulAllgather, self).__init__() | |||
| self.allgather = P.AllGather(group=group) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce = P.AllReduce(group=group) | |||
| def construct(self, x, y): | |||
| x = self.allgather(x) | |||
| out = self.matmul(x, y) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, sens): | |||
| return grad_all_with_sens(self.network)(x, y, sens) | |||
| class MatmulAllgatherFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): | |||
| self.inputx = self.gen_value(inputx_shape, 10) | |||
| self.inputy = self.gen_value(inputy_shape, 20) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def gen_value(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
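        # Scale the sens to compensate for the gradient accumulation introduced by the
        # collectives, so the distributed gradients match the single-device reference.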
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulAllgather("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, sens) | |||
| return input_grad | |||
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (64, 32) | |||
| inputy_shape = (32, 64) | |||
| fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
| @@ -1,175 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MatmulSingle(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False): | |||
| super(MatmulSingle, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| return out | |||
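# Parallel counterpart of MatmulSingle: x and y are sharded along the contracted
# dimension, so each rank computes a partial product that AllReduce sums before the
# second matmul; the final loss is AllReduce-summed as well.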
| class MatmulReduce(Cell): | |||
| def __init__(self, group, transpose_a=False, transpose_b=False): | |||
| super(MatmulReduce, self).__init__() | |||
| self.matmul1 = P.MatMul(transpose_a, transpose_b) | |||
| self.allreduce1 = P.AllReduce(group=group) | |||
| self.matmul2 = P.MatMul(transpose_a, transpose_b) | |||
| self.pow = P.Pow() | |||
| self.reduce_sum = P.ReduceSum() | |||
| self.allreduce2 = P.AllReduce(group=group) | |||
| def construct(self, x, y, z): | |||
| out = self.matmul1(x, y) | |||
| out = self.allreduce1(out) | |||
| out = self.matmul2(out, z) | |||
| out = self.pow(out, 2.0) | |||
| out = self.reduce_sum(out, None) | |||
| out = self.allreduce2(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, z, sens): | |||
| return grad_all_with_sens(self.network)(x, y, z, sens) | |||
| class MatmulReduceFactory: | |||
| def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): | |||
| self.inputx = self.gen_value(inputx_shape, 10) | |||
| self.inputy = self.gen_value(inputy_shape, 20) | |||
| self.inputz = self.gen_value(inputz_shape, 30) | |||
| self.x_stra = x_stra | |||
| self.y_stra = y_stra | |||
| self.z_stra = z_stra | |||
| stra_size = 1 | |||
| for s in x_stra: | |||
| stra_size = stra_size * s | |||
| self.stra_size = stra_size | |||
| def gen_value(self, input_shape, delta): | |||
| size = 1 | |||
| for s in input_shape: | |||
| size = size * s | |||
| number_range = min(100, size) | |||
| input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) | |||
| return input_np | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl_single(self): | |||
| x = Tensor(self.inputx) | |||
| y = Tensor(self.inputy) | |||
| z = Tensor(self.inputz) | |||
| sens = Tensor(1.0, dtype=ms.float32) | |||
| net = MatmulSingle() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
| def grad_mindspore_impl_reduce(self): | |||
| inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) | |||
| inputys = self.get_parallel_blocks(self.inputy, self.y_stra) | |||
| inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) | |||
| x = Tensor(inputxs[device_id % self.stra_size]) | |||
| y = Tensor(inputys[device_id % self.stra_size]) | |||
| z = Tensor(inputzs[device_id % self.stra_size]) | |||
| repeat_num = device_num / self.stra_size | |||
| v = self.stra_size * repeat_num * repeat_num * repeat_num | |||
| sens = Tensor(1.0 / v, dtype=ms.float32) | |||
| net = MatmulReduce("hccl_world_group") | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, z, sens) | |||
| return input_grad | |||
| def grad_cmp(self): | |||
| single_results = self.grad_mindspore_impl_single() | |||
| reduce_results = self.grad_mindspore_impl_reduce() | |||
| single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] | |||
| reduce_result0 = reduce_results[0].asnumpy() | |||
| single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] | |||
| reduce_result1 = reduce_results[1].asnumpy() | |||
| single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] | |||
| reduce_result2 = reduce_results[2].asnumpy() | |||
| assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) | |||
| assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) | |||
| assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) | |||
| def test_reduce_grad(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) | |||
| fact.grad_cmp() | |||
| def test_reduce_grad_repeat(): | |||
| inputx_shape = (32, 64) | |||
| inputy_shape = (64, 64) | |||
| inputz_shape = (64, 32) | |||
| fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_allgather_4p.py>../../log/test_allgather_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_allreduce_4p.py>../../log/test_allreduce_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,206 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class L2normalize(Cell): | |||
| def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): | |||
| super(L2normalize, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.l2norm = P.L2Normalize(axis, epsilon, strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.l2norm(out) | |||
| return out | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class L2normalizeFactory: | |||
| def __init__(self, input_shape, axis, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.axis = axis | |||
| self.epsilon = 1e-4 | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = L2normalize(self.axis, self.epsilon) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_l2normalize_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
| fact.grad_cmp() | |||
| def test_reid_l2normalize_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_l2normalize_grad_input_128_512_repeat(): | |||
| input_shape = (128, 512) | |||
| axis = 0 | |||
| fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_l2normalize_parallel_4p.py>../../log/test_l2normalize_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1 +0,0 @@ | |||
Log files for auto parallel end-to-end test cases.
| @@ -1,195 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class AddRelu(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None): | |||
| super(AddRelu, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.relu = P.ReLU(strategy=strategy1) | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| out = self.relu(out) | |||
| return out | |||
| class NetWithLoss(Cell): | |||
| def __init__(self, network, strategy2=None): | |||
| super(NetWithLoss, self).__init__() | |||
| self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| predict = self.network(x, y) | |||
| return self.loss(predict, b)[0] | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, b): | |||
| return grad_all(self.network)(x, y, b) | |||
| class AddReluFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1, strategy2): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = input_shape | |||
| self.target_shape = target_shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(10, target_size) | |||
| self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( | |||
| np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.strategy2 = strategy2 | |||
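        # output_grad_np is consumed as the label b of the loss, so it is split with
        # the ReLU output layout (strategy1[1]); out_id selects this device's block.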
| out_strategy = strategy1[1] | |||
| self.out_strategy = out_strategy | |||
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
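        """Split input_ into per-device blocks according to a layout strategy.

        Each entry of strategy is the number of slices taken along the matching axis;
        blocks are returned in row-major order of their slice coordinates, e.g. a
        (4, 4) array with strategy (2, 2) yields four (2, 2) blocks.
        """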
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = AddRelu() | |||
| net_with_loss = NetWithLoss(net) | |||
| grad_net = Grad(net_with_loss) | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| net_with_loss = NetWithLoss(net, strategy2=self.strategy2) | |||
| grad_net = Grad(net_with_loss) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grads = [] | |||
| for i in range(0, 3): | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| input_grads.append(input_grad) | |||
| return input_grads | |||
| def grad_cmp(self): | |||
| input_grad_mindspores = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() | |||
| for i in range(0, len(input_grad_mindspores)): | |||
| input_grad_mindspore = input_grad_mindspores[i] | |||
| input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", | |||
| input_grad_blocks_0[self.x_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", | |||
| input_grad_blocks_1[self.y_id]) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", | |||
| input_grad_mindspore_parallel0) | |||
| np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", | |||
| input_grad_mindspore_parallel1) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
def test_reid_loss_grad_input_128_512():
    input_shape = (128, 512)
    fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
                          strategy2=(0, (4, 1), (4, 1)))
    fact.grad_cmp()
def test_reid_loss_grad_input_128_512_stridesplit():
    input_shape = (128, 512)
    fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
                          strategy2=(0, (4, 1), (4, 1)))
    fact.grad_cmp()
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_loss_parallel_4p.py>../../log/test_loss_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,329 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Matmul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(Matmul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.matmul(out, w) | |||
| class BatchMatMul(Cell): | |||
| def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): | |||
| super(BatchMatMul, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1) | |||
| self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) | |||
| def construct(self, x, w, z): | |||
| out = self.add(x, z) | |||
| return self.batchmatmul(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, inputa, inputb, inputz, output_grad): | |||
| gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) | |||
| return gout | |||
| class BatchmatmulFactory: | |||
| def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_): | |||
| self.strategy = strategy | |||
| self.strategy_ = strategy_ | |||
| inputa_size = 1 | |||
| inputb_size = 1 | |||
| prefix = "" | |||
| for s in inputa_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputa_size = inputa_size * s | |||
| prefix = prefix + "and" | |||
| for s in inputb_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| inputb_size = inputb_size * s | |||
| number_rangea = min(1000, inputa_size) | |||
| number_rangeb = min(1000, inputb_size) | |||
| self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype( | |||
| np.float32) | |||
| self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( | |||
| np.float32) | |||
| self.inputz = np.zeros(self.inputa.shape).astype(np.float32) | |||
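        # inputz is all zeros, so the TensorAdd placed before the (Batch)MatMul leaves
        # the values unchanged; it mainly serves to exercise the add strategy (strategy_).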
| self.transpose_a = transpose_a | |||
| self.transpose_b = transpose_b | |||
| out_shape = [] | |||
| device_matrix = [] | |||
| out_strategy = [] | |||
| if transpose_a: | |||
| temp = inputa_shape[-1] | |||
| inputa_shape[-1] = inputa_shape[-2] | |||
| inputa_shape[-2] = temp | |||
| if transpose_b: | |||
| temp = inputb_shape[-1] | |||
| inputb_shape[-1] = inputb_shape[-2] | |||
| inputb_shape[-2] = temp | |||
| if len(inputa_shape) >= len(inputb_shape): | |||
| out_shape = list(inputa_shape) | |||
| out_shape[-1] = inputb_shape[-1] | |||
| else: | |||
| out_shape = list(inputb_shape) | |||
| out_shape[-2] = inputa_shape[-2] | |||
| strategy1 = list(self.strategy[1]) | |||
| strategy2 = list(self.strategy[2]) | |||
| if transpose_a: | |||
| temp = strategy1[-1] | |||
| strategy1[-1] = strategy1[-2] | |||
| strategy1[-2] = temp | |||
| if transpose_b: | |||
| temp = strategy2[-1] | |||
| strategy2[-1] = strategy2[-2] | |||
| strategy2[-2] = temp | |||
| if len(strategy1) >= len(strategy2): | |||
| out_strategy = strategy1.copy() | |||
| out_strategy[-1] = strategy2[-1] | |||
| else: | |||
| out_strategy = strategy2.copy() | |||
| out_strategy[-2] = strategy1[-2] | |||
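        # The device matrix is the output strategy with the split count of the contracted
        # dimension (strategy1[-1]) inserted before the last axis; the product of its
        # entries is the number of devices this matmul strategy requires.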
| device_matrix = out_strategy.copy() | |||
| device_matrix.insert(-1, strategy1[-1]) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| for s in device_matrix: | |||
| need_dev_num = need_dev_num * s | |||
| self.need_dev_num = need_dev_num | |||
| self.device_matrix = device_matrix | |||
| out_size = 1 | |||
| for s in out_shape: | |||
| out_size = out_size * s | |||
| number_range = min(1000, out_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( | |||
| np.float32) | |||
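        # Map this device's rank to a coordinate in the device matrix, then keep the axes
        # relevant to each operand: all but the last axis for input A, the leading batch
        # axes plus the last two for input B, and all but the contracted axis for the
        # output-gradient block (axes are swapped back when a transpose flag is set).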
| device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) | |||
| x_index = device_index[:-1].copy() | |||
| if transpose_a: | |||
| temp = x_index[-1] | |||
| x_index[-1] = x_index[-2] | |||
| x_index[-2] = temp | |||
| y_index = device_index[:-3].copy() | |||
| y_index.append(device_index[-2]) | |||
| y_index.append(device_index[-1]) | |||
| if transpose_b: | |||
| temp = y_index[-1] | |||
| y_index[-1] = y_index[-2] | |||
| y_index[-2] = temp | |||
| out_index = device_index[:-2].copy() | |||
| out_index.append(device_index[-1]) | |||
| print(device_matrix) | |||
| print(device_index) | |||
| need_dev_num_ = 1 | |||
| for s in strategy_[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num_ | |||
| self.y_id = self.list_to_id(y_index, self.strategy[2]) | |||
| self.out_id = self.list_to_id(out_index, self.out_strategy) | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def id_to_list(self, id_, shape): | |||
| """ | |||
| shape:每一维的上限,如(2,4,8) | |||
| """ | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
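        """Inverse of id_to_list: fold per-dimension coordinates back into a flat id.

        For example, [1, 1] with shape (2, 4) gives 1 * 4 + 1 = 5.
        """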
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| matmul.set_train() | |||
| out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) | |||
| return out_me.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
        x1 = Tensor(xs[self.x_id])
        y1 = Tensor(ys[self.y_id])  # block index derived from the device matrix
| z1 = Tensor(zs[self.x_id]) | |||
| matmul.set_train() | |||
| matmul.set_auto_parallel() | |||
| out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) | |||
| return out_me.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b) | |||
| net_me = Grad(matmul) | |||
| net_me.set_train() | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| out_grad = net_me(x, y, z, out_grad_me) | |||
| return out_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| if len(self.inputa.shape) > 2: | |||
| matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| else: | |||
| matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) | |||
| x = Tensor(self.inputa) | |||
| y = Tensor(self.inputb) | |||
| z = Tensor(self.inputz) | |||
| out_grad_me = Tensor(self.output_grad_np) | |||
| xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) | |||
| ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) | |||
| zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) | |||
| out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
        x1 = Tensor(xs[self.x_id])  # block index derived from the device matrix
        y1 = Tensor(ys[self.y_id])
| z1 = Tensor(zs[self.x_id]) | |||
| out_grad1 = Tensor(out_grads[self.out_id]) | |||
| net_me = Grad(matmul) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net_me.set_auto_parallel() | |||
| net_me.set_train() | |||
| out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], | |||
| parallel_inputs_run=[x1, y1, z1, out_grad1]) | |||
| return out_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) | |||
| input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) | |||
| input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) | |||
| assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) | |||
| assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.forward_cmp() | |||
| def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): | |||
| inputa = [128, 512] | |||
| inputb = [2000, 512] | |||
| fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_matmul_parallel_4p.py >../../log/test_matmul_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,213 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input1, input2, output_grad): | |||
| return grad_all_with_sens(self.network)(input1, input2, output_grad) | |||
| class Max(Cell): | |||
| def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): | |||
| super(Max, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, input1, input2): | |||
| out = self.add(input1, input2) | |||
| return self.reduce_max(out, self.axis) | |||
| class MaxFactory: | |||
| def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| self.axis = axis | |||
| self.keep_dims = keep_dims | |||
| input_size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) + "_" | |||
| input_size = input_size * s | |||
| number_range = min(1000, input_size) | |||
| self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = self.input_np1.copy() | |||
| self.out_grad_np = None | |||
| out_shape = list(input_shape) | |||
| out_shape.pop(axis) | |||
        out_size = input_size // input_shape[axis]
| number_range_ = min(1000, out_size) | |||
| self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( | |||
| np.float32) | |||
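        # The reduced axis disappears from the output (keep_dims is False in these tests),
        # so the output strategy is strategy1[1] with that axis's split count removed.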
| out_strategy = list(strategy1[1]) | |||
| out_strategy.pop(axis) | |||
| self.out_strategy = out_strategy | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in out_strategy: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| out = net(input1, input2) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| input1 = Tensor(self.input_np1) | |||
| input2 = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.out_grad_np) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(input1, input2, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) | |||
| out_grad = Tensor(output_grads[self.out_id]) | |||
| xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(xs[self.x_id]) | |||
| y1 = Tensor(ys[self.y_id]) | |||
| net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], | |||
| parallel_inputs_run=[x1, y1, out_grad]) | |||
| return input_grad | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| print(out_mindspore) | |||
| print(out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_max_forward_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), | |||
| strategy1=(0, (4, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_128_64_32_32(): | |||
| fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), | |||
| strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| def test_reid_max_forward_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_cmp() | |||
| def test_reid_max_grad_input_256_64_repeat(): | |||
| fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_max_parallel_4p.py>../../log/test_max_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_mul_softmax_parallel_4p.py>../../log/test_mul_softmax_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,200 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class MulSoftmax(Cell): | |||
| def __init__(self, strategy0=None, strategy1=None, axis=0): | |||
| super(MulSoftmax, self).__init__() | |||
| self.mul = P.Mul(strategy=strategy0) | |||
| self.softmax = P.Softmax(axis=axis, strategy=strategy1) | |||
| def construct(self, x, z): | |||
| out = self.mul(x, z) | |||
| return self.softmax(out) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class MulSoftmaxFactory: | |||
| def __init__(self, input_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = 1.0 | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| need_dev_num = 1 | |||
| need_dev_num_ = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| for s in strategy1[1]: | |||
| need_dev_num_ = need_dev_num_ * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.y_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num_ | |||
| def forward_mindspore_impl(self): | |||
| net = MulSoftmax() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax() | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) | |||
| output_grad = Tensor(output_grads[self.out_id]) | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2, ms.float32) | |||
| net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_train() | |||
| grad_net.set_auto_parallel() | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(self.input_np2, ms.float32) | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], | |||
| parallel_inputs_run=[x1, y1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel) | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0) | |||
| np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1) | |||
        # Only the first Mul input (x1) is sliced; the scalar second input (x2) is
        # broadcast, so its gradient is compared without splitting.
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
| assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64(): | |||
| stra0 = (0, (1, 4), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_mul_softmax_input_128x64_all_to_all(): | |||
| stra0 = (0, (4, 1), ()) | |||
| stra1 = (0, (1, 4)) | |||
| fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) | |||
| fact.grad_cmp() | |||
| @@ -1,147 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Onehot(Cell): | |||
| def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): | |||
| super(Onehot, self).__init__() | |||
| self.onehot = P.OneHot(axis, strategy=strategy) | |||
| self.depth = depth | |||
| self.on_value = Tensor(on_value, ms.float32) | |||
| self.off_value = Tensor(off_value, ms.float32) | |||
| def construct(self, indices): | |||
| return self.onehot(indices, self.depth, self.on_value, self.off_value) | |||
| class OneHotFactory: | |||
| def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): | |||
| size = 1 | |||
| prefix = "" | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(10, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) | |||
| self.depth = depth | |||
| self.on_value = on_value | |||
| self.off_value = off_value | |||
| self.axis = axis | |||
| self.dtype = dtype | |||
| self.strategy0 = strategy0 | |||
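        # OneHot is split only along the indices dimension here, so the input block index
        # and the forward-output block index coincide on every device.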
| need_dev_num = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num = need_dev_num * s | |||
| self.x_id = device_id % need_dev_num | |||
| self.out_id = device_id % need_dev_num | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_mindspore_impl(self): | |||
| indices = Tensor(self.input_np) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value) | |||
| out = net(indices) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| net = Onehot(axis=self.axis, | |||
| depth=self.depth, | |||
| on_value=self.on_value, | |||
| off_value=self.off_value, strategy=self.strategy0) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) | |||
| return out.asnumpy() | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) | |||
def test_reid_onehot_forward_int32_128_depth131072():
| fact = OneHotFactory(input_shape=(128,), | |||
| depth=131072, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (2,))) | |||
| fact.forward_cmp() | |||
| def test_reid_onehot_forward_int32_131072_depth127(): | |||
| fact = OneHotFactory(input_shape=(131072,), | |||
| depth=127, | |||
| on_value=1.000000, | |||
| off_value=0.000000, | |||
| axis=-1, | |||
| dtype="float32", | |||
| strategy0=(0, (4,))) | |||
| fact.forward_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_onehot_parallel_4p.py>../../log/test_onehot_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,206 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class PReLU(Cell): | |||
| def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): | |||
| super(PReLU, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy1_) | |||
| self.prelu = P.PReLU(strategy=strategy_) | |||
| self.channel = channel | |||
| def construct(self, x, z, w): | |||
| out = self.add(x, z) | |||
| return self.prelu(out, w) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, input_, z, w, output_grad): | |||
| return grad_all_with_sens(self.network)(input_, z, w, output_grad) | |||
| class PReLUFactory: | |||
| def __init__(self, input_shape, strategy): | |||
| n, c = input_shape[:2] | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) | |||
| self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, | |||
| input_shape).astype(np.float32) | |||
| self.channel = c | |||
| self.weight = np.array([np.float32(0.25)] * c) | |||
| self.strategy = strategy | |||
| def forward_mindspore_impl(self): | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| out = net(x, z, w) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
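        # The TensorAdd feeding PReLU reuses the PReLU input layout for both of its operands
        # (strategy1_ repeats strategy[1]), so x and the all-zero z are sliced into
        # identically shaped blocks.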
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| block_id = device_id % len(inputs) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| output_grad = Tensor(self.output_grad_np) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, z, w, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) | |||
| block_id = device_id % len(output_grads) | |||
| output_grad = Tensor(output_grads[block_id]) | |||
| x = Tensor(self.input_np) | |||
| z = Tensor(np.zeros(self.input_np.shape), ms.float32) | |||
| w = Tensor(self.weight) | |||
| net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, | |||
| strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) | |||
| x1 = Tensor(inputs[block_id]) | |||
| z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) | |||
| w1 = Tensor(self.weight) | |||
| input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], | |||
| parallel_inputs_run=[x1, z1, w1, output_grad]) | |||
| return input_grad | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) | |||
| block_id = device_id % len(out_blocks) | |||
| assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() | |||
| input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) | |||
| input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) | |||
| block_id = device_id % len(input_grad_blocks) | |||
| assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) | |||
| assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_repeat(): | |||
    stra = (0, (1, 1, 2, 1), (1,))
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_repeat(): | |||
    stra = (0, (1, 1, 2, 1), (1,))
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_prelu_input_128x64x112x112_mix(): | |||
    stra = (0, (2, 1, 1, 2), (1,))
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.forward_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_grad_prelu_input_128x64x112x112_mix(): | |||
    stra = (0, (2, 1, 1, 2), (1,))
| fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_prelu_parallel_4p.py >../../log/test_prelu_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,252 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore as ms | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
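| # These tests assume a 4-device group; RANK_ID selects which input/output blocks this process computes and compares. | |||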
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class GradScalar(Cell): | |||
| def __init__(self, network): | |||
| super(GradScalar, self).__init__() | |||
| self.network = network | |||
| self.sens = Tensor([1.0], dtype=ms.float32) | |||
| def construct(self, x, y): | |||
| return grad_all_with_sens(self.network)(x, y, self.sens) | |||
| class ReduceMean(Cell): | |||
| def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): | |||
| super(ReduceMean, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) | |||
| self.axis = axis | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.reduce_mean(out, self.axis) | |||
| class ReduceMeanFactory: | |||
| def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| self.keep_dims = keep_dims | |||
| self.axis = axis | |||
| target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.array([1.0], dtype=np.float32) | |||
| if len(target_shape) > 0: | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( | |||
| np.float32) + 1.0 | |||
| self.shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| axis_ = list(axis) | |||
| if axis_[0] == -1: | |||
| axis_[0] = len(input_shape) - 1 | |||
| for i in range(0, len(input_shape)): | |||
| if i in axis_: | |||
| if keep_dims: | |||
| out_strategy.append(1) | |||
| else: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
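| # Example: input (64, 128, 32, 32), axis=(2, 3), keep_dims=True, strategy1=(0, (2, 1, 2, 1)) gives out_strategy [2, 1, 1, 1]. | |||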
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| block_id = device_id % need_dev_num0 | |||
| device_index = self.id_to_list(block_id, self.strategy1[1]) | |||
| print(device_index) | |||
| for i in axis: | |||
| device_index[i] = 0 | |||
| print(device_index) | |||
| self.out_id = self.list_to_id(device_index, self.out_strategy) | |||
| print(self.out_id) | |||
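| # Decode a flat block id into per-dimension indices under the given partition shape (mixed-radix decode). | |||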
| def id_to_list(self, id_, shape): | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
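| # Inverse of id_to_list: encode per-dimension indices back into a flat block id. | |||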
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
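| # Split the full array into per-device blocks along each partitioned dimension, in the same order as the flat block ids. | |||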
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
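| # Single-device reference: add + reduce_mean on the full inputs. | |||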
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
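| # Semi-auto-parallel run: compile with the full tensors but feed this rank's input slices at run time. | |||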
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| out_grad = Tensor(self.output_grad_np) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, out_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
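| # Slice the single-device output with out_strategy and check this rank's block against the parallel output. | |||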
| def forward_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x16(): | |||
| fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), | |||
| strategy1=(0, (4,))) | |||
| fact.grad_cmp() | |||
| def test_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.forward_cmp() | |||
| def test_grad_reid_reducemean_input_64x128x32x32(): | |||
| fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), | |||
| strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) | |||
| fact.grad_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_reducemean_parallel_4p.py>../../log/test_reducemean_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,206 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class Reshape(Cell): | |||
| def __init__(self, target_shape, strategy0=None, strategy1=None): | |||
| super(Reshape, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.reshape = P.Reshape(strategy=strategy1) | |||
| self.shape = tuple(target_shape) | |||
| def construct(self, input1, input2): | |||
| x = self.add(input1, input2) | |||
| return self.reshape(x, self.shape) | |||
| class ReshapeFactory: | |||
| def __init__(self, input_shape, target_shape, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.target_shape = target_shape | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [1] * len(target_shape) | |||
| out_strategy[0] = strategy1[1][0] | |||
| self.out_strategy = out_strategy | |||
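| # Only the first output dimension stays partitioned; e.g. target (128, 25088) with strategy1=(0, (4, 1, 1, 1)) gives out_strategy [4, 1]. | |||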
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
| self.out_id = device_id % need_dev_num1 | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
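| # Single-device reference: add the full inputs, then reshape to target_shape. | |||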
| def forward_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Reshape(self.target_shape) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_reshape_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Reshape(self.target_shape) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_reshape_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
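| # Slice the single-device reshape output with out_strategy and compare this rank's block to the parallel output. | |||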
| def forward_reshape_cmp(self): | |||
| out_mindspore = self.forward_reshape_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_reshape_cmp(self): | |||
| input_grad_mindspore = self.grad_reshape_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.forward_reshape_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): | |||
| fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), | |||
| strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) | |||
| fact.grad_reshape_cmp() | |||
| @pytest.mark.reid_forward | |||
| def test_reid_reshape_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.forward_reshape_cmp() | |||
| @pytest.mark.reid_grad | |||
| def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): | |||
| fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), | |||
| strategy1=(0, (2, 1))) | |||
| fact.grad_reshape_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_reshape_parallel_4p.py>../../log/test_reshape_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||
| @@ -1,235 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| import os | |||
| import numpy as np | |||
| from numpy import allclose as allclose_nparray | |||
| import mindspore.communication.management as distributedTool | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.composite import grad_all_with_sens | |||
| device_num = 4 | |||
| device_id = int(os.environ["RANK_ID"]) | |||
| path = "./output/" | |||
| def setup_module(): | |||
| print("~~~~~~~~~~~set up~~~~~~~~~~~~~") | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) | |||
| distributedTool.init() | |||
| distributedTool.create_group("0-3", [0, 1, 2, 3]) | |||
| print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") | |||
| def teardown_module(): | |||
| print("~~~~~~~~~~~~tear down~~~~~~~~~~") | |||
| class Net(Cell): | |||
| def __init__(self, perm_in, strategy0=None, strategy1=None): | |||
| super(Net, self).__init__() | |||
| self.add = P.TensorAdd(strategy=strategy0) | |||
| self.transpose = P.Transpose(strategy=strategy1) | |||
| self.perm_in = perm_in | |||
| def construct(self, x, y): | |||
| out = self.add(x, y) | |||
| return self.transpose(out, self.perm_in) | |||
| class Grad(Cell): | |||
| def __init__(self, network): | |||
| super(Grad, self).__init__() | |||
| self.network = network | |||
| def construct(self, x, y, output_grad): | |||
| return grad_all_with_sens(self.network)(x, y, output_grad) | |||
| class TransposeFactory: | |||
| def __init__(self, input_shape, perm_in, strategy0, strategy1): | |||
| prefix = "" | |||
| size = 1 | |||
| for s in input_shape: | |||
| prefix = prefix + str(s) | |||
| size = size * s | |||
| self.prefix = prefix | |||
| number_range = min(1000, size) | |||
| self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( | |||
| np.float32) | |||
| self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( | |||
| np.float32) | |||
| target_shape = self.input_np1.transpose(perm_in).shape | |||
| target_size = 1 | |||
| for s in target_shape: | |||
| target_size = target_size * s | |||
| number_range = min(1000, target_size) | |||
| self.target_shape = target_shape | |||
| self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, | |||
| target_shape).astype(np.float32) | |||
| self.perm_in = perm_in | |||
| self.strategy0 = strategy0 | |||
| self.strategy1 = strategy1 | |||
| out_strategy = [] | |||
| for i in perm_in: | |||
| out_strategy.append(strategy1[1][i]) | |||
| self.out_strategy = out_strategy | |||
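| # The output strategy is strategy1 permuted by perm_in; e.g. strategy1=(0, (1, 4)) with perm_in=(1, 0) gives out_strategy [4, 1]. | |||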
| need_dev_num0 = 1 | |||
| need_dev_num1 = 1 | |||
| for s in strategy0[1]: | |||
| need_dev_num0 = need_dev_num0 * s | |||
| for s in out_strategy: | |||
| need_dev_num1 = need_dev_num1 * s | |||
| self.x_id = device_id % need_dev_num0 | |||
| self.y_id = device_id % need_dev_num0 | |||
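| # Map this rank to its output block: decode the block id under strategy1, permute the indices by perm_in, then re-encode with out_strategy. | |||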
| device_index = self.id_to_list(device_id % need_dev_num1, | |||
| self.strategy1[1])  # per-dimension index of this rank's block before the transpose | |||
| device_index_transpose = [] | |||
| for i in perm_in: | |||
| device_index_transpose.append(device_index[i]) | |||
| self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) | |||
| def get_parallel_blocks(self, input_, strategy): | |||
| blocks = [input_] | |||
| i = 0 | |||
| for stra in strategy: | |||
| temp = [] | |||
| while len(blocks) > 0: | |||
| block = blocks.pop(0) | |||
| temp.extend(np.split(block, stra, axis=i)) | |||
| blocks.extend(temp) | |||
| i += 1 | |||
| return blocks | |||
| def id_to_list(self, id_, shape): | |||
| result = [] | |||
| r = id_ | |||
| for i in range(0, len(shape)): | |||
| v = 1 | |||
| for j in range(i + 1, len(shape)): | |||
| v = v * shape[j] | |||
| result.append(r // v) | |||
| r = r % v | |||
| return result | |||
| def list_to_id(self, id_list, shape): | |||
| result = 0 | |||
| for i in range(0, len(id_list)): | |||
| v = 1 | |||
| for j in range(i + 1, len(id_list)): | |||
| v = v * shape[j] | |||
| result = result + id_list[i] * v | |||
| return result | |||
| def forward_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| net = Net(self.perm_in) | |||
| out = net(x, y) | |||
| return out.asnumpy() | |||
| def forward_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| net.set_auto_parallel() | |||
| out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) | |||
| return out.asnumpy() | |||
| def grad_mindspore_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| net = Net(self.perm_in) | |||
| grad_net = Grad(net) | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad) | |||
| return input_grad | |||
| def grad_mindspore_parallel_impl(self): | |||
| x = Tensor(self.input_np1) | |||
| y = Tensor(self.input_np2) | |||
| output_grad = Tensor(self.output_grad_np) | |||
| inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) | |||
| inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) | |||
| outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) | |||
| x1 = Tensor(inputs_x[self.x_id]) | |||
| y1 = Tensor(inputs_y[self.y_id]) | |||
| output_grad1 = Tensor(outgrads[self.out_id]) | |||
| net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) | |||
| grad_net = Grad(net) | |||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||
| grad_net.set_auto_parallel() | |||
| grad_net.set_train() | |||
| input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], | |||
| parallel_inputs_run=[x1, y1, output_grad1]) | |||
| return input_grad | |||
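| # Slice the single-device transpose output with out_strategy and compare this rank's block to the parallel output. | |||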
| def forward_transpose_cmp(self): | |||
| out_mindspore = self.forward_mindspore_impl() | |||
| out_mindspore_parallel = self.forward_mindspore_parallel_impl() | |||
| out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) | |||
| assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) | |||
| def grad_transpose_cmp(self): | |||
| input_grad_mindspore = self.grad_mindspore_impl() | |||
| input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() | |||
| input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() | |||
| input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() | |||
| input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() | |||
| input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() | |||
| input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) | |||
| input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) | |||
| assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) | |||
| assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) | |||
| def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): | |||
| fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) | |||
| fact.grad_transpose_cmp() | |||
| def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.forward_transpose_cmp() | |||
| def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): | |||
| fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) | |||
| fact.grad_transpose_cmp() | |||
| @@ -1,27 +0,0 @@ | |||
| #!/bin/bash | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| for((i=0;i<4;i++)); | |||
| do | |||
| rm -rf device$i | |||
| mkdir device$i | |||
| cd device$i | |||
| mkdir output | |||
| source ../../dist_env_4p.sh $i | |||
| env >log$i.log | |||
| pytest -s ../test_transpose_parallel_4p.py>../../log/test_transpose_parallel_4p_log$i.log 2>&1 & | |||
| cd .. | |||
| done | |||