
!1348 fix pylint warnings of parallel test cases

Merge pull request !1348 from yihuaijie/master
tags/v0.3.0-alpha
mindspore-ci-bot (Gitee), 5 years ago
parent commit 3f23aa1d79
90 changed files with 4030 additions and 4081 deletions
  1. +154 -154 tests/st/auto_parallel/onehot_model_parallel.py
  2. +276 -275 tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py
  3. +26 -26 tests/st/auto_parallel/test_expand_loss.py
  4. +21 -22 tests/st/auto_parallel/test_model_parallel_onehot.py
  5. +16 -19 tests/st/auto_parallel/test_resnet50_expand_loss_2p.py
  6. +17 -17 tests/ut/python/communication/__init__.py
  7. +0 -1 tests/ut/python/communication/test_comm.py
  8. +1 -2 tests/ut/python/communication/test_data_parallel_lenet.py
  9. +1 -1 tests/ut/python/parallel/__init__.py
  10. +178 -178 tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
  11. +356 -356 tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
  12. +120 -120 tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
  13. +154 -154 tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py
  14. +175 -175 tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py
  15. +206 -207 tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
  16. +195 -196 tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
  17. +329 -329 tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
  18. +213 -214 tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
  19. +200 -201 tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
  20. +147 -149 tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
  21. +206 -206 tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
  22. +252 -253 tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
  23. +206 -206 tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
  24. +235 -236 tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
  25. +3 -3 tests/ut/python/parallel/test_add_relu_redistribution.py
  26. +20 -21 tests/ut/python/parallel/test_allreduce_fusion.py
  27. +1 -2 tests/ut/python/parallel/test_alltoall.py
  28. +20 -20 tests/ut/python/parallel/test_arithmetic.py
  29. +0 -1 tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py
  30. +5 -6 tests/ut/python/parallel/test_auto_parallel_arithmetic.py
  31. +1 -2 tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
  32. +0 -1 tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
  33. +4 -4 tests/ut/python/parallel/test_auto_parallel_four_matmul.py
  34. +1 -1 tests/ut/python/parallel/test_auto_parallel_inference.py
  35. +1 -1 tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
  36. +0 -1 tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
  37. +4 -4 tests/ut/python/parallel/test_auto_parallel_reduce_method.py
  38. +2 -2 tests/ut/python/parallel/test_auto_parallel_reshape.py
  39. +4 -4 tests/ut/python/parallel/test_auto_parallel_rhombus.py
  40. +0 -1 tests/ut/python/parallel/test_auto_parallel_softmax_loss.py
  41. +2 -2 tests/ut/python/parallel/test_auto_parallel_transformer.py
  42. +18 -4 tests/ut/python/parallel/test_auto_parallel_two_bn.py
  43. +6 -6 tests/ut/python/parallel/test_auto_parallel_two_matmul.py
  44. +1 -4 tests/ut/python/parallel/test_auto_star_elimination.py
  45. +7 -7 tests/ut/python/parallel/test_batch_matmul.py
  46. +2 -4 tests/ut/python/parallel/test_batchnorm_batch_parallel.py
  47. +6 -7 tests/ut/python/parallel/test_bn_prelu_cell.py
  48. +1 -1 tests/ut/python/parallel/test_bool_grad.py
  49. +2 -2 tests/ut/python/parallel/test_broadcast_dict.py
  50. +11 -11 tests/ut/python/parallel/test_comparison_function_info.py
  51. +3 -5 tests/ut/python/parallel/test_dataset_util.py
  52. +2 -2 tests/ut/python/parallel/test_dense_matmul.py
  53. +4 -4 tests/ut/python/parallel/test_different_type_for_div_op.py
  54. +6 -6 tests/ut/python/parallel/test_dropout_do_mask.py
  55. +11 -11 tests/ut/python/parallel/test_element_wise_function.py
  56. +7 -7 tests/ut/python/parallel/test_expand_dims.py
  57. +6 -6 tests/ut/python/parallel/test_forward_graph.py
  58. +0 -1 tests/ut/python/parallel/test_gather_v2.py
  59. +10 -10 tests/ut/python/parallel/test_gather_v2_primitive.py
  60. +9 -13 tests/ut/python/parallel/test_get_next.py
  61. +2 -2 tests/ut/python/parallel/test_get_parameter_layout.py
  62. +8 -8 tests/ut/python/parallel/test_hybird_parallel_activation.py
  63. +7 -7 tests/ut/python/parallel/test_layer_norm.py
  64. +0 -1 tests/ut/python/parallel/test_linear.py
  65. +9 -10 tests/ut/python/parallel/test_loss_and_optimizer.py
  66. +5 -5 tests/ut/python/parallel/test_matmul_tensor.py
  67. +6 -6 tests/ut/python/parallel/test_neg.py
  68. +1 -1 tests/ut/python/parallel/test_one_dev.py
  69. +8 -9 tests/ut/python/parallel/test_one_hot_net.py
  70. +0 -1 tests/ut/python/parallel/test_one_weight_parameter.py
  71. +0 -9 tests/ut/python/parallel/test_onehot.py
  72. +21 -27 tests/ut/python/parallel/test_operator_model_parallel.py
  73. +3 -3 tests/ut/python/parallel/test_optimizer_clone_weight.py
  74. +1 -1 tests/ut/python/parallel/test_parameter_init.py
  75. +7 -7 tests/ut/python/parallel/test_prelu.py
  76. +3 -5 tests/ut/python/parallel/test_prelu_cell.py
  77. +29 -29 tests/ut/python/parallel/test_reduce_method_info.py
  78. +10 -13 tests/ut/python/parallel/test_reshape.py
  79. +3 -3 tests/ut/python/parallel/test_reshape_parameter.py
  80. +0 -1 tests/ut/python/parallel/test_scalar_loss.py
  81. +12 -12 tests/ut/python/parallel/test_set_auto_parallel_context.py
  82. +6 -6 tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py
  83. +4 -5 tests/ut/python/parallel/test_softmax_cross_entropy_loss.py
  84. +4 -5 tests/ut/python/parallel/test_split_grad_sens.py
  85. +8 -8 tests/ut/python/parallel/test_squeeze_info.py
  86. +3 -4 tests/ut/python/parallel/test_sum_as_loss.py
  87. +0 -2 tests/ut/python/parallel/test_transpose.py
  88. +4 -4 tests/ut/python/parallel/test_two_matmul.py
  89. +0 -1 tests/ut/python/parallel/test_two_weights_parameter.py
  90. +2 -2 tests/ut/python/parallel/test_virtual_dataset_3_input.py
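
The diffs below repeat a small set of pylint cleanups across the test suite: standard-library imports moved ahead of third-party ones, redundant parentheses dropped from assert statements, unused function arguments and locals removed or bound to _, mutable default arguments replaced with None, and parameter names that shadow builtins (such as input) renamed with a trailing underscore. As a hedged, illustrative sketch of the before/after pattern (none of the names below come from the repository):

# Illustrative sketch of the pylint fixes applied in this PR; run_case and its
# arguments are hypothetical, not taken from the repository.
#
# --- before: triggers wrong-import-order, redefined-builtin,
# --- dangerous-default-value, unused-variable and superfluous-parens warnings.
#
# import numpy as np
# import os
#
# def run_case(input, strategies=[]):
#     size = 3                      # assigned but never used
#     ret = os.system("sh run_case.sh")
#     assert (ret == 0)
#
# --- after: the shape of the cleaned-up code.
import os                           # standard library first

import numpy as np                  # third-party afterwards


def run_case(input_, strategies=None):   # no builtin shadowing, no mutable default
    if strategies is None:
        strategies = []
    _ = np.asarray(input_)          # intentionally unused value made explicit
    ret = os.system("sh run_case.sh")
    assert ret == 0                 # no redundant parentheses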

+154 -154 tests/st/auto_parallel/onehot_model_parallel.py

@@ -1,154 +1,154 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import os
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
device_num = distributedTool.get_group_size()
rank_id = distributedTool.get_rank()
context.set_auto_parallel_context(device_num=device_num,
global_rank=rank_id)
def teardown_module():
distributedTool.release()
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
trans_stra = None
if strategy:
trans_stra = (strategy[0],)
self.onehot = P.OneHot().set_strategy(strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
def construct(self, input, indices):
x = self.onehot(indices, self.depth, self.on_value, self.off_value)
x = self.transpose(x, (1, 0))
x = self.sub(input, x)
return x
class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def generate_data(self, shape):
data = np.random.rand(*shape)
return data
def input_data(self, shape):
data = (self.generate_data(shape) * 2).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
def label_data(self, shape, classes):
data = (self.generate_data(shape) * (classes - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
class OneHotFactory:
def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None):
dataGen = DataGenerator()
self.input_full, self.input_part = dataGen.input_data((classes, batch_size))
self.label_full, self.label_part = dataGen.label_data((batch_size,), classes)
self.depth = classes
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.strategy = strategy
def forward_mindspore_single_impl(self):
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(self.input_full, self.label_full)
return out
def forward_mindspore_parallel_impl(self):
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy)
out = net.compile_and_run(self.input_full, self.label_full)
return out
def forward_cmp(self):
out_mindspore_single = self.forward_mindspore_single_impl().asnumpy()
context.reset_auto_parallel_context()
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy()
context.reset_auto_parallel_context()
assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
fact = OneHotFactory(batch_size=128,
classes=1024,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
fact = OneHotFactory(batch_size=1024,
classes=128,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
device_num = distributedTool.get_group_size()
rank_id = distributedTool.get_rank()
context.set_auto_parallel_context(device_num=device_num,
global_rank=rank_id)
def teardown_module():
distributedTool.release()
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
trans_stra = None
if strategy:
trans_stra = (strategy[0],)
self.onehot = P.OneHot().set_strategy(strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
self.axis = axis
def construct(self, input_, indices):
x = self.onehot(indices, self.depth, self.on_value, self.off_value)
x = self.transpose(x, (1, 0))
x = self.sub(input_, x)
return x
class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def generate_data(self, shape):
data = np.random.rand(*shape)
return data
def input_data(self, shape):
data = (self.generate_data(shape) * 2).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
def label_data(self, shape, classes):
data = (self.generate_data(shape) * (classes - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
class OneHotFactory:
def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None):
data_gen = DataGenerator()
self.input_full, self.input_part = data_gen.input_data((classes, batch_size))
self.label_full, self.label_part = data_gen.label_data((batch_size,), classes)
self.depth = classes
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.strategy = strategy
def forward_mindspore_single_impl(self):
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(self.input_full, self.label_full)
return out
def forward_mindspore_parallel_impl(self):
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy)
out = net.compile_and_run(self.input_full, self.label_full)
return out
def forward_cmp(self):
out_mindspore_single = self.forward_mindspore_single_impl().asnumpy()
context.reset_auto_parallel_context()
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy()
context.reset_auto_parallel_context()
assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
fact = OneHotFactory(batch_size=128,
classes=1024,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
fact = OneHotFactory(batch_size=1024,
classes=128,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
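
In this file the notable pylint changes are the reordered imports, the removal of the unused pytest import, and renaming the construct parameter from input to input_, since input shadows the Python builtin (W0622, redefined-builtin). A minimal standalone sketch of that rename, using hypothetical functions rather than the Cell above:

# Hedged sketch of the redefined-builtin fix; scale/scale_fixed are stand-ins.
def scale(input, factor):
    # The parameter name hides the builtin input() inside this function,
    # which is what pylint's W0622 check flags.
    return [x * factor for x in input]


def scale_fixed(input_, factor):
    # Renaming to input_ keeps the call sites readable and leaves the
    # builtin input() untouched.
    return [x * factor for x in input_]


if __name__ == "__main__":
    print(scale([1, 2, 3], 2))        # [2, 4, 6]
    print(scale_fixed([1, 2, 3], 2))  # [2, 4, 6]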

+276 -275 tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py

@@ -1,275 +1,276 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import os
import pytest
from numpy import allclose
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common import dtype as mstype
from mindspore.common.parameter import ParameterTuple, Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn.optim.momentum import Momentum
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.train import Model, ParallelMode
from mindspore.train.callback import Callback
np.set_printoptions(threshold=np.inf)
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
embed = 128
classes = 32
batch_size = 32 * 2
MatmulParamShape = (classes, embed)
def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
rank_id = distributedTool.get_rank()
device_num = distributedTool.get_group_size()
context.set_auto_parallel_context(device_num=device_num,
global_rank=device_id)
def teardown_module():
distributedTool.release()
class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def generate_data(self, shape):
size = np.cumprod(shape)[-1]
num_range = min(size, 1000)
data = (np.arange(0, size) % num_range) / num_range
data = np.reshape(data, shape)
return data
def input_data(self, shape):
data = (self.generate_data(shape) * 0.1).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
def label_data(self, shape, embed):
data = (self.generate_data(shape) * (embed - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
class Dataset():
def __init__(self, predict, label, length=1, input_num=2):
self.predict = predict
self.label = label
self.index = 0
self.length = length
self.input_num = input_num
def __iter__(self):
return self
def __next__(self):
if self.index >= self.length:
raise StopIteration
self.index += 1
if self.input_num == 2:
return self.predict, self.label
else:
return self.predict,
def reset(self):
self.index = 0
def get_dataset_size(self):
return self.length
def get_repeat_count(self):
return self.length
class ModelCallback(Callback):
def __init__(self):
super(ModelCallback, self).__init__()
self.loss_list = []
def epoch_end(self, run_context, *args):
cb_params = run_context.original_args()
result = cb_params.net_outputs
self.loss_list.append(result.asnumpy().mean())
class SoftmaxCrossEntropyExpand(Cell):
def __init__(self, sparse=False, stra_list=[]):
super(SoftmaxCrossEntropyExpand, self).__init__()
if len(stra_list) < 11:
stra_list = [None] * 11
self.exp = P.Exp()
self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1])
self.onehot = P.OneHot().set_strategy(strategy=stra_list[2])
self.on_value = Tensor(1.0, mstype.float32)
self.off_value = Tensor(0.0, mstype.float32)
self.div = P.Div().set_strategy(strategy=stra_list[3])
self.log = P.Log().set_strategy(strategy=stra_list[4])
self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5])
self.mul = P.Mul().set_strategy(strategy=stra_list[6])
self.mul2 = P.Mul().set_strategy(strategy=stra_list[7])
self.cast = P.Cast()
self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8])
self.sparse = sparse
self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9])
self.sub = P.Sub().set_strategy(strategy=stra_list[10])
def construct(self, logit, label):
logit_max = self.reduce_max(logit, -1)
exp = self.exp(self.sub(logit, logit_max))
exp_sum = self.reduce_sum(exp, -1)
softmax_result = self.div(exp, exp_sum)
if self.sparse:
label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
softmax_result_log = self.log(softmax_result)
loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1)
loss = self.mul2(F.scalar_to_array(-1.0), loss)
loss = self.reduce_mean(loss, -1)
return loss
class MatmulNet(Cell):
def __init__(self, matmul_stra=None, loss_stra_list=[]):
super(MatmulNet, self).__init__()
self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra)
self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list)
self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight")
def construct(self, x, label):
loss_input = self.matmul(x, self.weight)
out = self.loss(loss_input, label)
return out
class LossFactory():
def __init__(self):
dataGen = DataGenerator()
self.input_full, self.input_part = dataGen.input_data((batch_size, embed))
self.label_full, self.label_part = dataGen.label_data((batch_size,), embed)
def single_matmul_trains(self):
single_callback = ModelCallback()
net = MatmulNet()
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_full, self.label_full)
model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False)
loss_value = np.array(single_callback.loss_list)
return loss_value
def data_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = MatmulNet()
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value
def model_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
matmul_stra = ((1, 1), (device_num, 1))
reduce_max_stra = ((1, device_num),)
sub_stra = ((1, device_num), (1, 1))
exp_stra = ((1, device_num),)
reduce_sum_stra = ((1, device_num),)
div_stra = ((1, device_num), (1, 1))
log_stra = ((1, device_num),)
mul_stra = ((1, device_num), (1, device_num))
sum_cross_entropy_stra = ((1, device_num),)
mul2_stra = ((), (device_num,))
reduce_mean_stra = ((device_num,),)
onehot_stra = ((1, device_num), (), ())
loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra,
sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra]
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list)
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value
def mix_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
matmul_stra = ((device_num, 1), (1, 1))
reduce_max_stra = ((1, device_num),)
sub_stra = ((device_num, 1), (device_num, 1))
exp_stra = ((1, device_num),)
reduce_sum_stra = ((1, device_num),)
div_stra = ((1, device_num), (1, 1))
log_stra = ((1, device_num),)
mul_stra = ((1, device_num), (1, device_num))
sum_cross_entropy_stra = ((1, device_num),)
mul2_stra = ((), (device_num,))
reduce_mean_stra = ((device_num,),)
onehot_stra = ((1, device_num), (), ())
loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra,
sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra]
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list)
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value
def test_all_trains():
loss_factory = LossFactory()
context.reset_auto_parallel_context()
single_loss = loss_factory.single_matmul_trains()
model_parallel_loss = loss_factory.model_parallel_matmul_trains()
mix_parallel_loss = loss_factory.mix_parallel_matmul_trains()
assert allclose(single_loss, model_parallel_loss)
assert allclose(single_loss, mix_parallel_loss)
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import os
import numpy as np
from numpy import allclose

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn.optim.momentum import Momentum
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.train import Model
from mindspore.train.callback import Callback

np.set_printoptions(threshold=np.inf)
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
embed = 128
classes = 32
batch_size = 32 * 2
MatmulParamShape = (classes, embed)


def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
rank_id = distributedTool.get_rank()
device_num = distributedTool.get_group_size()
context.set_auto_parallel_context(device_num=device_num,
global_rank=device_id)


def teardown_module():
distributedTool.release()


class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def generate_data(self, shape):
size = np.cumprod(shape)[-1]
num_range = min(size, 1000)
data = (np.arange(0, size) % num_range) / num_range
data = np.reshape(data, shape)
return data

def input_data(self, shape):
data = (self.generate_data(shape) * 0.1).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])

def label_data(self, shape, embed_):
data = (self.generate_data(shape) * (embed_ - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])


class Dataset():
def __init__(self, predict, label, length=1, input_num=2):
self.predict = predict
self.label = label
self.index = 0
self.length = length
self.input_num = input_num

def __iter__(self):
return self

def __next__(self):
if self.index >= self.length:
raise StopIteration
self.index += 1
if self.input_num == 2:
return (self.predict, self.label)
return (self.predict,)

def reset(self):
self.index = 0

def get_dataset_size(self):
return self.length

def get_repeat_count(self):
return self.length


class ModelCallback(Callback):
def __init__(self):
super(ModelCallback, self).__init__()
self.loss_list = []

def epoch_end(self, run_context):
cb_params = run_context.original_args()
result = cb_params.net_outputs
self.loss_list.append(result.asnumpy().mean())


class SoftmaxCrossEntropyExpand(Cell):
def __init__(self, sparse=False, stra_list=None):
super(SoftmaxCrossEntropyExpand, self).__init__()
if stra_list is None:
stra_list = []
if len(stra_list) < 11:
stra_list = [None] * 11
self.exp = P.Exp()
self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1])
self.onehot = P.OneHot().set_strategy(strategy=stra_list[2])
self.on_value = Tensor(1.0, mstype.float32)
self.off_value = Tensor(0.0, mstype.float32)
self.div = P.Div().set_strategy(strategy=stra_list[3])
self.log = P.Log().set_strategy(strategy=stra_list[4])
self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5])
self.mul = P.Mul().set_strategy(strategy=stra_list[6])
self.mul2 = P.Mul().set_strategy(strategy=stra_list[7])
self.cast = P.Cast()
self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8])
self.sparse = sparse
self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9])
self.sub = P.Sub().set_strategy(strategy=stra_list[10])

def construct(self, logit, label):
logit_max = self.reduce_max(logit, -1)
exp = self.exp(self.sub(logit, logit_max))
exp_sum = self.reduce_sum(exp, -1)
softmax_result = self.div(exp, exp_sum)
if self.sparse:
label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
softmax_result_log = self.log(softmax_result)
loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1)
loss = self.mul2(F.scalar_to_array(-1.0), loss)
loss = self.reduce_mean(loss, -1)
return loss


class MatmulNet(Cell):
def __init__(self, matmul_stra=None, loss_stra_list=None):
super(MatmulNet, self).__init__()
if loss_stra_list is None:
loss_stra_list = []
self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra)
self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list)
self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight")

def construct(self, x, label):
loss_input = self.matmul(x, self.weight)
out = self.loss(loss_input, label)
return out


class LossFactory():
def __init__(self):
data_gen = DataGenerator()
self.input_full, self.input_part = data_gen.input_data((batch_size, embed))
self.label_full, self.label_part = data_gen.label_data((batch_size,), embed)

def single_matmul_trains(self):
single_callback = ModelCallback()
net = MatmulNet()
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_full, self.label_full)
model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False)
loss_value = np.array(single_callback.loss_list)
return loss_value

def data_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = MatmulNet()
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value

def model_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
matmul_stra = ((1, 1), (device_num, 1))
reduce_max_stra = ((1, device_num),)
sub_stra = ((1, device_num), (1, 1))
exp_stra = ((1, device_num),)
reduce_sum_stra = ((1, device_num),)
div_stra = ((1, device_num), (1, 1))
log_stra = ((1, device_num),)
mul_stra = ((1, device_num), (1, device_num))
sum_cross_entropy_stra = ((1, device_num),)
mul2_stra = ((), (device_num,))
reduce_mean_stra = ((device_num,),)
onehot_stra = ((1, device_num), (), ())
loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra,
sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra]
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list)
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value

def mix_parallel_matmul_trains(self):
parallel_callback = ModelCallback()
matmul_stra = ((device_num, 1), (1, 1))
reduce_max_stra = ((1, device_num),)
sub_stra = ((device_num, 1), (device_num, 1))
exp_stra = ((1, device_num),)
reduce_sum_stra = ((1, device_num),)
div_stra = ((1, device_num), (1, 1))
log_stra = ((1, device_num),)
mul_stra = ((1, device_num), (1, device_num))
sum_cross_entropy_stra = ((1, device_num),)
mul2_stra = ((), (device_num,))
reduce_mean_stra = ((device_num,),)
onehot_stra = ((1, device_num), (), ())
loss_stra_list = [exp_stra, reduce_sum_stra, onehot_stra, div_stra, log_stra,
sum_cross_entropy_stra, mul_stra, mul2_stra, reduce_mean_stra, reduce_max_stra, sub_stra]
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = MatmulNet(matmul_stra=matmul_stra, loss_stra_list=loss_stra_list)
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(net, optimizer=optimizer)
epoch_size = 6
dataset = Dataset(self.input_part, self.label_part)
model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
loss_value = np.array(parallel_callback.loss_list)
return loss_value


def test_all_trains():
loss_factory = LossFactory()
context.reset_auto_parallel_context()
single_loss = loss_factory.single_matmul_trains()
model_parallel_loss = loss_factory.model_parallel_matmul_trains()
mix_parallel_loss = loss_factory.mix_parallel_matmul_trains()
assert allclose(single_loss, model_parallel_loss)
assert allclose(single_loss, mix_parallel_loss)
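
The SoftmaxCrossEntropyExpand and MatmulNet constructors above replace stra_list=[] and loss_stra_list=[] with None defaults plus an in-body fallback, which is the standard fix for pylint's dangerous-default-value (W0102) warning. A minimal sketch of why the mutable default is a problem, with hypothetical function names:

# Hedged sketch of the W0102 fix; append_item_* are illustrative only.
def append_item_buggy(item, bucket=[]):
    # The same list object is reused for every call that relies on the
    # default, so results leak between calls.
    bucket.append(item)
    return bucket


def append_item_fixed(item, bucket=None):
    # A fresh list is created per call when no bucket is supplied.
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket


if __name__ == "__main__":
    print(append_item_buggy(1))   # [1]
    print(append_item_buggy(2))   # [1, 2]  <- shared state across calls
    print(append_item_fixed(1))   # [1]
    print(append_item_fixed(2))   # [2]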

+26 -26 tests/st/auto_parallel/test_expand_loss.py

@@ -1,26 +1,26 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_expand_loss():
    sh_path = os.path.split(os.path.realpath(__file__))[0]
    ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh")
    assert (ret == 0)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_expand_loss():
    sh_path = os.path.split(os.path.realpath(__file__))[0]
    ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh")
    assert ret == 0

+21 -22 tests/st/auto_parallel/test_model_parallel_onehot.py

@@ -1,22 +1,21 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
def test_expand_loss():
    ret = os.system("sh run_onehot_model_parallel.sh")
    assert (ret == 0)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import os


def test_expand_loss():
    ret = os.system("sh run_onehot_model_parallel.sh")
    assert ret == 0

+16 -19 tests/st/auto_parallel/test_resnet50_expand_loss_2p.py

@@ -13,8 +13,8 @@
# limitations under the License.
# ============================================================================

import numpy as np
import os
import numpy as np
import pytest

import mindspore.common.dtype as mstype
@@ -37,31 +37,29 @@ init()
context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)


def weight_variable(shape, factor=0.1):
def weight_variable():
    return One()


def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
    init_value = weight_variable((out_channels, in_channels, 3, 3))
    init_value = weight_variable()
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)


def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
    init_value = weight_variable((out_channels, in_channels, 1, 1))
    init_value = weight_variable()
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)


def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
    init_value = weight_variable((out_channels, in_channels, 7, 7))
    init_value = weight_variable()
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)


def _fused_bn(channels, momentum=0.9):
    init_weight = weight_variable((channels,))
    init_bias = weight_variable((channels,))
    return nn.BatchNorm2d(channels, momentum=momentum)


@@ -210,8 +208,8 @@ class ResNet(nn.Cell):

        self.mean = P.ReduceMean(keep_dims=True)
        self.end_point = nn.Dense(2048, num_classes, has_bias=True,
                                  weight_init=weight_variable((num_classes, 2048)),
                                  bias_init=weight_variable((num_classes,)))
                                  weight_init=weight_variable(),
                                  bias_init=weight_variable())
        self.squeeze = P.Squeeze()
        self.cast = P.Cast()

@@ -345,9 +343,8 @@ class Dataset():
            raise StopIteration
        self.index += 1
        if self.input_num == 2:
            return self.predict, self.label
        else:
            return self.predict,
            return (self.predict, self.label)
        return (self.predict,)

    def reset(self):
        self.index = 0
@@ -364,7 +361,7 @@ class ModelCallback(Callback):
        super(ModelCallback, self).__init__()
        self.loss_list = []

    def epoch_end(self, run_context, *args):
    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        result = cb_params.net_outputs
        self.loss_list.append(result.asnumpy().mean())
@@ -376,9 +373,9 @@ class ModelCallback(Callback):
def test_train_feed(num_classes=8192):
    set_algo_parameters(elementwise_op_strategy_follow=True)
    parallel_callback = ModelCallback()
    dataGen = DataGenerator()
    input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224))
    label_full, label_part = dataGen.label_data((32 * 2,))
    data_gen = DataGenerator()
    _, input_part = data_gen.input_data((32 * 2, 3, 224, 224))
    _, label_part = data_gen.label_data((32 * 2,))
    dataset = Dataset(input_part, label_part)
    net = resnet50(num_classes)
    loss = SoftmaxCrossEntropyExpand(sparse=True)
@@ -396,9 +393,9 @@ def test_train_feed(num_classes=8192):
def test_train_feed2(num_classes=1001):
    set_algo_parameters(elementwise_op_strategy_follow=True)
    parallel_callback = ModelCallback()
    dataGen = DataGenerator()
    input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224))
    label_full, label_part = dataGen.label_data((32 * 2,))
    data_gen = DataGenerator()
    _, input_part = data_gen.input_data((32 * 2, 3, 224, 224))
    _, label_part = data_gen.label_data((32 * 2,))
    dataset = Dataset(input_part, label_part)
    net = resnet50(num_classes)
    loss = SoftmaxCrossEntropyExpand(sparse=True)


+17 -17 tests/ut/python/communication/__init__.py

@@ -1,17 +1,17 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("../../..")
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("../../..")

+0 -1 tests/ut/python/communication/test_comm.py

@@ -25,7 +25,6 @@ from mindspore.nn import Dense
from mindspore.nn import Momentum
from mindspore.nn import ReLU
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.ops.operations import Split
from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter
from mindspore.ops.operations.comm_ops import Broadcast



+1 -2 tests/ut/python/communication/test_data_parallel_lenet.py

@@ -16,8 +16,8 @@
@File : test_data_parallel_lenet.py
@Desc : test data parallel lenet
"""
import numpy as np
import os
import numpy as np

import mindspore.context as context
import mindspore.nn as nn
@@ -80,7 +80,6 @@ def test_lenet5_train_step_training_pynative():
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=8, mirror_mean=True)
    size = 3
    predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    DatasetLenet(predict, label, 2)


+1 -1 tests/ut/python/parallel/__init__.py

@@ -19,7 +19,7 @@ from mindspore.parallel._utils import _reset_op_id
from mindspore.parallel.algo_parameter_config import reset_algo_parameters


def setup_module(module):
def setup_module():
    auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    reset_cost_model_context()
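
The change above drops the unused module parameter from setup_module, which silences pylint's unused-argument (W0613) warning; pytest invokes xunit-style module setup hooks whether or not they declare that argument. A hedged, standalone sketch (the test contents are hypothetical):

# Hypothetical pytest module illustrating the setup_module signature change.
_STATE = {}


def setup_module():
    # The old signature took a `module` argument that was never used, which is
    # exactly what W0613 flags; pytest accepts the zero-argument form as well.
    _STATE["ready"] = True


def teardown_module():
    _STATE.clear()


def test_setup_ran():
    # Passes when run under pytest, which calls setup_module() first.
    assert _STATE.get("ready") is True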


+178 -178 tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py

@@ -1,178 +1,178 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, z):
out = self.add(x, z)
return self.relu(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = AddRelu()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, z):
out = self.add(x, z)
return self.relu(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = AddRelu()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
_ = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
_ = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
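
A note on the sharding these AddRelu cases rely on: the factory compares a single-device run against the semi_auto_parallel run by slicing every host array with get_parallel_blocks and selecting the local slice via device_id % need_dev_num. Below is a standalone NumPy reproduction of that slicing, included only as an illustration, assuming the (256, 64) input and (2, 2) strategy from test_reid_add_relu_input_256_64 and an illustrative RANK_ID of 3.

# Standalone NumPy sketch of the slicing used by AddReluFactory (assumed example values).
import numpy as np

def get_parallel_blocks(input_, strategy):
    # Split along axis 0 into strategy[0] pieces, then each piece along axis 1, and so on;
    # the resulting shard order is row-major over the device layout.
    blocks = [input_]
    for axis, cuts in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), cuts, axis=axis))
        blocks = temp
    return blocks

full = np.arange(256 * 64, dtype=np.float32).reshape(256, 64)
shards = get_parallel_blocks(full, (2, 2))     # strategy0[1] in the tests above
assert len(shards) == 4 and shards[0].shape == (128, 32)
device_id = 3                                  # illustrative RANK_ID
local = shards[device_id % len(shards)]        # mirrors x_id = device_id % need_dev_num
print(local.shape)                             # (128, 32)

Because axis 0 is cut before axis 1, the shards come out in row-major order over the 2 x 2 device layout, which is why x_id and out_id can both be plain modulo indices.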

+356  -356   tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py

@@ -1,356 +1,356 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore._checkparam import check_bool, twice
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class _Conv(Cell):
r"""Applies a N-D convolution over an input signal composed of several input
planes.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init):
super(_Conv, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.has_bias = has_bias
if not (isinstance(in_channels, int) and in_channels > 0):
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
+ str(in_channels) + ', should be a int and greater than 0.')
if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
kernel_size[0] < 1 or kernel_size[1] < 1:
raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
+ str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
if in_channels % group != 0:
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
if out_channels % group != 0:
raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
if check_bool(has_bias):
self.bias = Parameter(initializer(
bias_init, [out_channels]), name='bias')
else:
if bias_init != 'zeros':
print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
self.bias = None
def construct(self, *inputs):
raise NotImplementedError
class Conv2d(_Conv):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
pad_mode='same',
padding=0,
dilation=1,
group=1,
has_bias=False,
weight_init='normal',
bias_init='zeros',
strategy=None):
kernel_size = twice(kernel_size)
super(Conv2d, self).__init__(
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init)
self.add = P.TensorAdd(strategy)
self.conv2d = P.Conv2D(out_channel=self.out_channels,
kernel_size=self.kernel_size,
mode=1,
pad_mode=self.pad_mode,
pad=self.padding,
stride=self.stride,
dilation=self.dilation,
group=self.group,
strategy=None)
self.bias_add = P.BiasAdd()
def construct(self, input1, input2):
x = self.add(input1, input2)
if self.has_bias:
return self.bias_add(self.conv2d(x, self.weight),
self.bias)
return self.conv2d(x, self.weight)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Conv2dFactory:
def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias):
self.in_n, self.in_c, self.in_h, self.in_w = input_shape
self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1))
prefix = ""
input_size = 1
filter_size = 1
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
self.prefix = prefix
for s in filter_shape:
filter_size = filter_size * s
number_range1 = min(10, input_size)
number_range2 = min(10, filter_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype(
np.float16)
self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype(
np.float16)
self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype(
np.float16)
self.has_bias = has_bias
if self.has_bias is True:
self.bias_np = np.arange(0, self.out_c).astype(np.float16)
self.out_shape = (128, 64, 56, 56)
out_size = 1
for s in self.out_shape:
out_size = out_size * s
number_range3 = min(10, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2,
self.out_shape).astype(np.float16)
self.x_id = device_id % 4
self.y_id = device_id % 4
self.out_strategy = self.strategy0[1]
self.out_id = device_id % 4
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_conv2d_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias)
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
out = net(input1, input2)
return out.asnumpy()
def forward_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_conv2d_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
output_grad = Tensor(self.output_grad_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, )
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
grad_net = Grad(net)
grad_net.set_train()
out_grad = grad_net(x, y, output_grad)
return out_grad
def grad_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad = Tensor(self.output_grad_np)
output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
output_grad1 = Tensor(output_grads[self.out_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return out_grad
def forward_conv2d_cmp(self):
out_mindspore = self.forward_conv2d_mindspore_impl()
out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_conv2d_cmp(self):
input_grad_mindspore = self.grad_conv2d_mindspore_impl()
input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1])
assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001)
assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001)
def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.forward_conv2d_cmp()
def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.grad_conv2d_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore._checkparam import check_bool, twice
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class _Conv(Cell):
r"""Applies a N-D convolution over an input signal composed of several input
planes.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init):
super(_Conv, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.has_bias = has_bias
if not (isinstance(in_channels, int) and in_channels > 0):
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
+ str(in_channels) + ', should be a int and greater than 0.')
if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
kernel_size[0] < 1 or kernel_size[1] < 1:
raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
+ str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
if in_channels % group != 0:
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
if out_channels % group != 0:
raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
if check_bool(has_bias):
self.bias = Parameter(initializer(
bias_init, [out_channels]), name='bias')
else:
if bias_init != 'zeros':
print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
self.bias = None
def construct(self, *inputs):
raise NotImplementedError
class Conv2d(_Conv):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
pad_mode='same',
padding=0,
dilation=1,
group=1,
has_bias=False,
weight_init='normal',
bias_init='zeros',
strategy=None):
kernel_size = twice(kernel_size)
super(Conv2d, self).__init__(
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init)
self.add = P.TensorAdd(strategy)
self.conv2d = P.Conv2D(out_channel=self.out_channels,
kernel_size=self.kernel_size,
mode=1,
pad_mode=self.pad_mode,
pad=self.padding,
stride=self.stride,
dilation=self.dilation,
group=self.group,
strategy=None)
self.bias_add = P.BiasAdd()
def construct(self, input1, input2):
x = self.add(input1, input2)
if self.has_bias:
return self.bias_add(self.conv2d(x, self.weight),
self.bias)
return self.conv2d(x, self.weight)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Conv2dFactory:
def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias):
self.in_n, self.in_c, self.in_h, self.in_w = input_shape
self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1))
prefix = ""
input_size = 1
filter_size = 1
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
self.prefix = prefix
for s in filter_shape:
filter_size = filter_size * s
number_range1 = min(10, input_size)
number_range2 = min(10, filter_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype(
np.float16)
self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype(
np.float16)
self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype(
np.float16)
self.has_bias = has_bias
if self.has_bias is True:
self.bias_np = np.arange(0, self.out_c).astype(np.float16)
self.out_shape = (128, 64, 56, 56)
out_size = 1
for s in self.out_shape:
out_size = out_size * s
number_range3 = min(10, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2,
self.out_shape).astype(np.float16)
self.x_id = device_id % 4
self.y_id = device_id % 4
self.out_strategy = self.strategy0[1]
self.out_id = device_id % 4
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_conv2d_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias)
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
out = net(input1, input2)
return out.asnumpy()
def forward_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_conv2d_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
output_grad = Tensor(self.output_grad_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias,)
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
grad_net = Grad(net)
grad_net.set_train()
out_grad = grad_net(x, y, output_grad)
return out_grad
def grad_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad = Tensor(self.output_grad_np)
output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
output_grad1 = Tensor(output_grads[self.out_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return out_grad
def forward_conv2d_cmp(self):
out_mindspore = self.forward_conv2d_mindspore_impl()
out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_conv2d_cmp(self):
input_grad_mindspore = self.grad_conv2d_mindspore_impl()
input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1])
assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001)
assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001)
def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.forward_conv2d_cmp()
def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.grad_conv2d_cmp()
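
These conv2d cases exercise plain batch parallelism: each device holds the same weight, the inputs are cut four ways along the N axis only (the (4, 1, 1, 1) strategy), and the per-device outputs, concatenated back along N, are expected to match the single-device result within the 0.001 tolerances. Below is a NumPy-only sketch of that property under assumed, scaled-down shapes (the tests use (128, 64, 112, 112)); the hand-rolled 1x1 stride-2 convolution is a stand-in for P.Conv2D, not the test's code.

# NumPy-only sketch (assumed shapes) of the batch-parallel property these cases rely on.
import numpy as np

def conv1x1_stride2(x, w):
    # 1x1 kernel, stride 2, 'valid' padding: subsample H and W, then mix channels.
    return np.einsum('oc,nchw->nohw', w, x[:, :, ::2, ::2])

rng = np.random.default_rng(0)
x = rng.standard_normal((8, 4, 8, 8)).astype(np.float32)   # (N, C, H, W), scaled down
w = rng.standard_normal((6, 4)).astype(np.float32)         # (out_c, in_c) for a 1x1 kernel

full = conv1x1_stride2(x, w)
shards = np.split(x, 4, axis=0)                            # the (4, 1, 1, 1) split along N
parallel = np.concatenate([conv1x1_stride2(s, w) for s in shards], axis=0)
assert np.allclose(full, parallel, 0.0001, 0.0001)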

+120  -120   tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py

@@ -1,120 +1,120 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, keep_prob, seed0, seed1, strategy=None):
super(Net, self).__init__()
self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)
def construct(self, input):
x = self.drop(input)
return x
# pylint: disable=comparison-with-itself
class DropoutFactory:
def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
self.keep_prob = keep_prob
self.seed0 = seed0
self.seed1 = seed1
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def d4_tensor_compare(self, input, out_me):
[a, b, c, d] = input.shape
for i in range(a):
for j in range(b):
for k in range(c):
for e in range(d):
if out_me[i, j, k, e] == 0:
assert True == True
else:
assert np.allclose(out_me[i, j, k, e], input[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Net(0.4, 0, 0, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)
def test_reid_dropout_forward_seed_F32_64_512_8_8():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
fact.forward_cmp()
def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
fact.forward_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, keep_prob, seed0, seed1, strategy=None):
super(Net, self).__init__()
self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)
def construct(self, input_):
x = self.drop(input_)
return x
# pylint: disable=comparison-with-itself
class DropoutFactory:
def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
self.keep_prob = keep_prob
self.seed0 = seed0
self.seed1 = seed1
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def d4_tensor_compare(self, input_, out_me):
[a, b, c, d] = input_.shape
for i in range(a):
for j in range(b):
for k in range(c):
for e in range(d):
if out_me[i, j, k, e] == 0:
assert True
else:
assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Net(0.4, 0, 0, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)
def test_reid_dropout_forward_seed_F32_64_512_8_8():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
fact.forward_cmp()
def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
fact.forward_cmp()
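
d4_tensor_compare above checks the only invariant a dropout forward exposes without fixing the mask: every non-zero output element must equal the matching input element scaled by 1 / keep_prob (here 1 / 0.4), and every dropped element must be exactly zero. A vectorized NumPy sketch of the same check follows; the random mask is a stand-in for the Dropout op's own seeded RNG, so this is an illustration, not the test's code path.

# Vectorized NumPy sketch of the d4_tensor_compare invariant (mask is a stand-in).
import numpy as np

keep_prob = 0.4
rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4, 4)).astype(np.float32)
mask = rng.random(x.shape) < keep_prob                  # pretend dropout mask
out = np.where(mask, x / keep_prob, 0.0).astype(np.float32)

kept = out != 0
assert np.allclose(out[kept], x[kept] * (1 / keep_prob), 0.0001, 0.0001)
assert np.all(out[~kept] == 0)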

+154  -154   tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py

@@ -1,154 +1,154 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y):
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulAllgather(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulAllgather, self).__init__()
self.allgather = P.AllGather(group=group)
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce = P.AllReduce(group=group)
def construct(self, x, y):
x = self.allgather(x)
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, sens):
return grad_all_with_sens(self.network)(x, y, sens)
class MatmulAllgatherFactory:
def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
self.inputx = self.GenValue(inputx_shape, 10)
self.inputy = self.GenValue(inputy_shape, 20)
self.x_stra = x_stra
self.y_stra = y_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def GenValue(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulAllgather("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (64, 32)
inputy_shape = (32, 64)
fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y):
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulAllgather(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulAllgather, self).__init__()
self.allgather = P.AllGather(group=group)
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce = P.AllReduce(group=group)
def construct(self, x, y):
x = self.allgather(x)
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, sens):
return grad_all_with_sens(self.network)(x, y, sens)
class MatmulAllgatherFactory:
def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.x_stra = x_stra
self.y_stra = y_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulAllgather("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (64, 32)
inputy_shape = (32, 64)
fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
fact.grad_cmp()
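
The allgather case splits x by rows ((4, 1)) and y by columns ((1, 4)), rebuilds the full x with AllGather, and lets each device compute one column block of the full x @ y; grad_cmp then slices the single-device gradients with get_parallel_blocks and compares them shard for shard. The following NumPy-only sketch illustrates that forward identity under the test_reduce_grad shapes; it is an illustration, not MindSpore code.

# NumPy sketch of the AllGather-then-MatMul identity behind MatmulAllgather.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((64, 32)).astype(np.float32)
y = rng.standard_normal((32, 64)).astype(np.float32)

x_shards = np.split(x, 4, axis=0)              # x strategy (4, 1): a (16, 32) row block per device
y_shards = np.split(y, 4, axis=1)              # y strategy (1, 4): a (32, 16) column block per device

x_full = np.concatenate(x_shards, axis=0)      # what AllGather reassembles
per_device = [x_full @ ys for ys in y_shards]  # each device's local MatMul
assert np.allclose(np.concatenate(per_device, axis=1), x @ y, 0.0001, 0.0001)

grad_mindspore_impl_reduce then feeds sens = 1.0 / v with v = stra_size * repeat_num ** 3; for this case stra_size = 4 and repeat_num = 1, so sens = 0.25.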

+175  -175   tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py

@@ -1,175 +1,175 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulReduce(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulReduce, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.allreduce1 = P.AllReduce(group=group)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce2 = P.AllReduce(group=group)
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.allreduce1(out)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce2(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, z, sens):
return grad_all_with_sens(self.network)(x, y, z, sens)
class MatmulReduceFactory:
def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
self.inputx = self.GenValue(inputx_shape, 10)
self.inputy = self.GenValue(inputy_shape, 20)
self.inputz = self.GenValue(inputz_shape, 30)
self.x_stra = x_stra
self.y_stra = y_stra
self.z_stra = z_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def GenValue(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
z = Tensor(self.inputz)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
z = Tensor(inputzs[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulReduce("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
reduce_result2 = reduce_results[2].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
fact.grad_cmp()
def test_reduce_grad_repeat():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulReduce(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulReduce, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.allreduce1 = P.AllReduce(group=group)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce2 = P.AllReduce(group=group)
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.allreduce1(out)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce2(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, z, sens):
return grad_all_with_sens(self.network)(x, y, z, sens)
class MatmulReduceFactory:
def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.inputz = self.gen_value(inputz_shape, 30)
self.x_stra = x_stra
self.y_stra = y_stra
self.z_stra = z_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
z = Tensor(self.inputz)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
z = Tensor(inputzs[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulReduce("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
reduce_result2 = reduce_results[2].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
fact.grad_cmp()
def test_reduce_grad_repeat():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
fact.grad_cmp()
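
The allreduce variant cuts x along columns and y along rows with the same cut count, so the per-device partial products summed by the first AllReduce reproduce the full x @ y before the second MatMul; with repeat_num = device_num / stra_size and v = stra_size * repeat_num ** 3, the sens fed to grad_all_with_sens works out to 1/4 for test_reduce_grad and 1/16 for test_reduce_grad_repeat. Below is a NumPy-only sketch of the partial-product identity for the (1, 4) / (4, 1) strategies, given as an illustration rather than the test's code.

# NumPy sketch of the identity behind MatmulReduce's first AllReduce.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((32, 64)).astype(np.float32)
y = rng.standard_normal((64, 64)).astype(np.float32)

x_shards = np.split(x, 4, axis=1)              # x strategy (1, 4): a (32, 16) column block per device
y_shards = np.split(y, 4, axis=0)              # y strategy (4, 1): a (16, 64) row block per device
partials = [xs @ ys for xs, ys in zip(x_shards, y_shards)]
reduced = np.sum(partials, axis=0)             # what AllReduce computes across devices
assert np.allclose(reduced, x @ y, 0.0001, 0.0001)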

+206  -207   tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py

@@ -1,207 +1,206 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class L2normalize(Cell):
def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
super(L2normalize, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.l2norm = P.L2Normalize(axis, epsilon, strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.l2norm(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class L2normalizeFactory:
def __init__(self, input_shape, axis, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.axis = axis
self.epsilon = 1e-4
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = L2normalize(self.axis, self.epsilon)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = L2normalize(self.axis, self.epsilon)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_cmp()
def test_reid_l2normalize_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class L2normalize(Cell):
def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
super(L2normalize, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.l2norm = P.L2Normalize(axis, epsilon, strategy1)

def construct(self, x, y):
out = self.add(x, y)
out = self.l2norm(out)
return out


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)


class L2normalizeFactory:
def __init__(self, input_shape, axis, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.axis = axis
self.epsilon = 1e-4
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = L2normalize(self.axis, self.epsilon)
out = net(x, y)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()

def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = L2normalize(self.axis, self.epsilon)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad

def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad

def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_l2normalize_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_cmp()


def test_reid_l2normalize_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.grad_cmp()
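
The block-partitioning helper repeated in every factory above is easiest to see in isolation: below is a minimal, standalone NumPy sketch (hypothetical shapes, not part of the test file) of how get_parallel_blocks cuts a tensor according to a partition strategy and how device_id % need_dev_num selects each rank's local shard.

import numpy as np

def get_parallel_blocks(input_, strategy):
    # Split along axis 0 into strategy[0] pieces, then each piece along axis 1, and so on.
    blocks = [input_]
    for i, stra in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), stra, axis=i))
        blocks.extend(temp)
    return blocks

full = np.arange(8 * 4).reshape(8, 4).astype(np.float32)
strategy = (4, 1)                       # rows split across 4 devices, columns kept whole
blocks = get_parallel_blocks(full, strategy)
need_dev_num = int(np.prod(strategy))   # 4 shards in total
for device_id in range(4):
    local = blocks[device_id % need_dev_num]
    assert local.shape == (2, 4)        # each rank sees an (8/4) x 4 slice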

+195 -196 tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py

@@ -1,196 +1,195 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.relu(out)
return out
class NetWithLoss(Cell):
def __init__(self, network, strategy2=None):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
self.network = network
def construct(self, x, y, b):
predict = self.network(x, y)
return self.loss(predict, b)[0]
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, b):
return grad_all(self.network)(x, y, b)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1, strategy2):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(10, target_size)
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
self.strategy2 = strategy2
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = AddRelu()
net_with_loss = NetWithLoss(net)
grad_net = Grad(net_with_loss)
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad)
input_grads.append(input_grad)
return input_grads
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
grad_net = Grad(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad1])
input_grads.append(input_grad)
return input_grads
def grad_cmp(self):
input_grad_mindspores = self.grad_mindspore_impl()
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
for i in range(0, len(input_grad_mindspores)):
input_grad_mindspore = input_grad_mindspores[i]
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
input_grad_blocks_0[self.x_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
input_grad_blocks_1[self.y_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
input_grad_mindspore_parallel0)
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
input_grad_mindspore_parallel1)
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
def test_reid_l2normalize_grad_input_128_512_stridesplit():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)

def construct(self, x, y):
out = self.add(x, y)
out = self.relu(out)
return out


class NetWithLoss(Cell):
def __init__(self, network, strategy2=None):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
self.network = network

def construct(self, x, y, b):
predict = self.network(x, y)
return self.loss(predict, b)[0]


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, x, y, b):
return grad_all(self.network)(x, y, b)


class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1, strategy2):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(10, target_size)
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
self.strategy2 = strategy2
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = AddRelu()
net_with_loss = NetWithLoss(net)
grad_net = Grad(net_with_loss)
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad)
input_grads.append(input_grad)
return input_grads

def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
grad_net = Grad(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad1])
input_grads.append(input_grad)
return input_grads

def grad_cmp(self):
input_grad_mindspores = self.grad_mindspore_impl()
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
for i in range(0, len(input_grad_mindspores)):
input_grad_mindspore = input_grad_mindspores[i]
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
input_grad_blocks_0[self.x_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
input_grad_blocks_1[self.y_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
input_grad_mindspore_parallel0)
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
input_grad_mindspore_parallel1)
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()


def test_reid_l2normalize_grad_input_128_512_stridesplit():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
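
Each strategy in these tests is a tuple whose trailing elements are per-input partition tuples; the factories only ever index strategy[1], strategy[2], and so on, the shard count is the product of a partition tuple, and a rank's block index is device_id modulo that count. A short, self-contained illustration of that bookkeeping follows (the RANK_ID default of "0" is only so the snippet runs outside a 4-device job; the tests themselves require RANK_ID to be set):

import os
from functools import reduce

# Layout used throughout: (flag, partition_for_input0, partition_for_input1, ...)
strategy0 = (0, (4, 1), (4, 1))   # TensorAdd: both inputs split 4-way along axis 0
strategy1 = (0, (4, 1))           # ReLU input partition, reused as the output partition

def prod(dims):
    return reduce(lambda a, b: a * b, dims, 1)

device_id = int(os.environ.get("RANK_ID", "0"))  # the tests read RANK_ID unconditionally
need_dev_num0 = prod(strategy0[1])               # 4 shards for x and y
need_dev_num1 = prod(strategy1[1])               # 4 shards for the output gradient
x_id = device_id % need_dev_num0
out_id = device_id % need_dev_num1
print(x_id, out_id)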

+329 -329 tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py

@@ -1,329 +1,329 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Matmul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(Matmul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.matmul(out, w)
class BatchMatMul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(BatchMatMul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.batchmatmul(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, inputa, inputb, inputz, output_grad):
gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad)
return gout
class BatchmatmulFactory:
def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_):
self.strategy = strategy
self.strategy_ = strategy_
inputa_size = 1
inputb_size = 1
prefix = ""
for s in inputa_shape:
prefix = prefix + str(s) + "_"
inputa_size = inputa_size * s
prefix = prefix + "and"
for s in inputb_shape:
prefix = prefix + str(s) + "_"
inputb_size = inputb_size * s
number_rangea = min(1000, inputa_size)
number_rangeb = min(1000, inputb_size)
self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
np.float32)
self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
np.float32)
self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
self.transpose_a = transpose_a
self.transpose_b = transpose_b
out_shape = []
device_matrix = []
out_strategy = []
if transpose_a:
temp = inputa_shape[-1]
inputa_shape[-1] = inputa_shape[-2]
inputa_shape[-2] = temp
if transpose_b:
temp = inputb_shape[-1]
inputb_shape[-1] = inputb_shape[-2]
inputb_shape[-2] = temp
if (len(inputa_shape) >= len(inputb_shape)):
out_shape = list(inputa_shape)
out_shape[-1] = inputb_shape[-1]
else:
out_shape = list(inputb_shape)
out_shape[-2] = inputa_shape[-2]
strategy1 = list(self.strategy[1])
strategy2 = list(self.strategy[2])
if transpose_a:
temp = strategy1[-1]
strategy1[-1] = strategy1[-2]
strategy1[-2] = temp
if transpose_b:
temp = strategy2[-1]
strategy2[-1] = strategy2[-2]
strategy2[-2] = temp
if (len(strategy1) >= len(strategy2)):
out_strategy = strategy1.copy()
out_strategy[-1] = strategy2[-1]
else:
out_strategy = strategy2.copy()
out_strategy[-2] = strategy1[-2]
device_matrix = out_strategy.copy()
device_matrix.insert(-1, strategy1[-1])
self.out_strategy = out_strategy
need_dev_num = 1
for s in device_matrix:
need_dev_num = need_dev_num * s
self.need_dev_num = need_dev_num
self.device_matrix = device_matrix
out_size = 1
for s in out_shape:
out_size = out_size * s
number_range = min(1000, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
np.float32)
device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
x_index = device_index[:-1].copy()
if transpose_a:
temp = x_index[-1]
x_index[-1] = x_index[-2]
x_index[-2] = temp
y_index = device_index[:-3].copy()
y_index.append(device_index[-2])
y_index.append(device_index[-1])
if transpose_b:
temp = y_index[-1]
y_index[-1] = y_index[-2]
y_index[-2] = temp
out_index = device_index[:-2].copy()
out_index.append(device_index[-1])
print(device_matrix)
print(device_index)
need_dev_num_ = 1
for s in strategy_[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num_
self.y_id = self.list_to_id(y_index, self.strategy[2])
self.out_id = self.list_to_id(out_index, self.out_strategy)
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
"""
shape: the upper bound of each dimension, e.g. (2, 4, 8)
"""
def id_to_list(self, id, shape):
result = []
r = id
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def forward_mindspore_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
matmul.set_train()
out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
return out_me.asnumpy()
def forward_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])  # derived from the device matrix
z1 = Tensor(zs[self.x_id])
matmul.set_train()
matmul.set_auto_parallel()
out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
return out_me.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
net_me = Grad(matmul)
net_me.set_train()
out_grad_me = Tensor(self.output_grad_np)
out_grad = net_me(x, y, z, out_grad_me)
return out_grad
def grad_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
out_grad_me = Tensor(self.output_grad_np)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(xs[self.x_id])  # derived from the device matrix
y1 = Tensor(ys[self.y_id])
z1 = Tensor(zs[self.x_id])
out_grad1 = Tensor(out_grads[self.out_id])
net_me = Grad(matmul)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net_me.set_auto_parallel()
net_me.set_train()
out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
parallel_inputs_run=[x1, y1, z1, out_grad1])
return out_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2])
input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1])
assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001)
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Matmul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(Matmul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.matmul(out, w)
class BatchMatMul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(BatchMatMul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.batchmatmul(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, inputa, inputb, inputz, output_grad):
gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad)
return gout
class BatchmatmulFactory:
def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_):
self.strategy = strategy
self.strategy_ = strategy_
inputa_size = 1
inputb_size = 1
prefix = ""
for s in inputa_shape:
prefix = prefix + str(s) + "_"
inputa_size = inputa_size * s
prefix = prefix + "and"
for s in inputb_shape:
prefix = prefix + str(s) + "_"
inputb_size = inputb_size * s
number_rangea = min(1000, inputa_size)
number_rangeb = min(1000, inputb_size)
self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
np.float32)
self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
np.float32)
self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
self.transpose_a = transpose_a
self.transpose_b = transpose_b
out_shape = []
device_matrix = []
out_strategy = []
if transpose_a:
temp = inputa_shape[-1]
inputa_shape[-1] = inputa_shape[-2]
inputa_shape[-2] = temp
if transpose_b:
temp = inputb_shape[-1]
inputb_shape[-1] = inputb_shape[-2]
inputb_shape[-2] = temp
if len(inputa_shape) >= len(inputb_shape):
out_shape = list(inputa_shape)
out_shape[-1] = inputb_shape[-1]
else:
out_shape = list(inputb_shape)
out_shape[-2] = inputa_shape[-2]
strategy1 = list(self.strategy[1])
strategy2 = list(self.strategy[2])
if transpose_a:
temp = strategy1[-1]
strategy1[-1] = strategy1[-2]
strategy1[-2] = temp
if transpose_b:
temp = strategy2[-1]
strategy2[-1] = strategy2[-2]
strategy2[-2] = temp
if len(strategy1) >= len(strategy2):
out_strategy = strategy1.copy()
out_strategy[-1] = strategy2[-1]
else:
out_strategy = strategy2.copy()
out_strategy[-2] = strategy1[-2]
device_matrix = out_strategy.copy()
device_matrix.insert(-1, strategy1[-1])
self.out_strategy = out_strategy
need_dev_num = 1
for s in device_matrix:
need_dev_num = need_dev_num * s
self.need_dev_num = need_dev_num
self.device_matrix = device_matrix
out_size = 1
for s in out_shape:
out_size = out_size * s
number_range = min(1000, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
np.float32)
device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
x_index = device_index[:-1].copy()
if transpose_a:
temp = x_index[-1]
x_index[-1] = x_index[-2]
x_index[-2] = temp
y_index = device_index[:-3].copy()
y_index.append(device_index[-2])
y_index.append(device_index[-1])
if transpose_b:
temp = y_index[-1]
y_index[-1] = y_index[-2]
y_index[-2] = temp
out_index = device_index[:-2].copy()
out_index.append(device_index[-1])
print(device_matrix)
print(device_index)
need_dev_num_ = 1
for s in strategy_[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num_
self.y_id = self.list_to_id(y_index, self.strategy[2])
self.out_id = self.list_to_id(out_index, self.out_strategy)
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def id_to_list(self, id_, shape):
"""
shape: the upper bound of each dimension, e.g. (2, 4, 8)
"""
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def forward_mindspore_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
matmul.set_train()
out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
return out_me.asnumpy()
def forward_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])  # derived from the device matrix
z1 = Tensor(zs[self.x_id])
matmul.set_train()
matmul.set_auto_parallel()
out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
return out_me.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
net_me = Grad(matmul)
net_me.set_train()
out_grad_me = Tensor(self.output_grad_np)
out_grad = net_me(x, y, z, out_grad_me)
return out_grad
def grad_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
out_grad_me = Tensor(self.output_grad_np)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(xs[self.x_id])  # derived from the device matrix
y1 = Tensor(ys[self.y_id])
z1 = Tensor(zs[self.x_id])
out_grad1 = Tensor(out_grads[self.out_id])
net_me = Grad(matmul)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net_me.set_auto_parallel()
net_me.set_train()
out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
parallel_inputs_run=[x1, y1, z1, out_grad1])
return out_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2])
input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1])
assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001)
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
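
id_to_list and list_to_id above are mixed-radix conversions between a flat device id and its coordinates in the device matrix (the docstring's "upper bound of each dimension"). A standalone round-trip check under a hypothetical 2 x 2 x 2 device matrix:

def id_to_list(id_, shape):
    # Flat id -> coordinates, most-significant dimension first.
    result = []
    r = id_
    for i in range(len(shape)):
        v = 1
        for j in range(i + 1, len(shape)):
            v = v * shape[j]
        result.append(r // v)
        r = r % v
    return result

def list_to_id(id_list, shape):
    # Coordinates -> flat id, the inverse of id_to_list.
    result = 0
    for i in range(len(id_list)):
        v = 1
        for j in range(i + 1, len(id_list)):
            v = v * shape[j]
        result = result + id_list[i] * v
    return result

device_matrix = [2, 2, 2]            # e.g. an out_strategy of (2, 2) with strategy1[-1] == 2 inserted
for device_id in range(8):
    coords = id_to_list(device_id, device_matrix)
    assert list_to_id(coords, device_matrix) == device_id
print(id_to_list(6, device_matrix))  # -> [1, 1, 0]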

+213 -214 tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py

@@ -1,214 +1,213 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Max(Cell):
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
super(Max, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, input1, input2):
out = self.add(input1, input2)
return self.reduce_max(out, self.axis)
class MaxFactory:
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
self.strategy0 = strategy0
self.strategy1 = strategy1
self.axis = axis
self.keep_dims = keep_dims
input_size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
number_range = min(1000, input_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = self.input_np1.copy()
self.out_grad_np = None
out_shape = list(input_shape)
out_shape.pop(axis)
out_size = input_size / input_shape[axis]
number_range_ = min(1000, out_size)
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
np.float32)
out_strategy = list(strategy1[1])
out_strategy.pop(axis)
self.out_strategy = out_strategy
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in out_strategy:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
out = net(input1, input2)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
out_grad = Tensor(self.out_grad_np)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(input1, input2, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
out_grad = Tensor(output_grads[self.out_id])
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
parallel_inputs_run=[x1, y1, out_grad])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
print(out_mindspore)
print(out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_max_forward_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)


class Max(Cell):
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
super(Max, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis

def construct(self, input1, input2):
out = self.add(input1, input2)
return self.reduce_max(out, self.axis)


class MaxFactory:
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
self.strategy0 = strategy0
self.strategy1 = strategy1
self.axis = axis
self.keep_dims = keep_dims
input_size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
number_range = min(1000, input_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = self.input_np1.copy()
self.out_grad_np = None
out_shape = list(input_shape)
out_shape.pop(axis)
out_size = input_size / input_shape[axis]
number_range_ = min(1000, out_size)
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
np.float32)
out_strategy = list(strategy1[1])
out_strategy.pop(axis)
self.out_strategy = out_strategy
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in out_strategy:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def forward_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
out = net(input1, input2)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()

def grad_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
out_grad = Tensor(self.out_grad_np)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(input1, input2, out_grad)
return input_grad

def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
out_grad = Tensor(output_grads[self.out_id])
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
parallel_inputs_run=[x1, y1, out_grad])
return input_grad

def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
print(out_mindspore)
print(out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_max_forward_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.forward_cmp()


def test_reid_max_grad_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.grad_cmp()


def test_reid_max_forward_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()


def test_reid_max_grad_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()


def test_reid_max_forward_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_cmp()


def test_reid_max_grad_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_cmp()
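
A minimal NumPy-only sketch of the block layout these Max tests rely on: it mirrors get_parallel_blocks and the x_id arithmetic for the (256, 64) input with strategy (4, 1); the device_id value here is a hypothetical rank (the tests read it from RANK_ID).

import numpy as np

def get_parallel_blocks(input_, strategy):
    # Split along axis 0, 1, ... into strategy[i] equal pieces, one block per
    # logical device in row-major device order (same ordering as the tests use).
    blocks = [input_]
    for i, stra in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), stra, axis=i))
        blocks = temp
    return blocks

full = np.arange(256 * 64, dtype=np.float32).reshape(256, 64)
blocks = get_parallel_blocks(full, (4, 1))   # shard dim 0 across 4 devices, keep dim 1 whole
device_id = 3                                # hypothetical rank
x_id = device_id % len(blocks)               # need_dev_num = 4 * 1 = 4
assert blocks[x_id].shape == (64, 64)        # each device holds one (64, 64) slice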

+200 -201  tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py

@@ -1,201 +1,200 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MulSoftmax(Cell):
def __init__(self, strategy0=None, strategy1=None, axis=0):
super(MulSoftmax, self).__init__()
self.mul = P.Mul(strategy=strategy0)
self.softmax = P.Softmax(axis=axis, strategy=strategy1)
def construct(self, x, z):
out = self.mul(x, z)
return self.softmax(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class MulSoftmaxFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = MulSoftmax()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
self.strategy0[1])  # for TensorMul, input X1 is not broadcast while X2 (the scalar) is broadcast
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
import pytest

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MulSoftmax(Cell):
def __init__(self, strategy0=None, strategy1=None, axis=0):
super(MulSoftmax, self).__init__()
self.mul = P.Mul(strategy=strategy0)
self.softmax = P.Softmax(axis=axis, strategy=strategy1)

def construct(self, x, z):
out = self.mul(x, z)
return self.softmax(out)


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)


class MulSoftmaxFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_

def forward_mindspore_impl(self):
net = MulSoftmax()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()

def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad

def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
self.strategy0[1])  # for TensorMul, input X1 is not broadcast while X2 (the scalar) is broadcast
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()


@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
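
A NumPy-only sketch of the forward reference these MulSoftmax tests check, assuming the default axis=0 softmax and the scalar second input 1.0; the column slicing follows strategy1[1] = (1, 4), and device_id is a hypothetical rank.

import numpy as np

def softmax(x, axis=0):
    e = np.exp(x - x.max(axis=axis, keepdims=True))  # numerically stabilised softmax
    return e / e.sum(axis=axis, keepdims=True)

x = (np.arange(128 * 64) % 1000 - 500).reshape(128, 64).astype(np.float32)
out = softmax(x * 1.0, axis=0)            # multiply by the scalar y, then softmax over dim 0
column_blocks = np.split(out, 4, axis=1)  # strategy1[1] = (1, 4): dim 0 whole, dim 1 in 4 parts
device_id = 2                             # hypothetical rank
local = column_blocks[device_id % 4]      # what out_blocks[self.out_id] selects in forward_cmp
assert local.shape == (128, 16)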

+147 -149  tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py

@@ -1,149 +1,147 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
self.onehot = P.OneHot(axis, strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
def construct(self, indices):
return self.onehot(indices, self.depth, self.on_value, self.off_value)
class OneHotFactory:
def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32)
self.depth = depth
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.dtype = dtype
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def forward_mindspore_impl(self):
indices = Tensor(self.input_np)
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(indices)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth13000():
fact = OneHotFactory(input_shape=(128,),
depth=131072,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (2,)))
fact.forward_cmp()
def test_reid_onehot_forward_int32_131072_depth127():
fact = OneHotFactory(input_shape=(131072,),
depth=127,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (4,)))
fact.forward_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
self.onehot = P.OneHot(axis, strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)

def construct(self, indices):
return self.onehot(indices, self.depth, self.on_value, self.off_value)


class OneHotFactory:
def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32)
self.depth = depth
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.dtype = dtype
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

# NOTE: this method looks like an unused copy-paste leftover from the AddRelu test;
# AddRelu and the input_np1/input_np2/output_grad_np attributes it uses are not defined in this file.
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad

def forward_mindspore_impl(self):
indices = Tensor(self.input_np)
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(indices)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()

def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)


def test_reid_onehot_forward_int32_128_depth13000():
fact = OneHotFactory(input_shape=(128,),
depth=131072,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (2,)))
fact.forward_cmp()


def test_reid_onehot_forward_int32_131072_depth127():
fact = OneHotFactory(input_shape=(131072,),
depth=127,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (4,)))
fact.forward_cmp()
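
A NumPy-only sketch of the one-hot check above, with a deliberately small depth (the tests use 131072 and 127); the one_hot helper and the device_id value are illustrative assumptions, not MindSpore APIs.

import numpy as np

def one_hot(indices, depth, on_value=1.0, off_value=0.0):
    out = np.full(indices.shape + (depth,), off_value, dtype=np.float32)
    out[np.arange(indices.size), indices] = on_value
    return out

indices = (np.arange(128) % 10).astype(np.int32)   # same value pattern as self.input_np
depth = 16                                         # shrunk for the sketch
full = one_hot(indices, depth)
index_blocks = np.split(indices, 4)                # strategy (4,): shard the only dim 4 ways
out_blocks = np.split(full, 4, axis=0)             # the output keeps the same split on dim 0
device_id = 1                                      # hypothetical rank
assert np.array_equal(out_blocks[device_id % 4],
                      one_hot(index_blocks[device_id % 4], depth))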

+206 -206  tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py

@@ -1,206 +1,206 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class PReLU(Cell):
def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None):
super(PReLU, self).__init__()
self.add = P.TensorAdd(strategy=strategy1_)
self.prelu = P.PReLU(strategy=strategy_)
def construct(self, x, z, w):
out = self.add(x, z)
return self.prelu(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input, z, w, output_grad):
return grad_all_with_sens(self.network)(input, z, w, output_grad)
class PReLUFactory:
def __init__(self, input_shape, strategy):
n, c = input_shape[:2]
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32)
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.channel = c
self.weight = np.array([np.float32(0.25)] * c)
self.strategy = strategy
def forward_mindspore_impl(self):
net = PReLU(channel=self.channel, w=self.weight)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
out = net(x, z, w)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
block_id = device_id % len(inputs)
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, z, w, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1])
block_id = device_id % len(output_grads)
output_grad = Tensor(output_grads[block_id])
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad],
parallel_inputs_run=[x1, z1, w1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1])
block_id = device_id % len(out_blocks)
assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore2 = input_grad_mindspore[2].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy()
input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1])
input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1])
block_id = device_id % len(input_grad_blocks)
assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001)
assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class PReLU(Cell):
def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None):
super(PReLU, self).__init__()
self.add = P.TensorAdd(strategy=strategy1_)
self.prelu = P.PReLU(strategy=strategy_)
self.channel = channel
def construct(self, x, z, w):
out = self.add(x, z)
return self.prelu(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input_, z, w, output_grad):
return grad_all_with_sens(self.network)(input_, z, w, output_grad)
class PReLUFactory:
def __init__(self, input_shape, strategy):
n, c = input_shape[:2]
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32)
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.channel = c
self.weight = np.array([np.float32(0.25)] * c)
self.strategy = strategy
def forward_mindspore_impl(self):
net = PReLU(channel=self.channel, w=self.weight)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
out = net(x, z, w)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
block_id = device_id % len(inputs)
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, z, w, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1])
block_id = device_id % len(output_grads)
output_grad = Tensor(output_grads[block_id])
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad],
parallel_inputs_run=[x1, z1, w1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1])
block_id = device_id % len(out_blocks)
assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore2 = input_grad_mindspore[2].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy()
input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1])
input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1])
block_id = device_id % len(input_grad_blocks)
assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001)
assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
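
A NumPy-only sketch of what these PReLU tests compare, assuming the usual channel-wise formula max(0, x) + w * min(0, x) and a shrunken input shape; the split order matches get_parallel_blocks, and device_id is a hypothetical rank.

import numpy as np

def prelu(x, w):
    w = w.reshape(1, -1, 1, 1)                        # one slope per channel, broadcast over (N, C, H, W)
    return np.maximum(x, 0.0) + w * np.minimum(x, 0.0)

x = np.random.randn(8, 4, 6, 6).astype(np.float32)   # small stand-in for (128, 64, 112, 112)
w = np.full(4, 0.25, dtype=np.float32)
out = prelu(x + np.zeros_like(x), w)                  # the add with a zero tensor, then PReLU

blocks = [out]
for axis, parts in enumerate((2, 1, 1, 2)):           # strategy (2, 1, 1, 2) -> 2 * 1 * 1 * 2 = 4 blocks
    blocks = [piece for b in blocks for piece in np.split(b, parts, axis=axis)]
device_id = 3                                         # hypothetical rank
assert blocks[device_id % len(blocks)].shape == (4, 4, 6, 3)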

+252 -253  tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py

@@ -1,253 +1,252 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose as allclose_nparray
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class GradScalar(Cell):
def __init__(self, network):
super(GradScalar, self).__init__()
self.network = network
self.sens = Tensor([1.0], dtype=ms.float32)
def construct(self, x, y):
return grad_all_with_sens(self.network)(x, y, self.sens)
class ReduceMean(Cell):
def __init__(self, keep_dims, axis, strategy0=None, strategy1=None):
super(ReduceMean, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, x, y):
out = self.add(x, y)
return self.reduce_mean(out, self.axis)
class ReduceMeanFactory:
def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
self.keep_dims = keep_dims
self.axis = axis
target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.array([1.0], dtype=np.float32)
if len(target_shape) > 0:
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype(
np.float32) + 1.0
self.shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
axis_ = list(axis)
if axis_[0] == -1:
axis_[0] = len(input_shape) - 1
for i in range(0, len(input_shape)):
if i in axis_:
if keep_dims:
out_strategy.append(1)
else:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
block_id = device_id % need_dev_num0
device_index = self.id_to_list(block_id, self.strategy1[1])
print(device_index)
for i in axis:
device_index[i] = 0
print(device_index)
self.out_id = self.list_to_id(device_index, self.out_strategy)
print(self.out_id)
def id_to_list(self, id, shape):
result = []
r = id
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
out_grad = Tensor(self.output_grad_np)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.forward_cmp()
def test_grad_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.grad_cmp()
def test_reid_reducemean_input_64x128x28x28():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_grad_reid_reducemean_input_64x128x28x28():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
from numpy import allclose as allclose_nparray

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)


class GradScalar(Cell):
def __init__(self, network):
super(GradScalar, self).__init__()
self.network = network
self.sens = Tensor([1.0], dtype=ms.float32)

def construct(self, x, y):
return grad_all_with_sens(self.network)(x, y, self.sens)


class ReduceMean(Cell):
def __init__(self, keep_dims, axis, strategy0=None, strategy1=None):
super(ReduceMean, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis

def construct(self, x, y):
out = self.add(x, y)
return self.reduce_mean(out, self.axis)


class ReduceMeanFactory:
def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
self.keep_dims = keep_dims
self.axis = axis
target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.array([1.0], dtype=np.float32)
if len(target_shape) > 0:
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype(
np.float32) + 1.0
self.shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
axis_ = list(axis)
if axis_[0] == -1:
axis_[0] = len(input_shape) - 1
for i in range(0, len(input_shape)):
if i in axis_:
if keep_dims:
out_strategy.append(1)
else:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
block_id = device_id % need_dev_num0
device_index = self.id_to_list(block_id, self.strategy1[1])
print(device_index)
for i in axis:
device_index[i] = 0
print(device_index)
self.out_id = self.list_to_id(device_index, self.out_strategy)
print(self.out_id)

def id_to_list(self, id_, shape):
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result

def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
out = net(x, y)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()

def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
out_grad = Tensor(self.output_grad_np)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, out_grad)
return input_grad

def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad

def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.forward_cmp()


def test_grad_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.grad_cmp()


def test_reid_reducemean_input_64x128x28x28():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()


def test_grad_reid_reducemean_input_64x128x28x28():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
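
A standalone sketch of the rank bookkeeping in ReduceMeanFactory: id_to_list turns a flat device id into per-dimension block coordinates of the input layout, the reduced axes are zeroed, and list_to_id maps the coordinates onto the output layout; the numbers follow the (2, 1, 2, 1), keep_dims=True case with a hypothetical device_id of 3.

def id_to_list(flat_id, shape):
    # Row-major decomposition of a flat index into per-dimension coordinates.
    coords, r = [], flat_id
    for i in range(len(shape)):
        v = 1
        for s in shape[i + 1:]:
            v *= s
        coords.append(r // v)
        r %= v
    return coords

def list_to_id(coords, shape):
    # Inverse mapping for a (possibly different) layout.
    flat = 0
    for i in range(len(coords)):
        v = 1
        for s in shape[i + 1:]:
            v *= s
        flat += coords[i] * v
    return flat

in_strategy = (2, 1, 2, 1)        # strategy1[1] for the (64, 128, 32, 32) case
out_strategy = (2, 1, 1, 1)       # axes (2, 3) reduced with keep_dims=True -> 1
device_id = 3                     # hypothetical rank in the 4-device group
coords = id_to_list(device_id % 4, in_strategy)   # -> [1, 0, 1, 0]
for axis in (2, 3):
    coords[axis] = 0              # every shard along a reduced axis maps to block 0
assert list_to_id(coords, out_strategy) == 1      # this rank checks output block 1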

+206 -206  tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py

@@ -1,206 +1,206 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose as allclose_nparray
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class Reshape(Cell):
def __init__(self, target_shape, strategy0=None, strategy1=None):
super(Reshape, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reshape = P.Reshape(strategy=strategy1)
self.shape = tuple(target_shape)
def construct(self, input1, input2):
x = self.add(input1, input2)
return self.reshape(x, self.shape)
class ReshapeFactory:
def __init__(self, input_shape, target_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.target_shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = [1] * len(target_shape)
out_strategy[0] = strategy1[1][0]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Reshape(self.target_shape)
out = net(x, y)
return out.asnumpy()
def forward_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Reshape(self.target_shape)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_reshape_cmp(self):
out_mindspore = self.forward_reshape_mindspore_impl()
out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_reshape_cmp(self):
input_grad_mindspore = self.grad_reshape_mindspore_impl()
input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_reshape_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.forward_reshape_cmp()
def test_reid_reshape_grad_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.grad_reshape_cmp()
@pytest.mark.reid_forward
def test_reid_reshape_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_reshape_cmp()
@pytest.mark.reid_grad
def test_reid_reshape_grad_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_reshape_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
from numpy import allclose as allclose_nparray
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class Reshape(Cell):
def __init__(self, target_shape, strategy0=None, strategy1=None):
super(Reshape, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reshape = P.Reshape(strategy=strategy1)
self.shape = tuple(target_shape)
def construct(self, input1, input2):
x = self.add(input1, input2)
return self.reshape(x, self.shape)
class ReshapeFactory:
def __init__(self, input_shape, target_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.target_shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = [1] * len(target_shape)
out_strategy[0] = strategy1[1][0]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Reshape(self.target_shape)
out = net(x, y)
return out.asnumpy()
def forward_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Reshape(self.target_shape)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_reshape_cmp(self):
out_mindspore = self.forward_reshape_mindspore_impl()
out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_reshape_cmp(self):
input_grad_mindspore = self.grad_reshape_mindspore_impl()
input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_reshape_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.forward_reshape_cmp()
def test_reid_reshape_grad_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.grad_reshape_cmp()
@pytest.mark.reid_forward
def test_reid_reshape_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_reshape_cmp()
@pytest.mark.reid_grad
def test_reid_reshape_grad_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_reshape_cmp()
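
A note on the reshape factory's output indexing (my reading of the code above, illustrated with the numbers of the first test case): the output is only ever split along axis 0, using the same factor as the first axis of strategy1, so out_id is simply device_id modulo that factor.

# Illustrative arithmetic only, mirroring ReshapeFactory for target_shape (128, 25088), strategy1 (0, (4, 1, 1, 1)).
target_shape = (128, 25088)
strategy1 = (0, (4, 1, 1, 1))
out_strategy = [1] * len(target_shape)
out_strategy[0] = strategy1[1][0]      # -> [4, 1]: split 4 ways along axis 0 only
need_dev_num1 = 1
for s in out_strategy:
    need_dev_num1 *= s                 # 4 blocks, each a (32, 25088) slice
device_id = 3                          # hypothetical rank; in the tests it comes from RANK_ID
out_id = device_id % need_dev_num1     # rank 3 compares against block 3
assert out_id == 3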

+ 235
- 236
tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py

@@ -1,236 +1,235 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
from numpy import allclose as allclose_nparray
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, perm_in, strategy0=None, strategy1=None):
super(Net, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.transpose = P.Transpose(strategy=strategy1)
self.perm_in = perm_in
def construct(self, x, y):
out = self.add(x, y)
return self.transpose(out, self.perm_in)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class TransposeFactory:
def __init__(self, input_shape, perm_in, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = self.input_np1.transpose(perm_in).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.target_shape = target_shape
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.perm_in = perm_in
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
for i in perm_in:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
device_index = self.id_to_list(device_id % need_dev_num1,
self.strategy1[1]) # encoding to get the index before transpose
device_index_transpose = []
for i in perm_in:
device_index_transpose.append(device_index[i])
self.out_id = self.list_to_id(device_index_transpose, self.out_strategy)
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def id_to_list(self, id, shape):
result = []
r = id
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Net(self.perm_in)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Net(self.perm_in)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_transpose_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_transpose_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_transpose_input_256x512_output_512x256_perm_1x0():
fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
fact.forward_transpose_cmp()
def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0():
fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
fact.grad_transpose_cmp()
def test_reid_transpose_input_512x256_output_256x512_perm_1x0():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_transpose_cmp()
def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_transpose_cmp()
def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
fact.forward_transpose_cmp()
def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
fact.grad_transpose_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
from numpy import allclose as allclose_nparray

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Net(Cell):
def __init__(self, perm_in, strategy0=None, strategy1=None):
super(Net, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.transpose = P.Transpose(strategy=strategy1)
self.perm_in = perm_in

def construct(self, x, y):
out = self.add(x, y)
return self.transpose(out, self.perm_in)


class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network

def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)


class TransposeFactory:
def __init__(self, input_shape, perm_in, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = self.input_np1.transpose(perm_in).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.target_shape = target_shape
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.perm_in = perm_in
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
for i in perm_in:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
device_index = self.id_to_list(device_id % need_dev_num1,
self.strategy1[1]) # encoding to get the index before transpose
device_index_transpose = []
for i in perm_in:
device_index_transpose.append(device_index[i])
self.out_id = self.list_to_id(device_index_transpose, self.out_strategy)

def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks

def id_to_list(self, id_, shape):
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result

def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result

def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Net(self.perm_in)
out = net(x, y)
return out.asnumpy()

def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()

def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Net(self.perm_in)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad

def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad

def forward_transpose_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

def grad_transpose_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_transpose_input_256x512_output_512x256_perm_1x0():
fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0():
fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat():
fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
fact.grad_transpose_cmp()
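
The least obvious part of TransposeFactory is how it locates the output block a rank should hold after the permutation. As I read the code above: id_to_list decodes the flat device id into a per-axis block index under strategy1, that index is permuted with perm_in, and list_to_id re-encodes it under the permuted (output) strategy. The standalone check below mirrors those two helpers with the numbers of the first transpose test; it is illustrative only and not part of this change set.

def id_to_list(id_, shape):
    # Decode a flat rank id into a mixed-radix per-axis index, most significant axis first.
    result, r = [], id_
    for i in range(len(shape)):
        v = 1
        for s in shape[i + 1:]:
            v *= s
        result.append(r // v)
        r %= v
    return result

def list_to_id(id_list, shape):
    # Inverse of id_to_list: re-encode a per-axis index as a flat id.
    result = 0
    for i, idx in enumerate(id_list):
        v = 1
        for s in shape[i + 1:]:
            v *= s
        result += idx * v
    return result

# strategy1 = (2, 2), perm_in = (1, 0): rank 1 holds input block (0, 1);
# after the transpose that data sits at output block (1, 0), i.e. out_id 2.
perm_in, in_strategy = (1, 0), (2, 2)
out_strategy = [in_strategy[i] for i in perm_in]
index = id_to_list(1, in_strategy)                             # [0, 1]
out_id = list_to_id([index[i] for i in perm_in], out_strategy)
assert out_id == 2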

+ 3
- 3
tests/ut/python/parallel/test_add_relu_redistribution.py

@@ -54,7 +54,7 @@ class Grad(nn.Cell):
return C.grad_all(self.network)(x, y)


def compile(net, x, y):
def compile_net(net, x, y):
net.set_auto_parallel()
_executor.compile(net, x, y)

@@ -69,7 +69,7 @@ def test_add_relu_stride_slice():

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y)
compile_net(net, x, y)


def test_add_relu_all_gather():
@@ -82,4 +82,4 @@ def test_add_relu_all_gather():

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y)
compile_net(net, x, y)

+ 20
- 21
tests/ut/python/parallel/test_allreduce_fusion.py

@@ -17,7 +17,6 @@ import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore import context
from mindspore.common.api import _executor
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim.momentum import Momentum
@@ -131,56 +130,56 @@ def test_allreduce_fusion_parameters():
cost_model_context.reset_cost_model_context()
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2)
algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm')
assert (algorithm == 2)
assert algorithm == 2
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm')
assert (algorithm == 1)
assert algorithm == 1
cost_model_context.reset_cost_model_context()
algorithm = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_algorithm')
assert (algorithm == 0)
assert algorithm == 0

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
fusion_times = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_times')
assert (fusion_times == 2)
assert fusion_times == 2

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.2)
tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent')
assert (tail_percent == 0.2)
assert tail_percent == 0.2
cost_model_context.reset_cost_model_context()
tail_percent = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_percent')
assert (tail_percent == 0.1)
assert tail_percent == 0.1

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.2)
tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time')
assert (tail_time == 0.2)
assert tail_time == 0.2
cost_model_context.reset_cost_model_context()
tail_time = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_tail_time')
assert (tail_time == 0.1)
assert tail_time == 0.1

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.2)
allreduce_inherent_time = cost_model_context.get_cost_model_context(
'costmodel_allreduce_fusion_allreduce_inherent_time')
assert (allreduce_inherent_time == 0.2)
assert allreduce_inherent_time == 0.2
cost_model_context.reset_cost_model_context()
allreduce_inherent_time = cost_model_context.get_cost_model_context(
'costmodel_allreduce_fusion_allreduce_inherent_time')
assert (allreduce_inherent_time == 0.1)
assert allreduce_inherent_time == 0.1

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.2)
allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth')
assert (allreduce_bandwidth == 0.2)
assert allreduce_bandwidth == 0.2
cost_model_context.reset_cost_model_context()
allreduce_bandwidth = cost_model_context.get_cost_model_context('costmodel_allreduce_fusion_allreduce_bandwidth')
assert (allreduce_bandwidth == 0.1)
assert allreduce_bandwidth == 0.1

cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.2)
computation_time_parameter = cost_model_context.get_cost_model_context(
'costmodel_allreduce_fusion_computation_time_parameter')
assert (computation_time_parameter == 0.2)
assert computation_time_parameter == 0.2
cost_model_context.reset_cost_model_context()
computation_time_parameter = cost_model_context.get_cost_model_context(
'costmodel_allreduce_fusion_computation_time_parameter')
assert (computation_time_parameter == 0.1)
assert computation_time_parameter == 0.1


def test_allreduce_fusion1():
@@ -201,7 +200,7 @@ def test_allreduce_fusion1():
'backbone2.fc2.weight': 1,
'backbone2.fc1.weight': 1,
'backbone1.fc1.weight': 1}
assert (allreduce_fusion_dict == expect_dict)
assert allreduce_fusion_dict == expect_dict
cost_model_context.reset_cost_model_context()


@@ -214,7 +213,7 @@ def test_allreduce_fusion2():
net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
allreduce_fusion_dict = train_common(net)
expect_dict = {}
assert (allreduce_fusion_dict == expect_dict)
assert allreduce_fusion_dict == expect_dict
cost_model_context.reset_cost_model_context()


@@ -240,7 +239,7 @@ def test_allreduce_fusion3():
'backbone1.fc2.weight': 2,
'backbone1.fc1.bias': 2,
'backbone1.fc1.weight': 2}
assert (allreduce_fusion_dict == expect_dict)
assert allreduce_fusion_dict == expect_dict
cost_model_context.reset_cost_model_context()


@@ -267,7 +266,7 @@ def test_allreduce_fusion4():
'backbone1.fc2.weight': 1,
'backbone1.fc1.weight': 1}

assert (allreduce_fusion_dict == expect_dict)
assert allreduce_fusion_dict == expect_dict
cost_model_context.reset_cost_model_context()


@@ -295,7 +294,7 @@ def test_allreduce_fusion5():
'backbone1.fc4.weight': 2,
'backbone1.fc3.weight': 2,
'backbone1.fc2.weight': 1,
'backbone1.fc1.weight': 1, }
'backbone1.fc1.weight': 1,}

assert (allreduce_fusion_dict == expect_dict)
assert allreduce_fusion_dict == expect_dict
cost_model_context.reset_cost_model_context()

+ 1
- 2
tests/ut/python/parallel/test_alltoall.py

@@ -67,7 +67,6 @@ def all_to_all_net(strategy1):


def all_to_all_common(strategy1):
batch_size = 32
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
@@ -104,7 +103,7 @@ def test_all_to_all():
[8, 1]],
'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [
[1, 1], [1, 8]]}
assert (strategys == expect_dict)
assert strategys == expect_dict
context.set_context(save_graphs=False)




+ 20
- 20
tests/ut/python/parallel/test_arithmetic.py

@@ -43,7 +43,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -69,7 +69,7 @@ def test_matmul_sub():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_add():
@@ -93,7 +93,7 @@ def test_matmul_add():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_mul():
@@ -117,7 +117,7 @@ def test_matmul_mul():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_div():
@@ -141,7 +141,7 @@ def test_matmul_div():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_greater():
@@ -165,7 +165,7 @@ def test_matmul_greater():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_add_broadcast():
@@ -189,7 +189,7 @@ def test_matmul_add_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_add_broadcast2():
@@ -213,7 +213,7 @@ def test_matmul_add_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_sub_broadcast():
@@ -237,7 +237,7 @@ def test_matmul_sub_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_sub_broadcast2():
@@ -261,7 +261,7 @@ def test_matmul_sub_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_mul_broadcast():
@@ -285,7 +285,7 @@ def test_matmul_mul_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_mul_broadcast2():
@@ -309,7 +309,7 @@ def test_matmul_mul_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_div_broadcast():
@@ -333,7 +333,7 @@ def test_matmul_div_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_div_broadcast2():
@@ -357,7 +357,7 @@ def test_matmul_div_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_greater_broadcast():
@@ -381,7 +381,7 @@ def test_matmul_greater_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_greater_broadcast2():
@@ -405,7 +405,7 @@ def test_matmul_greater_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_floordiv():
@@ -429,7 +429,7 @@ def test_matmul_floordiv():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_floordiv_broadcast():
@@ -453,7 +453,7 @@ def test_matmul_floordiv_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_floordiv_broadcast2():
@@ -477,7 +477,7 @@ def test_matmul_floordiv_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_assign_sub():
@@ -504,4 +504,4 @@ def test_assign_sub():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
z = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y, z)
compile_net(net, x, y, z)

+ 0
- 1
tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py

@@ -20,7 +20,6 @@ from mindspore import Tensor
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss




+ 5
- 6
tests/ut/python/parallel/test_auto_parallel_arithmetic.py

@@ -18,7 +18,6 @@ import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
@@ -48,7 +47,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b, phase):
def compile_net(net, x, y, b, phase):
net.set_auto_parallel()
_executor.compile(net, x, y, b, phase=phase)

@@ -73,7 +72,7 @@ def test_auto_parallel_arithmetic():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 128]), dtype=ms.float32)
b = Tensor(np.ones([64, 128]), dtype=ms.float32)
compile(net, x, y, b, phase='train')
compile_net(net, x, y, b, phase='train')
strategies = _executor._get_strategy(net)
expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]],
'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]}
@@ -100,7 +99,7 @@ def test_auto_parallel_arithmetic_broadcast_both():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b, phase='train')
compile_net(net, x, y, b, phase='train')
strategies = _executor._get_strategy(net)
expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]],
'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]}
@@ -127,7 +126,7 @@ def test_auto_parallel_arithmetic_broadcast_right():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 32]), dtype=ms.float32)
b = Tensor(np.ones([32]), dtype=ms.float32)
compile(net, x, y, b, phase='train')
compile_net(net, x, y, b, phase='train')
strategies = _executor._get_strategy(net)
expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]],
'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}
@@ -154,7 +153,7 @@ def test_auto_parallel_arithmetic_broadcast_left():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 32]), dtype=ms.float32)
b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
compile(net, x, y, b, phase="train")
compile_net(net, x, y, b, phase="train")
strategies = _executor._get_strategy(net)
expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]],
'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}


+ 1
- 2
tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py

@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import re
import numpy as np

import mindspore as ms
import mindspore.nn as nn
@@ -21,7 +21,6 @@ from mindspore import Tensor
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.parallel._utils import _reset_op_id as reset_op_id
from tests.ut.python.ops.test_math_ops import VirtualLoss



+ 0
- 1
tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py

@@ -10,7 +10,6 @@ from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.parallel import _cost_model_context as cost_model_context
from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters
from mindspore.parallel._utils import _reset_op_id as reset_op_id




+ 4
- 4
tests/ut/python/parallel/test_auto_parallel_four_matmul.py

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, z, w, b)


def compile(net, x, y, z, w, b):
def compile_net(net, x, y, z, w, b):
net.set_auto_parallel()
_executor.compile(net, x, y, z, w, b)

@@ -77,7 +77,7 @@ def test_four_matmul_linear():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, z, w, b)
compile_net(net, x, y, z, w, b)


def test_four_matmul1():
@@ -103,7 +103,7 @@ def test_four_matmul1():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, z, w, b)
compile_net(net, x, y, z, w, b)


def test_four_matmul2():
@@ -130,4 +130,4 @@ def test_four_matmul2():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, z, w, b)
compile_net(net, x, y, z, w, b)

+ 1
- 1
tests/ut/python/parallel/test_auto_parallel_inference.py

@@ -36,4 +36,4 @@ def test_inference_phase():
train_network.set_train()
train_network.set_auto_parallel()

output = train_network(predict, label)
_ = train_network(predict, label)

+ 1
- 1
tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py

@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import re
import numpy as np

import mindspore as ms
import mindspore.nn as nn


+ 0
- 1
tests/ut/python/parallel/test_auto_parallel_parameter_cast.py

@@ -16,7 +16,6 @@ import numpy as np

import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import Tensor, Parameter
from mindspore import context
from mindspore.common import dtype as mstype


+ 4
- 4
tests/ut/python/parallel/test_auto_parallel_reduce_method.py

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -71,7 +71,7 @@ def test_sum_mul():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul2():
@@ -95,7 +95,7 @@ def test_sum_mul2():
x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul3():
@@ -119,4 +119,4 @@ def test_sum_mul3():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 2
- 2
tests/ut/python/parallel/test_auto_parallel_reshape.py

@@ -215,7 +215,7 @@ def test_reshape_auto_5():
size = 8
context.set_auto_parallel_context(device_num=size, global_rank=0)
x = Tensor(np.ones([4, 1024 * size, 1]), dtype=ms.float32)
y = Tensor(np.ones([4, 1024 * size, ]), dtype=ms.float32)
y = Tensor(np.ones([4, 1024 * size,]), dtype=ms.float32)

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
@@ -263,7 +263,7 @@ def test_reshape_auto_6():
size = 8
context.set_auto_parallel_context(device_num=size, global_rank=0)
x = Tensor(np.ones([4, 1024, 1]), dtype=ms.float32)
y = Tensor(np.ones([4, 1024, ]), dtype=ms.float32)
y = Tensor(np.ones([4, 1024,]), dtype=ms.float32)

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")


+ 4
- 4
tests/ut/python/parallel/test_auto_parallel_rhombus.py

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -72,7 +72,7 @@ def test_rhombus1():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_rhombus2():
@@ -103,7 +103,7 @@ def test_rhombus2():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_rhombus3():
@@ -134,4 +134,4 @@ def test_rhombus3():

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net, x, y, z)
compile_net(net, x, y, z)

+ 0
- 1
tests/ut/python/parallel/test_auto_parallel_softmax_loss.py

@@ -21,7 +21,6 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class NetWithLoss(nn.Cell):


+ 2
- 2
tests/ut/python/parallel/test_auto_parallel_transformer.py

@@ -105,8 +105,8 @@ def test_dmnet_train_step():
size = 8
context.set_auto_parallel_context(device_num=size, global_rank=0)

input = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01)
net = GradWrap(NetWithLoss(MultiTransformer()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net.set_auto_parallel()
_executor.compile(net, input)
_executor.compile(net, input_)

+ 18
- 4
tests/ut/python/parallel/test_auto_parallel_two_bn.py

@@ -1,5 +1,19 @@
import numpy as np
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import numpy as np

import mindspore as ms
import mindspore.nn as nn
@@ -33,7 +47,7 @@ class Blockcell(nn.Cell):
return out


def getBlock():
def get_block():
return Blockcell()


@@ -41,8 +55,8 @@ def test_two_bn():
class Net(nn.Cell):
def __init__(self):
super().__init__()
self.block1 = getBlock()
self.block2 = getBlock()
self.block1 = get_block()
self.block2 = get_block()
self.relu = P.ReLU()
self.add = P.TensorAdd()
self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32)


+ 6
- 6
tests/ut/python/parallel/test_auto_parallel_two_matmul.py

@@ -104,23 +104,23 @@ def test_two_matmul():
set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32,
fully_use_devices=False, elementwise_op_strategy_follow=False)
para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
assert para_slice_align_enable == False
assert not para_slice_align_enable
para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
assert para_slice_align_size == 32
fully_use_devices = get_algo_parameters("fully_use_devices")
assert fully_use_devices == False
assert not fully_use_devices
elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
assert elementwise_op_strategy_follow == False
assert not elementwise_op_strategy_follow

reset_algo_parameters()
para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
assert para_slice_align_enable == False
assert not para_slice_align_enable
para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
assert para_slice_align_size == 16
fully_use_devices = get_algo_parameters("fully_use_devices")
assert fully_use_devices == True
assert fully_use_devices
elementwise_op_strategy_follow = get_algo_parameters("elementwise_op_strategy_follow")
assert elementwise_op_strategy_follow == False
assert not elementwise_op_strategy_follow

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)


+ 1
- 4
tests/ut/python/parallel/test_auto_star_elimination.py

@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import os

import mindspore as ms
import mindspore.nn as nn
@@ -21,10 +20,8 @@ from mindspore import Tensor, Parameter
from mindspore import context
from mindspore.common import dtype as mstype
from mindspore.common.api import _executor
from mindspore.common.initializer import initializer
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss



+ 7
- 7
tests/ut/python/parallel/test_batch_matmul.py

@@ -41,7 +41,7 @@ _w2 = Tensor(np.ones([128, 32, 32]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -54,7 +54,7 @@ def test_batch_matmul_data_parallel():
strategy1 = ((16, 1, 1), (16, 1, 1))
strategy2 = ((16, 1, 1), (16, 1, 1))
net = Net(_w1, _w2, False, strategy1, strategy2)
compile(net)
compile_net(net)


def test_batch_matmul_model_parallel():
@@ -62,7 +62,7 @@ def test_batch_matmul_model_parallel():
strategy1 = ((1, 1, 1), (1, 1, 1))
strategy2 = ((1, 1, 1), (1, 1, 16))
net = Net(_w1, _w2, False, strategy1, strategy2)
compile(net)
compile_net(net)


def test_batch_matmul_hybrid_parallel():
@@ -70,13 +70,13 @@ def test_batch_matmul_hybrid_parallel():
strategy1 = ((2, 2, 2), (2, 2, 2))
strategy2 = ((2, 2, 2), (2, 2, 2))
net = Net(_w1, _w2, False, strategy1, strategy2)
compile(net)
compile_net(net)


def test_batch_matmul_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1, _w2, False)
compile(net)
compile_net(net)


def test_batch_matmul_repeat_calc():
@@ -84,7 +84,7 @@ def test_batch_matmul_repeat_calc():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((1, 2, 2), (1, 2, 2))
net = Net(_w1, _w2, False, strategy1, strategy2)
compile(net)
compile_net(net)


def test_batch_matmul_transpose_b():
@@ -92,4 +92,4 @@ def test_batch_matmul_transpose_b():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((1, 2, 2), (1, 2, 2))
net = Net(_w1, _w2, True, strategy1, strategy2)
compile(net)
compile_net(net)

+ 2
- 4
tests/ut/python/parallel/test_batchnorm_batch_parallel.py

@@ -30,7 +30,6 @@ from mindspore.train import Model, ParallelMode
from tests.dataset_mock import MindData

dev_num = 8
strategy_no_weight = ((dev_num, 1, 1, 1),)
strategy_weight = ((dev_num, 1, 1, 1), (1, 1, 1, 1))
strategy_bn = ((dev_num, 1, 1, 1), (1,), (1,))
strategy_fc_weight_bias = ((dev_num, 1), (1, 1), (1,))
@@ -62,7 +61,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0):
weight_shape = (out_channels, in_channels, 7, 7)
weight = Tensor(np.ones(weight_shape).astype(np.float32))
conv = Conv2d(in_channels, out_channels,
kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False,
kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False,
pad_mode="same")
conv.conv2d.set_strategy(strategy_weight)
return conv
@@ -95,7 +94,7 @@ class ResNet(Cell):
def __init__(self, num_classes=100):
super(ResNet, self).__init__()
strategy_no_weight = ((dev_num, 1, 1, 1),)
self.conv1 = conv7x7(3, 64, stride=2, padding=3)
self.conv1 = conv7x7(3, 64, stride=2, padding=0)
self.bn1 = bn_with_initialize(64)
self.relu = ReLU()
self.relu.relu.set_strategy(strategy_no_weight)
@@ -124,7 +123,6 @@ def test_batchnorm_batch_parallel():
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
rank_size = 0

predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)


+ 6
- 7
tests/ut/python/parallel/test_bn_prelu_cell.py View File

@@ -171,7 +171,7 @@ class PReLU(nn.Cell):

if not isinstance(w, Tensor):
w = Tensor(w)
self.w = Parameter(initializer(w, [channel, ]), name='a')
self.w = Parameter(initializer(w, [channel,]), name='a')
self.prelu = P.PReLU()
self.relu = P.ReLU().set_strategy(((1)))

@@ -181,7 +181,7 @@ class PReLU(nn.Cell):


class BNNet(nn.Cell):
def __init__(self, strategy0, strategy1, strategy2):
def __init__(self):
super(BNNet, self).__init__()
self.bn = FusedBatchNorm(512)
self.prelu = PReLU(512)
@@ -192,13 +192,12 @@ class BNNet(nn.Cell):
return x


def bn_net(strategy0, strategy1, strategy2):
return BNNet(strategy0=strategy0, strategy1=strategy1, strategy2=strategy2)
def bn_net():
return BNNet()


def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strategy2=None, strategy_loss=None):
def bn_common(parallel_mode, train_flag, strategy_loss=None):
context.set_context(mode=context.GRAPH_MODE)
batch_size = 32
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
@@ -207,7 +206,7 @@ def bn_common(parallel_mode, train_flag, strategy0=None, strategy1=None, strateg
predict = Tensor(np.ones([32, 512]), dtype=ms.float32)
label = Tensor(np.ones([32]), dtype=ms.int32)
dataset = Dataset(predict, label, 2)
net = bn_net(strategy0, strategy1, strategy2)
net = bn_net()

loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
loss.softmax_cross_entropy.set_strategy(strategy_loss)


+ 1
- 1
tests/ut/python/parallel/test_bool_grad.py View File

@@ -21,7 +21,7 @@ from mindspore import context
from mindspore.common.parameter import Parameter
from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P
from mindspore.train import Model, ParallelMode
from mindspore.train import Model
from tests.dataset_mock import MindData

context.set_context(mode=context.GRAPH_MODE)


+ 2
- 2
tests/ut/python/parallel/test_broadcast_dict.py View File

@@ -54,7 +54,7 @@ def test_param_broadcast():
network.set_train()

predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01)
out = network(predict)
_ = network(predict)
context.reset_auto_parallel_context()


@@ -67,5 +67,5 @@ def test_param_not_broadcast():
network.set_train()

predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01)
out = network(predict)
_ = network(predict)
context.reset_auto_parallel_context()
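
Replacing out = network(predict) with _ = network(predict) above presumably silences pylint's unused-variable check (W0612): the call is made only for its side effect, so the result needs no name. A hypothetical minimal version (exercise is a made-up helper):

def exercise(network, data):
    _ = network(data)    # result discarded on purpose; naming it 'out' without using it raises W0612
    return True

exercise(len, [1, 2, 3])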

+ 11
- 11
tests/ut/python/parallel/test_comparison_function_info.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -69,7 +69,7 @@ def test_matmul_equal():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_not_equal():
@@ -92,7 +92,7 @@ def test_matmul_not_equal():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_not_equal_repeated_calculation():
@@ -115,7 +115,7 @@ def test_matmul_not_equal_repeated_calculation():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_maximum():
@@ -138,7 +138,7 @@ def test_matmul_maximum():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_maximum_broadcast():
@@ -161,7 +161,7 @@ def test_matmul_maximum_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_maximum_broadcast2():
@@ -184,7 +184,7 @@ def test_matmul_maximum_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_minimum():
@@ -207,7 +207,7 @@ def test_matmul_minimum():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_minimum_broadcast():
@@ -230,7 +230,7 @@ def test_matmul_minimum_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_minimum_broadcast2():
@@ -253,7 +253,7 @@ def test_matmul_minimum_broadcast2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_minimum_auto_parallel():
@@ -274,4 +274,4 @@ def test_matmul_minimum_auto_parallel():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 3
- 5
tests/ut/python/parallel/test_dataset_util.py View File

@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import mindspore as ms
from mindspore import Tensor
from mindspore.train._utils import _to_full_shapes, _to_full_tensor
@@ -35,7 +33,7 @@ def test_to_full_tensor_1():
expect = ([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 2, 3], [4, 5, 6], [0, 0, 0], [0, 0, 0]])
expect_tensor = Tensor(expect, dtype=ms.float32)

assert (full_tensor[0] == expect_tensor)
assert full_tensor[0] == expect_tensor


def test_to_full_tensor_2():
@@ -52,7 +50,7 @@ def test_to_full_tensor_2():
expect_tensor1 = Tensor(expect1, dtype=ms.int32)
expect_tensors = (expect_tensor0, expect_tensor1)

assert (full_tensor == expect_tensors)
assert full_tensor == expect_tensors


def test_to_full_tensor_sens_2():
@@ -70,4 +68,4 @@ def test_to_full_tensor_sens_2():
expect_tensor_sens = Tensor(0.1, dtype=ms.float32)
expect_tensors = (expect_tensor0, expect_tensor1, expect_tensor_sens)

assert (full_tensor == expect_tensors)
assert full_tensor == expect_tensors
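
Dropping the parentheses around the assert expressions above clears pylint's superfluous-parens check (C0325); both spellings perform the same comparison, only the formatting changes. For example:

a, b = 3, 3
assert (a == b)   # C0325: superfluous parentheses after the 'assert' keyword
assert a == b     # same check, no warning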

+ 2
- 2
tests/ut/python/parallel/test_dense_matmul.py View File

@@ -47,8 +47,8 @@ class DenseMutMulNet(nn.Cell):

def test_dmnet_train_step():
context.reset_auto_parallel_context()
input = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01)
label = Tensor(np.zeros([32, 768]).astype(np.float32))
net = DenseMutMulNet()
net = train_step_with_loss_warp(DenseMutMulNet())
_executor.compile(net, input, label)
_executor.compile(net, input_, label)
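
As with compile/compile_net, renaming the local input to input_ avoids shadowing a Python built-in (here input()), which pylint also reports as redefined-builtin (W0622). A stand-alone sketch with a made-up build_batch helper:

import numpy as np

def build_batch():
    input_ = np.ones([32, 128]).astype(np.float32) * 0.01   # naming this 'input' would shadow input()
    return input_.shape

print(build_batch())   # (32, 128)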

+ 4
- 4
tests/ut/python/parallel/test_different_type_for_div_op.py View File

@@ -32,7 +32,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, bias)


def compile(net, x, y, bias):
def compile_net(net, x, y, bias):
net.set_auto_parallel()
_executor.compile(net, x, y, bias)

@@ -58,7 +58,7 @@ def test_sum_as_loss_float16():
x = Tensor(np.ones([64, 32]), dtype=ms.float16)
y = Tensor(np.ones([64, 32]), dtype=ms.float16)
bias = Tensor(np.ones([64]), dtype=ms.float16)
compile(net, x, y, bias)
compile_net(net, x, y, bias)


def test_sum_as_loss_float32():
@@ -82,7 +82,7 @@ def test_sum_as_loss_float32():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
bias = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, bias)
compile_net(net, x, y, bias)


def test_sum_as_loss_int32():
@@ -106,4 +106,4 @@ def test_sum_as_loss_int32():
x = Tensor(np.ones([64, 32]), dtype=ms.int32)
y = Tensor(np.ones([64, 32]), dtype=ms.int32)
bias = Tensor(np.ones([64]), dtype=ms.int32)
compile(net, x, y, bias)
compile_net(net, x, y, bias)

+ 6
- 6
tests/ut/python/parallel/test_dropout_do_mask.py View File

@@ -50,7 +50,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -63,7 +63,7 @@ def test_dropout_do_mask_data_parallel():
strategy1 = ((16, 1), (16, 1))
strategy2 = ((16, 1),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_dropout_do_mask_model_parallel():
@@ -71,7 +71,7 @@ def test_dropout_do_mask_model_parallel():
strategy1 = ((1, 16), (1, 16))
strategy2 = ((1, 16),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_dropout_do_mask_hybrid_parallel():
@@ -79,13 +79,13 @@ def test_dropout_do_mask_hybrid_parallel():
strategy1 = ((4, 4), (4, 4))
strategy2 = ((4, 4),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_dropout_do_mask_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1)
compile(net)
compile_net(net)


def test_dropout_do_mask_repeat_calc():
@@ -93,4 +93,4 @@ def test_dropout_do_mask_repeat_calc():
strategy1 = ((4, 4), (4, 4))
strategy2 = ((2, 4),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)

+ 11
- 11
tests/ut/python/parallel/test_element_wise_function.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -72,7 +72,7 @@ def test_matmul_pow():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_exp():
@@ -98,7 +98,7 @@ def test_matmul_exp():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_log():
@@ -124,7 +124,7 @@ def test_matmul_log():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_logical_not():
@@ -151,7 +151,7 @@ def test_matmul_logical_not():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_cast():
@@ -178,7 +178,7 @@ def test_matmul_cast():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.int32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cast_before_mirror():
@@ -202,7 +202,7 @@ def test_cast_before_mirror():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float16)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cast_before_mirror1():
@@ -226,7 +226,7 @@ def test_cast_before_mirror1():
x = Tensor(np.ones([128, 32]), dtype=ms.float16)
y = Tensor(np.ones([32, 64]), dtype=ms.float16)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cast_before_mirror2():
@@ -250,7 +250,7 @@ def test_cast_before_mirror2():
x = Tensor(np.ones([128, 32]), dtype=ms.float16)
y = Tensor(np.ones([32, 64]), dtype=ms.float16)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cast_before_mirror3():
@@ -274,7 +274,7 @@ def test_cast_before_mirror3():
x = Tensor(np.ones([128, 32]), dtype=ms.float16)
y = Tensor(np.ones([32, 64]), dtype=ms.float16)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_mul_two_cast():
@@ -303,4 +303,4 @@ def test_mul_two_cast():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
b = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 7
- 7
tests/ut/python/parallel/test_expand_dims.py View File

@@ -54,7 +54,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -68,7 +68,7 @@ def test_expand_dims_data_parallel():
strategy2 = ((16, 1, 1),)
strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1))
net = Net(_w1, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_expand_dims_model_parallel():
@@ -77,7 +77,7 @@ def test_expand_dims_model_parallel():
strategy2 = ((1, 1, 16),)
strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1))
net = Net(_w1, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_expand_dims_hybrid_parallel():
@@ -86,13 +86,13 @@ def test_expand_dims_hybrid_parallel():
strategy2 = ((2, 2, 4),)
strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
net = Net(_w1, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_expand_dims_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1)
compile(net)
compile_net(net)


def test_expand_dims_repeat_calc():
@@ -101,7 +101,7 @@ def test_expand_dims_repeat_calc():
strategy2 = ((1, 2, 2),)
strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
net = Net(_w1, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_expand_dims_parameter():
@@ -109,4 +109,4 @@ def test_expand_dims_parameter():
strategy1 = ((1, 2, 2),)
strategy2 = ((2, 2, 4, 1), (2, 2, 4, 1))
net = Net2(_w1, strategy1, strategy2)
compile(net)
compile_net(net)

+ 6
- 6
tests/ut/python/parallel/test_forward_graph.py View File

@@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)


def compile(net):
def compile_net(net):
net.set_auto_parallel()
_executor.compile(net, _x, _b)
context.reset_auto_parallel_context()
@@ -50,7 +50,7 @@ def test_forward_graph_data_parallel():
strategy1 = ((16, 1, 1), (16, 1, 1))
strategy2 = ((16, 1, 1),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_forward_graph_model_parallel():
@@ -58,7 +58,7 @@ def test_forward_graph_model_parallel():
strategy1 = ((1, 1, 16), (1, 1, 16))
strategy2 = ((1, 1, 16),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_forward_graph_hybrid_parallel():
@@ -66,13 +66,13 @@ def test_forward_graph_hybrid_parallel():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((2, 2, 4),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_forward_graph_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1)
compile(net)
compile_net(net)


def test_forward_graph_repeat_calc():
@@ -80,4 +80,4 @@ def test_forward_graph_repeat_calc():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((1, 2, 2),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)

+ 0
- 1
tests/ut/python/parallel/test_gather_v2.py View File

@@ -18,7 +18,6 @@ import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.common import dtype as mstype
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
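
The single-line removal above, like the similar import deletions in several other files here, presumably clears pylint's unused-import warning (W0611): a name is imported but never referenced in the module. A generic illustration with standard-library modules:

import os            # used below, so it stays
# import math        # a line like this, never referenced again, is what W0611 flags

print(os.getcwd())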


+ 10
- 10
tests/ut/python/parallel/test_gather_v2_primitive.py View File

@@ -120,7 +120,7 @@ class TrainOneStepCell(Cell):
return F.depend(loss, self.optimizer(grads))


def net_trains(gather_v2_strategy, criterion, rank):
def net_trains(criterion, rank):
init()
lr = 0.1
momentum = 0.9
@@ -151,42 +151,42 @@ def test_auto_batch_parallel():
gather_v2_strategy = None
criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
rank = 2
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_2d_index_auto_batch_parallel():
gather_v2_strategy = None
criterion = GatherV2(2, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
rank = 2
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_batch_parallel():
gather_v2_strategy = ((device_number, 1),)
criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
rank = 2
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_strategy1():
gather_v2_strategy = ((16, 2),)
rank = 2
criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_strategy2():
gather_v2_strategy = ((1, device_number),)
rank = 2
criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_strategy3():
gather_v2_strategy = ((8, 1),)
rank = 2
criterion = GatherV2(1, strategy=gather_v2_strategy, index_size=batch_size_per_device * device_number)
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


class GatherV2Axis1(_Loss):
@@ -217,18 +217,18 @@ def test_axis1_auto_batch_parallel():
gather_v2_strategy = None
criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512)
rank = 2
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_axis1_batch_parallel():
gather_v2_strategy = ((device_number, 1),)
criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512)
rank = 2
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)


def test_axis1_strategy1():
gather_v2_strategy = ((16, 2),)
rank = 17
criterion = GatherV2Axis1(1, strategy=gather_v2_strategy, index_size=512)
net_trains(gather_v2_strategy, criterion, rank)
net_trains(criterion, rank)

+ 9
- 13
tests/ut/python/parallel/test_get_next.py View File

@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
@@ -23,8 +21,6 @@ from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter, ParameterTuple
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.ops.operations.comm_ops import _VirtualDataset
from tests.ut.python.ops.test_math_ops import VirtualLoss

context.set_context(mode=context.GRAPH_MODE)

@@ -56,7 +52,7 @@ class GradWrap(nn.Cell):
return C.grad_by_list(self.network, self.weights)()


def compile(net):
def compile_net(net):
net.set_auto_parallel()
_executor.compile(net)

@@ -67,7 +63,7 @@ def test_get_next_single():
super().__init__()
self.norm = P.L2Normalize(axis=1)
self.prelu = P.PReLU()
self.w = Parameter(initializer(w, [channel, ]), name='w')
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
x = self.norm(data)
@@ -84,7 +80,7 @@ def test_get_next_semi_auto_parallel():
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.w = Parameter(initializer(w, [channel, ]), name='w')
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
x = self.norm(data)
@@ -99,7 +95,7 @@ def test_get_next_semi_auto_parallel():
strategy4=strategy4)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
compile(net)
compile_net(net)


def test_get_next_semi_auto_parallel1():
@@ -108,7 +104,7 @@ def test_get_next_semi_auto_parallel1():
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.w = Parameter(initializer(w, [channel, ]), name='w')
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
x = self.norm(data)
@@ -123,7 +119,7 @@ def test_get_next_semi_auto_parallel1():
strategy4=strategy4)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
compile(net)
compile_net(net)


def test_get_next_auto_parallel():
@@ -132,7 +128,7 @@ def test_get_next_auto_parallel():
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.w = Parameter(initializer(w, [channel, ]), name='w')
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
x = self.norm(data)
@@ -144,7 +140,7 @@ def test_get_next_auto_parallel():
net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile(net)
compile_net(net)


def test_only_one_get_next():
@@ -159,4 +155,4 @@ def test_only_one_get_next():
context.set_auto_parallel_context(device_num=4, global_rank=0)
net = Net()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
compile(net)
compile_net(net)

+ 2
- 2
tests/ut/python/parallel/test_get_parameter_layout.py View File

@@ -52,8 +52,8 @@ def test_get_parameter_layout():
x_layout = [[2, 4], [1, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [1, -1]
weight_layout = [[2, 4], [0, -1], [16, 32]] # device_arrangement = [2, 4], tensor_map = [0, -1]
expect_dict = {'x': x_layout, 'w1': weight_layout}
# to be resolved: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut

assert (net.parameter_layout_dict == expect_dict)
# to be resolved: static local variable count_p is used in step_parallel.cc, it needs to be reset between each ut
assert net.parameter_layout_dict == expect_dict


if __name__ == '__main__':


+ 8
- 8
tests/ut/python/parallel/test_hybird_parallel_activation.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -72,7 +72,7 @@ def test_matmul_tanh():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_activation():
@@ -98,7 +98,7 @@ def test_matmul_activation():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_softmax():
@@ -124,7 +124,7 @@ def test_matmul_softmax():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_matmul_logsoftmax():
@@ -150,7 +150,7 @@ def test_matmul_logsoftmax():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_activations():
@@ -179,7 +179,7 @@ def test_activations():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_activations_repeated_calculation():
@@ -211,7 +211,7 @@ def test_activations_repeated_calculation():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_activations_axis_tuple():
@@ -243,4 +243,4 @@ def test_activations_axis_tuple():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 7
- 7
tests/ut/python/parallel/test_layer_norm.py View File

@@ -48,7 +48,7 @@ _w = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64, 32, 16]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -62,7 +62,7 @@ def test_layer_norm_data_parallel():
strategy2 = ((16, 1, 1, 1), (1, 1, 1), (1, 1, 1))
strategy3 = ((16, 1, 1, 1), (16, 1, 1, 1))
net = Net(_w, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_layer_norm_model_parallel():
@@ -71,7 +71,7 @@ def test_layer_norm_model_parallel():
strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1))
strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1))
net = Net(_w, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_layer_norm_hybrid_parallel():
@@ -80,13 +80,13 @@ def test_layer_norm_hybrid_parallel():
strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1))
strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1))
net = Net(_w, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_layer_norm_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w)
compile(net)
compile_net(net)


def test_layer_norm_repeat_calc():
@@ -95,7 +95,7 @@ def test_layer_norm_repeat_calc():
strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1))
strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
net = Net(_w, strategy1, strategy2, strategy3)
compile(net)
compile_net(net)


def test_layer_norm_wrong_strategy():
@@ -105,4 +105,4 @@ def test_layer_norm_wrong_strategy():
strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
net = Net(_w, strategy1, strategy2, strategy3)
with pytest.raises(RuntimeError):
compile(net)
compile_net(net)

+ 0
- 1
tests/ut/python/parallel/test_linear.py View File

@@ -21,7 +21,6 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class NetWithLoss(nn.Cell):


+ 9
- 10
tests/ut/python/parallel/test_loss_and_optimizer.py View File

@@ -19,9 +19,8 @@ import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore import context
from mindspore.common.api import _executor
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn import TrainOneStepCell
from mindspore.nn.optim import Momentum, LARS
from mindspore.ops import composite as C
from mindspore.ops import operations as P


@@ -36,7 +35,7 @@ class NetWithLoss(nn.Cell):
return self.loss(predict, b)[0]


def compile(net, x, b):
def compile_net(net, x, b):
net.set_auto_parallel()
_executor.compile(net, x, b)

@@ -72,7 +71,7 @@ def test_momentum():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)


def test_momentum_with_loss_scale():
@@ -106,7 +105,7 @@ def test_momentum_with_loss_scale():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)


def test_momentum_with_dynamic_lr():
@@ -141,7 +140,7 @@ def test_momentum_with_dynamic_lr():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)


def test_momentum_with_loss_scale_and_dynamic_lr():
@@ -177,7 +176,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)


def test_lars():
@@ -205,11 +204,11 @@ def test_lars():
net = Net(strategy1, strategy2, weight)

lr = Tensor(np.ones([6]), dtype=ms.float32)
SGD = Momentum(net.trainable_params(), lr, 0.9)
optimizer = LARS(SGD, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name,
sgd = Momentum(net.trainable_params(), lr, 0.9)
optimizer = LARS(sgd, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name,
lars_filter=lambda x: 'bn' not in x.name)
net_with_loss = NetWithLoss(net, strategy3)
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)
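
The SGD -> sgd rename above follows pylint's naming-convention check (invalid-name, C0103): a plain local variable is expected to be lower_case, while ALL_CAPS is reserved for module-level constants. A rough sketch with hypothetical names:

LEARNING_RATE = 0.1     # module-level constant: UPPER_CASE is expected here

def build_optimizer(params):
    sgd = {"params": params, "lr": LEARNING_RATE}   # naming this 'SGD' would trigger C0103
    return sgd

print(build_optimizer(["w1", "b1"]))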

+ 5
- 5
tests/ut/python/parallel/test_matmul_tensor.py View File

@@ -46,7 +46,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y)


def compile(net, x, y):
def compile_net(net, x, y):
net.set_auto_parallel()
_executor.compile(net, x, y)

@@ -79,7 +79,7 @@ def test_two_matmul():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 128]), dtype=ms.float32)

compile(net, x, y)
compile_net(net, x, y)


def test_matmul_mul_broadcast2():
@@ -103,7 +103,7 @@ def test_matmul_mul_broadcast2():

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
compile(net, x, y)
compile_net(net, x, y)


def test_two_matmul1():
@@ -133,7 +133,7 @@ def test_two_matmul1():
x = Tensor(np.ones([128, 128]), dtype=ms.float32)
y = Tensor(np.ones([128, 128]), dtype=ms.float32)

compile(net, x, y)
compile_net(net, x, y)


def test_matmul_add_tensor():
@@ -158,4 +158,4 @@ def test_matmul_add_tensor():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)

compile(net, x, y)
compile_net(net, x, y)

+ 6
- 6
tests/ut/python/parallel/test_neg.py View File

@@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -52,7 +52,7 @@ def test_neg_data_parallel():
strategy1 = ((16, 1, 1), (16, 1, 1))
strategy2 = ((16, 1, 1),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_neg_model_parallel():
@@ -60,7 +60,7 @@ def test_neg_model_parallel():
strategy1 = ((1, 1, 16), (1, 1, 16))
strategy2 = ((1, 1, 16),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_neg_hybrid_parallel():
@@ -68,13 +68,13 @@ def test_neg_hybrid_parallel():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((2, 2, 4),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_neg_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1)
compile(net)
compile_net(net)


def test_neg_repeat_calc():
@@ -82,4 +82,4 @@ def test_neg_repeat_calc():
strategy1 = ((2, 2, 4), (2, 2, 4))
strategy2 = ((1, 2, 2),)
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)

+ 1
- 1
tests/ut/python/parallel/test_one_dev.py View File

@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import re
import numpy as np

import mindspore as ms
import mindspore.nn as nn
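
Moving import re above import numpy as np matches pylint's wrong-import-order check (C0411): standard-library imports come first, third-party imports after. The target layout is simply:

import os            # standard library first
import re

import numpy as np   # third-party second

print(re.escape(os.sep), np.__version__)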


+ 8
- 9
tests/ut/python/parallel/test_one_hot_net.py View File

@@ -159,8 +159,8 @@ class SemiAutoOneHotNet(Cell):
weight_np = np.zeros(weight_shape, np.float32)
self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight')

def construct(self, input, label):
input_n = self.normalize(input)
def construct(self, input_, label):
input_n = self.normalize(input_)
w = self.normalize2(self.weight)
fc_o = self.fc(input_n, w)
fc_o_shape = F.shape(fc_o)
@@ -209,9 +209,8 @@ class Dataset(MindData):
raise StopIteration
self.index += 1
if self.input_num == 2:
return self.predict, self.label
else:
return self.predict,
return (self.predict, self.label)
return (self.predict,)

def reset(self):
self.index = 0
@@ -268,20 +267,20 @@ def test_bn_reshape_dense_bn_train_loss():
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input, label)
_executor.compile(net, input_, label)


def test_semi_one_hot_net_batch():
batch_size = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
input = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch())
@@ -289,7 +288,7 @@ def test_semi_one_hot_net_batch():
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input, label)
_executor.compile(net, input_, label)


def test_semi_one_hot_net_model():
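
The Dataset iterator rewrite above (also applied in test_prelu_cell.py and test_reshape.py) is pylint's no-else-return refactor (R1705): an else that follows a return is redundant, and the explicit parentheses make the one-element tuple easier to read. A stand-alone sketch of the same shape:

def next_item(predict, label, input_num):
    if input_num == 2:
        return (predict, label)
    return (predict,)             # no 'else' needed after the early return

print(next_item("p", "l", 2))     # ('p', 'l')
print(next_item("p", "l", 1))     # ('p',)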


+ 0
- 1
tests/ut/python/parallel/test_one_weight_parameter.py View File

@@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P




+ 0
- 9
tests/ut/python/parallel/test_onehot.py View File

@@ -126,15 +126,6 @@ def test_onehot_auto():
compile_graph(strategy1, strategy2, strategy3, strategy4, auto=True)


def test_onehot_model_parallel():
context.set_auto_parallel_context(device_num=16, global_rank=0)
strategy1 = ((2, 4), (4, 2))
strategy2 = ((2, 8),)
strategy3 = ((1, 16), (), ())
strategy4 = ((16, 1), (16, 1))
compile_graph(strategy1, strategy2, strategy3, strategy4)


def test_onehot_batch_parallel_axis0():
context.set_auto_parallel_context(device_num=16, global_rank=0)
strategy1 = ((2, 4), (4, 2))


+ 21
- 27
tests/ut/python/parallel/test_operator_model_parallel.py View File

@@ -21,8 +21,6 @@ from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.cell import Cell
from mindspore.nn.layer.activation import ReLU
from mindspore.nn.layer.basic import Dense
from mindspore.nn.layer.basic import Flatten
from mindspore.nn.layer.conv import Conv2d
from mindspore.nn.layer.normalization import BatchNorm2d
@@ -61,8 +59,7 @@ class DenseWrap(Cell):
self.has_bias = has_bias

self.weight = Parameter(initializer(
weight_init, [output_channels, input_channels]),
name="weight")
weight_init, [output_channels, input_channels]), name="weight")

if self.has_bias:
self.bias = Parameter(initializer(
@@ -103,7 +100,7 @@ class DatasetLenet(MindData):
self.index = 0


def conv3x3(in_channels, out_channels, stride=1, padding=1):
def conv3x3(in_channels, out_channels, stride=1):
"""3x3 convolution """
weight_shape = (out_channels, in_channels, 3, 3)
weight = Tensor(np.ones(weight_shape).astype(np.float32))
@@ -114,7 +111,7 @@ def conv3x3(in_channels, out_channels, stride=1, padding=1):
return conv


def conv1x1(in_channels, out_channels, stride=1, padding=0):
def conv1x1(in_channels, out_channels, stride=1):
"""1x1 convolution"""
weight_shape = (out_channels, in_channels, 1, 1)
weight = Tensor(np.ones(weight_shape).astype(np.float32))
@@ -125,7 +122,7 @@ def conv1x1(in_channels, out_channels, stride=1, padding=0):
return conv


def conv7x7(in_channels, out_channels, stride=1, padding=0):
def conv7x7(in_channels, out_channels, stride=1):
"""1x1 convolution"""
weight_shape = (out_channels, in_channels, 7, 7)
weight = Tensor(np.ones(weight_shape).astype(np.float32))
@@ -186,18 +183,17 @@ class ResidualBlock(Cell):
def __init__(self,
in_channels,
out_channels,
stride=1,
down_sample=False):
stride=1):
super(ResidualBlock, self).__init__()

out_chls = out_channels // self.expansion
self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0)
self.conv1 = conv1x1(in_channels, out_chls, stride=1)
self.bn1 = bn_with_initialize(out_chls)

self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0)
self.conv2 = conv3x3(out_chls, out_chls, stride=stride)
self.bn2 = bn_with_initialize(out_chls)

self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
self.conv3 = conv1x1(out_chls, out_channels, stride=1)
self.bn3 = bn_with_initialize_last(out_channels)

self.relu1 = P.ReLU().set_strategy(strategy_no_weight)
@@ -236,21 +232,21 @@ class ResidualBlockWithDown(Cell):
super(ResidualBlockWithDown, self).__init__()

out_chls = out_channels // self.expansion
self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0)
self.conv1 = conv1x1(in_channels, out_chls, stride=1)
self.bn1 = bn_with_initialize(out_chls)

self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=0)
self.conv2 = conv3x3(out_chls, out_chls, stride=stride)
self.bn2 = bn_with_initialize(out_chls)

self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
self.conv3 = conv1x1(out_chls, out_channels, stride=1)
self.bn3 = bn_with_initialize_last(out_channels)

self.relu1 = P.ReLU().set_strategy(strategy_no_weight)
self.relu2 = P.ReLU().set_strategy(strategy_no_weight)
self.relu3 = P.ReLU().set_strategy(strategy_no_weight)
self.downSample = down_sample
self.down_sample = down_sample

self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0)
self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride)
self.bn_down_sample = bn_with_initialize(out_channels)
self.add = TensorAdd().set_strategy(strategy_add)

@@ -279,7 +275,7 @@ class ResidualBlockWithDown(Cell):

class MakeLayer0(Cell):

def __init__(self, block, layer_num, in_channels, out_channels, stride):
def __init__(self, block, in_channels, out_channels, stride):
super(MakeLayer0, self).__init__()
self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True)
self.b = block(out_channels, out_channels, stride=stride)
@@ -295,14 +291,14 @@ class MakeLayer0(Cell):

class ResNet(Cell):

def __init__(self, block, layer_num, num_classes=100):
def __init__(self, block, num_classes=100):
super(ResNet, self).__init__()
self.conv1 = conv7x7(3, 64, stride=2, padding=3)
self.conv1 = conv7x7(3, 64, stride=2)
self.bn1 = bn_with_initialize(64)
self.relu = P.ReLU().set_strategy(strategy_no_weight)
self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.layer1 = MakeLayer0(
block, layer_num[0], in_channels=64, out_channels=256, stride=1)
block, in_channels=64, out_channels=256, stride=1)
self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight)
self.fc = fc_with_initialize(64 * block.expansion, num_classes)
self.flatten = Flatten()
@@ -320,12 +316,12 @@ class ResNet(Cell):


class ResNetModelParallel(Cell):
def __init__(self, block, layer_num, num_classes=100):
def __init__(self, block, num_classes=100):
super(ResNetModelParallel, self).__init__()
self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),))
self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.layer1 = MakeLayer0(
block, layer_num[0], in_channels=64, out_channels=256, stride=1)
block, in_channels=64, out_channels=256, stride=1)
self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight)
self.fc = fc_with_initialize(64 * block.expansion, num_classes)
self.flatten = Flatten()
@@ -341,11 +337,11 @@ class ResNetModelParallel(Cell):


def resnet_operator_net(num_classes):
return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes)
return ResNet(ResidualBlock, num_classes)


def resnet_model_parallel_net(num_classes):
return ResNetModelParallel(ResidualBlock, [3, 4, 6, 3], num_classes)
return ResNetModelParallel(ResidualBlock, num_classes)


def test_resnet_operator_batch_parallel():
@@ -354,7 +350,6 @@ def test_resnet_operator_batch_parallel():
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
rank_size = dev_num

context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
@@ -381,7 +376,6 @@ def test_resnet_model_parallel():
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
rank_size = dev_num

context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
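
The parameter removals in this file (for example padding in the conv helpers and down_sample in ResidualBlock) look like the standard fix for pylint's unused-argument warning (W0613): the functions accepted arguments their bodies never read. A minimal before/after sketch with a made-up conv helper:

def conv3x3_old(in_channels, out_channels, stride=1, padding=1):   # W0613: 'padding' is never read
    return (out_channels, in_channels, 3, 3, stride)

def conv3x3_new(in_channels, out_channels, stride=1):              # unused parameter removed
    return (out_channels, in_channels, 3, 3, stride)

print(conv3x3_new(64, 64))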


+ 3
- 3
tests/ut/python/parallel/test_optimizer_clone_weight.py View File

@@ -35,7 +35,7 @@ class NetWithLoss(nn.Cell):
return self.loss(predict, b)[0]


def compile(net, x, b):
def compile_net(net, x, b):
net.set_auto_parallel()
_Executor().compile(net, x, b)

@@ -72,7 +72,7 @@ def test_optimizer_clone_weight():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)


def test_optimizer_clone_weight2():
@@ -107,4 +107,4 @@ def test_optimizer_clone_weight2():
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(train_net, x, b)
compile_net(train_net, x, b)

+ 1
- 1
tests/ut/python/parallel/test_parameter_init.py View File

@@ -52,7 +52,7 @@ def test_parameter_init():
weight = Tensor(np.ones([64, 32]), dtype=ms.float32)

net = Net(strategy1, weight)
net(x, )
net(x,)


if __name__ == '__main__':


+ 7
- 7
tests/ut/python/parallel/test_prelu.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y)


def compile(net, x, y):
def compile_net(net, x, y):
net.set_auto_parallel()
_executor.compile(net, x, y)

@@ -63,7 +63,7 @@ def test_prelu_single_success1():
net = GradWrap(NetWithLoss(Net()))
x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32)
w = Tensor(np.random.rand(33), ms.float32)
compile(net, x, w)
compile_net(net, x, w)


def test_prelu_single_success2():
@@ -80,7 +80,7 @@ def test_prelu_single_success2():
net = GradWrap(NetWithLoss(Net()))
x = Tensor(np.random.rand(1, 33, 4, 4), ms.float32)
w = Tensor([0.1], ms.float32)
compile(net, x, w)
compile_net(net, x, w)


def test_prelu_parallel_success1():
@@ -100,7 +100,7 @@ def test_prelu_parallel_success1():
x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32)
w = Tensor(np.random.rand(4), dtype=ms.float32)
net = GradWrap(NetWithLoss(Net(strategy)))
compile(net, x, w)
compile_net(net, x, w)


def test_prelu_parallel_success2():
@@ -120,7 +120,7 @@ def test_prelu_parallel_success2():
x = Tensor(np.random.rand(4, 4, 32, 64), dtype=ms.float32)
w = Tensor(np.random.rand(4), dtype=ms.float32)
net = GradWrap(NetWithLoss(Net(strategy)))
compile(net, x, w)
compile_net(net, x, w)


def test_prelu_parallel_success3():
@@ -183,7 +183,7 @@ def test_prelu_parallel_success4():
x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32)
w = Tensor(np.random.rand(16), dtype=ms.float32)
net = GradWrap(NetWithLoss(Net(strategy)))
compile(net, x, w)
compile_net(net, x, w)


def test_prelu_parallel_success5():
@@ -203,4 +203,4 @@ def test_prelu_parallel_success5():
x = Tensor(np.random.rand(4, 16, 32, 64), dtype=ms.float32)
w = Tensor(np.random.rand(1), dtype=ms.float32)
net = GradWrap(NetWithLoss(Net(strategy)))
compile(net, x, w)
compile_net(net, x, w)

+ 3
- 5
tests/ut/python/parallel/test_prelu_cell.py View File

@@ -47,9 +47,8 @@ class Dataset(MindData):
raise StopIteration
self.index += 1
if self.input_num == 2:
return self.predict, self.label
else:
return self.predict,
return (self.predict, self.label)
return (self.predict,)

def reset(self):
self.index = 0
@@ -68,7 +67,7 @@ class PReLU(nn.Cell):
if not isinstance(w, Tensor):
raise TypeError("w only support np.float32, float or Tensor type.")

self.w = Parameter(initializer(w, [channel, ]), name='a')
self.w = Parameter(initializer(w, [channel,]), name='a')
self.prelu = P.PReLU()
self.relu = P.ReLU().set_strategy(((1,),))
self.sub = P.Sub().set_strategy(((1,), (1,)))
@@ -97,7 +96,6 @@ def prelu_net():


def reshape_common(parallel_mode):
batch_size = 32
learning_rate = 0.1
momentum = 0.9
epoch_size = 2


+ 29
- 29
tests/ut/python/parallel/test_reduce_method_info.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -74,7 +74,7 @@ def test_sum_mul():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul2():
@@ -101,7 +101,7 @@ def test_sum_mul2():
x = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 128, 64, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul3():
@@ -128,7 +128,7 @@ def test_sum_mul3():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul4():
@@ -155,7 +155,7 @@ def test_sum_mul4():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 32, 1]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul5():
@@ -179,7 +179,7 @@ def test_sum_mul5():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([1, 32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul6():
@@ -203,7 +203,7 @@ def test_sum_mul6():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_sum_mul7():
@@ -227,7 +227,7 @@ def test_sum_mul7():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([1, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_max_mul():
@@ -254,7 +254,7 @@ def test_max_mul():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 32]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_min_mul():
@@ -281,7 +281,7 @@ def test_min_mul():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_reduce_mean_mul_float32():
@@ -309,7 +309,7 @@ def test_reduce_mean_mul_float32():
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)

compile(net, x, y, b)
compile_net(net, x, y, b)


class ArgMaxWithValueNet(nn.Cell):
@@ -321,7 +321,7 @@ class ArgMaxWithValueNet(nn.Cell):

def construct(self, x, y, b):
out = self.mul1(x, y)
index, out = self.arg_max_with_value(out)
_, out = self.arg_max_with_value(out)
out = self.mul2(out, b)
return out

@@ -335,16 +335,16 @@ class ArgMinWithValueNet(nn.Cell):

def construct(self, x, y, b):
out = self.mul1(x, y)
index, out = self.arg_min_with_value(out)
_, out = self.arg_min_with_value(out)
out = self.mul2(out, b)
return out


def gen_inputs_and_compile(net):
def gen_inputs_and_compile_net(net):
x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel():
@@ -354,7 +354,7 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel():
strategy3 = ((2, 4), (2, 4))
net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_max_with_value_mul_semi():
@@ -364,7 +364,7 @@ def test_arg_max_with_value_mul_semi():
strategy3 = ((2, 4), (2, 4))
net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_max_with_value_mul_auto():
@@ -374,7 +374,7 @@ def test_arg_max_with_value_mul_auto():
strategy3 = None
net = GradWrap(NetWithLoss(ArgMaxWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_min_with_value_mul_semi_axis_parallel():
@@ -384,7 +384,7 @@ def test_arg_min_with_value_mul_semi_axis_parallel():
strategy3 = ((2, 4), (2, 4))
net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_min_with_value_mul_semi():
@@ -394,7 +394,7 @@ def test_arg_min_with_value_mul_semi():
strategy3 = ((2, 4), (2, 4))
net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_min_with_value_mul_auto():
@@ -404,7 +404,7 @@ def test_arg_min_with_value_mul_auto():
strategy3 = None
net = GradWrap(NetWithLoss(ArgMinWithValueNet(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


class ArgMinWithValueNet2(nn.Cell):
@@ -416,7 +416,7 @@ class ArgMinWithValueNet2(nn.Cell):

def construct(self, x, y, b):
out = self.mul1(x, y)
index, out = self.arg_min_with_value(out)
_, out = self.arg_min_with_value(out)
out = self.relu(out)
return out

@@ -428,7 +428,7 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2():
strategy3 = ((2, 4, 1),)
net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_min_with_value_mul_semi2():
@@ -438,7 +438,7 @@ def test_arg_min_with_value_mul_semi2():
strategy3 = ((2, 4, 1),)
net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_arg_min_with_value_mul_auto2():
@@ -448,7 +448,7 @@ def test_arg_min_with_value_mul_auto2():
strategy3 = None
net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
gen_inputs_and_compile(net)
gen_inputs_and_compile_net(net)


def test_cross_batch():
@@ -475,7 +475,7 @@ def test_cross_batch():
x = Tensor(np.ones([32, 64]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cross_batch2():
@@ -502,7 +502,7 @@ def test_cross_batch2():
x = Tensor(np.ones([32, 64]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_cross_batch_auto():
@@ -526,7 +526,7 @@ def test_cross_batch_auto():
x = Tensor(np.ones([32, 64]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([32, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_max_empty_tuple():
@@ -554,4 +554,4 @@ def test_max_empty_tuple():
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
b = Tensor(np.ones([128, 32]), dtype=ms.float32)

compile(net, x, y, b)
compile_net(net, x, y, b)

+ 10
- 13
tests/ut/python/parallel/test_reshape.py View File

@@ -18,7 +18,6 @@ import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.common import dtype as mstype
from mindspore.common.api import _executor
from mindspore.common.parameter import Parameter
from mindspore.common.parameter import ParameterTuple
@@ -54,9 +53,8 @@ class Dataset(MindData):
raise StopIteration
self.index += 1
if self.input_num == 2:
return self.predict, self.label
else:
return self.predict,
return (self.predict, self.label)
return (self.predict,)

def reset(self):
self.index = 0
@@ -82,7 +80,6 @@ def reshape_net(strategy0, strategy1, strategy2):


def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss):
batch_size = 32
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
@@ -306,21 +303,21 @@ class ReshapeNet6(nn.Cell):
return matmul2_o


def compile(net, input):
def compile_net(net, input_):
net.set_auto_parallel()
_executor.compile(net, input)
_executor.compile(net, input_)


def reshape_net2(backbone):
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
input = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(backbone))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(net, input)
compile_net(net, input_)


def test_reshape_net1_1():
@@ -480,11 +477,11 @@ def test_batchnorm_reshape_train():
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([batch_size * device_num, 512]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(BatchNormReshapeNet()))

compile(net, input)
compile_net(net, input_)


def bn_with_initialize(out_channels):
@@ -517,12 +514,12 @@ def test_bn_reshape_dense_bn_train():
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
input = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile(net, input)
compile_net(net, input_)


class ParallelReduceMeanNet(nn.Cell):


+ 3
- 3
tests/ut/python/parallel/test_reshape_parameter.py View File

@@ -58,7 +58,7 @@ class Net(nn.Cell):
return out


def compile(net, x, y):
def compile_net(net, x, y):
net.set_auto_parallel()
_executor.compile(net, x, y)

@@ -69,7 +69,7 @@ def test_reshape_parameter_data_parallel():
net = GradWrap(NetWithLoss(Net(strategy)))
x = Tensor(np.ones([10000, 36]), dtype=ms.float32)
y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32)
compile(net, x, y)
compile_net(net, x, y)


def test_reshape_parameter_model_parallel():
@@ -78,4 +78,4 @@ def test_reshape_parameter_model_parallel():
net = GradWrap(NetWithLoss(Net(strategy)))
x = Tensor(np.ones([10000, 36]), dtype=ms.float32)
y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32)
compile(net, x, y)
compile_net(net, x, y)

+ 0
- 1
tests/ut/python/parallel/test_scalar_loss.py View File

@@ -22,7 +22,6 @@ from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class GradWrap(nn.Cell):


+ 12
- 12
tests/ut/python/parallel/test_set_auto_parallel_context.py View File

@@ -30,10 +30,10 @@ def test_set_auto_parallel_context():
parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast")
assert device_num == 4
assert global_rank == 3
assert mirror_mean == True
assert cast_before_mirror == False
assert mirror_mean
assert not cast_before_mirror
assert parallel_mode == "auto_parallel"
assert parameter_broadcast == False
assert not parameter_broadcast

auto_parallel_context().set_communication_backend("hccl")
backend = auto_parallel_context().get_communication_backend()
@@ -43,7 +43,7 @@ def test_set_auto_parallel_context():
device_num = auto_parallel_context().get_device_num()
device_num_is_set = auto_parallel_context().get_device_num_is_set()
assert device_num == 4
assert device_num_is_set == True
assert device_num_is_set

auto_parallel_context().set_global_rank(4)
global_rank = auto_parallel_context().get_global_rank()
@@ -51,14 +51,14 @@ def test_set_auto_parallel_context():

auto_parallel_context().set_mirror_mean(True)
mirror_mean = auto_parallel_context().get_mirror_mean()
assert mirror_mean == True
assert mirror_mean

auto_parallel_context().set_cast_before_mirror(False)
cast_before_mirror = auto_parallel_context().get_cast_before_mirror()
assert cast_before_mirror == False
assert not cast_before_mirror

parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set()
assert parameter_broadcast_is_set == True
assert parameter_broadcast_is_set

with pytest.raises(ValueError):
context.set_auto_parallel_context(device_num=0)
@@ -94,9 +94,9 @@ def test_reset_auto_parallel_context():
parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set()
assert device_num == 1
assert global_rank == 0
assert mirror_mean == False
assert cast_before_mirror == True
assert not mirror_mean
assert cast_before_mirror
assert parallel_mode == "stand_alone"
assert parameter_broadcast == False
assert device_num_is_set == False
assert parameter_broadcast_is_set == False
assert not parameter_broadcast
assert not device_num_is_set
assert not parameter_broadcast_is_set
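
The assertion rewrites above address pylint's singleton-comparison check (C0121): comparing a boolean to True or False with == is redundant, so the flags are asserted directly. A minimal sketch with made-up flag values standing in for the auto_parallel_context() getters:

# Illustrative flag values standing in for the auto_parallel_context() getters.
mirror_mean = True
cast_before_mirror = False
parameter_broadcast = False

# Before (flagged by pylint as singleton-comparison, C0121):
#     assert mirror_mean == True
#     assert cast_before_mirror == False
# After: assert the boolean directly.
assert mirror_mean
assert not cast_before_mirror
assert not parameter_broadcast
print("all flags checked")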

+ 6
- 6
tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py View File

@@ -39,7 +39,7 @@ _w1 = Tensor(np.ones([128, 64]), dtype=ms.float32)
_b = Tensor(np.ones([128, 64]), dtype=ms.float32)


def compile(net):
def compile_net(net):
optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = TrainOneStepCell(net, optimizer)
train_net.set_auto_parallel()
@@ -52,7 +52,7 @@ def test_sigmoid_cross_entropy_with_logits_data_parallel():
strategy1 = ((16, 1), (16, 1))
strategy2 = ((16, 1), (16, 1))
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_sigmoid_cross_entropy_with_logits_model_parallel():
@@ -60,7 +60,7 @@ def test_sigmoid_cross_entropy_with_logits_model_parallel():
strategy1 = ((1, 16), (1, 16))
strategy2 = ((1, 16), (1, 16))
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_sigmoid_cross_entropy_with_logits_hybrid_parallel():
@@ -68,13 +68,13 @@ def test_sigmoid_cross_entropy_with_logits_hybrid_parallel():
strategy1 = ((2, 8), (2, 8))
strategy2 = ((2, 8), (2, 8))
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)


def test_sigmoid_cross_entropy_with_logits_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net(_w1)
compile(net)
compile_net(net)


def test_sigmoid_cross_entropy_with_logits_repeat_calc():
@@ -82,4 +82,4 @@ def test_sigmoid_cross_entropy_with_logits_repeat_calc():
strategy1 = ((2, 8), (2, 8))
strategy2 = ((2, 2), (2, 2))
net = Net(_w1, strategy1, strategy2)
compile(net)
compile_net(net)

+ 4
- 5
tests/ut/python/parallel/test_softmax_cross_entropy_loss.py View File

@@ -21,7 +21,6 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class NetWithLoss(nn.Cell):
@@ -44,7 +43,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -71,7 +70,7 @@ def test_softmax_cross_entropy_loss():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_softmax_cross_entropy_loss_repeated_calculation():
@@ -96,7 +95,7 @@ def test_softmax_cross_entropy_loss_repeated_calculation():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_softmax_cross_entropy_loss_auto_batch_parallel():
@@ -118,4 +117,4 @@ def test_softmax_cross_entropy_loss_auto_batch_parallel():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 4
- 5
tests/ut/python/parallel/test_split_grad_sens.py View File

@@ -22,7 +22,6 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class GradWrap(nn.Cell):
@@ -54,7 +53,7 @@ class GradWrap3(nn.Cell):
return C.grad_all(self.network)(x, y, bias)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -81,7 +80,7 @@ def test_no_grad():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_grad_sens_parameter_type():
@@ -135,7 +134,7 @@ def test_grad_sens_tensor_type():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_grad_sens_scalar_broadcast():
@@ -159,4 +158,4 @@ def test_grad_sens_scalar_broadcast():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
bias = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, bias)
compile_net(net, x, y, bias)

+ 8
- 8
tests/ut/python/parallel/test_squeeze_info.py View File

@@ -15,9 +15,9 @@
import numpy as np

import mindspore as ms
from mindspore import context, Tensor, Parameter
from mindspore import context, Tensor
from mindspore.common.api import _executor
from mindspore.nn import Cell, TrainOneStepCell, Momentum
from mindspore.nn import Cell
from mindspore.ops import operations as P


@@ -37,7 +37,7 @@ _x = Tensor(np.ones([64, 1, 32, 1]), dtype=ms.float32)
_b = Tensor(np.ones([64, 32]), dtype=ms.float32)


def compile(net):
def compile_net(net):
net.set_auto_parallel()
_executor.compile(net, _x, _b)
context.reset_auto_parallel_context()
@@ -48,7 +48,7 @@ def test_squeeze_data_parallel():
strategy1 = ((16, 1, 1, 1),)
strategy2 = ((16, 1), (16, 1))
net = Net(strategy1, strategy2)
compile(net)
compile_net(net)


def test_squeeze_model_parallel():
@@ -56,7 +56,7 @@ def test_squeeze_model_parallel():
strategy1 = ((1, 1, 16, 1),)
strategy2 = ((1, 16), (1, 16))
net = Net(strategy1, strategy2)
compile(net)
compile_net(net)


def test_squeeze_specified_axis():
@@ -64,13 +64,13 @@ def test_squeeze_specified_axis():
strategy1 = ((4, 1, 4, 1),)
strategy2 = ((8, 2), (8, 2))
net = Net(strategy1, strategy2, (1, 3))
compile(net)
compile_net(net)


def test_squeeze_auto_parallel():
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
net = Net()
compile(net)
compile_net(net)


def test_squeeze_repeat_calc():
@@ -78,4 +78,4 @@ def test_squeeze_repeat_calc():
strategy1 = ((1, 1, 8, 1),)
strategy2 = ((2, 8), (2, 8))
net = Net(strategy1, strategy2)
compile(net)
compile_net(net)
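
Dropping Parameter, TrainOneStepCell, and Momentum from the import lines above is the usual fix for pylint's unused-import warning (W0611): only the names the module actually uses are kept. A generic, self-contained illustration using standard-library modules rather than the MindSpore symbols:

# Generic illustration with standard-library modules instead of the
# MindSpore symbols; `json` and `sys` play the role of the unused imports.
# Before:
#     import json
#     import os
#     import sys
# After: keep only what the module actually references.
import os

print(os.path.basename("tests/ut/python/parallel/test_squeeze_info.py"))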

+ 3
- 4
tests/ut/python/parallel/test_sum_as_loss.py View File

@@ -21,7 +21,6 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss


class GradWrap(nn.Cell):
@@ -33,7 +32,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, bias)


def compile(net, x, y, bias):
def compile_net(net, x, y, bias):
net.set_auto_parallel()
_executor.compile(net, x, y, bias)

@@ -59,7 +58,7 @@ def test_sum_as_loss():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
bias = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, bias)
compile_net(net, x, y, bias)


def test_sum_as_loss2():
@@ -83,4 +82,4 @@ def test_sum_as_loss2():
x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([64, 32]), dtype=ms.float32)
bias = Tensor(np.ones([64]), dtype=ms.float32)
compile(net, x, y, bias)
compile_net(net, x, y, bias)

+ 0
- 2
tests/ut/python/parallel/test_transpose.py View File

@@ -17,7 +17,6 @@ import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore import context
from mindspore.common.parameter import Parameter
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim.momentum import Momentum
@@ -67,7 +66,6 @@ def transpose_net(strategy1, strategy2):


def transpose_common(strategy1, strategy2):
batch_size = 32
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
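
The two removals in this file, the duplicated `from mindspore import context` line and the `batch_size = 32` local in transpose_common, target pylint's reimported and unused-variable warnings (W0404/W0612): the value was assigned but never read. A small runnable sketch of the same cleanup, with a hypothetical transpose_common_sketch helper rather than the real function:

# Toy version of the cleanup: the unused local is simply deleted.
def transpose_common_sketch(strategy1, strategy2):
    # an unused `batch_size = 32` used to sit here and triggered W0612
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    return strategy1, strategy2, learning_rate, momentum, epoch_size

print(transpose_common_sketch((8, 1), (1, 8)))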


+ 4
- 4
tests/ut/python/parallel/test_two_matmul.py View File

@@ -44,7 +44,7 @@ class GradWrap(nn.Cell):
return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
def compile_net(net, x, y, b):
net.set_auto_parallel()
_executor.compile(net, x, y, b)

@@ -72,7 +72,7 @@ def test_two_matmul():
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)

compile(net, x, y, b)
compile_net(net, x, y, b)


def test_two_matmul_repeated_calculation1():
@@ -96,7 +96,7 @@ def test_two_matmul_repeated_calculation1():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)


def test_two_matmul_repeated_calculation2():
@@ -120,4 +120,4 @@ def test_two_matmul_repeated_calculation2():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 64]), dtype=ms.float32)
compile(net, x, y, b)
compile_net(net, x, y, b)

+ 0
- 1
tests/ut/python/parallel/test_two_weights_parameter.py View File

@@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter, ParameterTuple
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P




+ 2
- 2
tests/ut/python/parallel/test_virtual_dataset_3_input.py View File

@@ -78,7 +78,7 @@ def test_virtual_dataset_3_input():

def test_virtualdataset_cell_3_inputs():
class Net(nn.Cell):
def __init__(self, strategy0, strategy1, strategy2, strategy3):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
@@ -89,7 +89,7 @@ def test_virtualdataset_cell_3_inputs():
out = self.matmul2(out, b)
return out

net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None, None))))
net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None))))
context.set_context(save_graphs=True)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
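
The change above removes strategy0, an __init__ parameter that was accepted but never used (pylint's unused-argument, W0613), and updates the call site to pass three strategies instead of four. A self-contained sketch of the pattern; NetSketch and its describe method are illustrative, not the test's actual Net:

# Illustrative stand-in for the test's Net: the unused `strategy0`
# parameter is dropped from __init__ and from the instantiation.
class NetSketch:
    def __init__(self, strategy1, strategy2, strategy3):
        self.strategies = (strategy1, strategy2, strategy3)

    def describe(self):
        return self.strategies

net = NetSketch(None, None, None)  # previously took a fourth, unused argument
print(net.describe())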

