Browse Source

!4801 Must set or change parallel mode before any Initializer created

Merge pull request !4801 from yihuaijie/dev
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
3d06cbf987
35 changed files with 174 additions and 84 deletions
  1. +2
    -0
      mindspore/ccsrc/frontend/parallel/context.cc
  2. +4
    -0
      mindspore/ccsrc/frontend/parallel/context.h
  3. +2
    -0
      mindspore/ccsrc/pipeline/jit/init.cc
  4. +2
    -1
      mindspore/common/api.py
  5. +2
    -0
      mindspore/common/initializer.py
  6. +3
    -0
      mindspore/context.py
  7. +20
    -0
      mindspore/parallel/_auto_parallel_context.py
  8. +13
    -0
      mindspore/parallel/_utils.py
  9. +1
    -1
      tests/st/nccl/test_nccl_lenet.py
  10. +2
    -0
      tests/ut/python/communication/test_data_parallel_lenet.py
  11. +2
    -2
      tests/ut/python/model/test_mix_precision.py
  12. +2
    -1
      tests/ut/python/nn/test_parameter.py
  13. +5
    -2
      tests/ut/python/parallel/test_add_relu_redistribution.py
  14. +13
    -3
      tests/ut/python/parallel/test_allreduce_fusion.py
  15. +2
    -1
      tests/ut/python/parallel/test_alltoall.py
  16. +20
    -18
      tests/ut/python/parallel/test_arithmetic.py
  17. +1
    -1
      tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py
  18. +1
    -1
      tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
  19. +1
    -1
      tests/ut/python/parallel/test_auto_parallel_tuple_depend.py
  20. +2
    -2
      tests/ut/python/parallel/test_auto_parallel_two_bn.py
  21. +1
    -1
      tests/ut/python/parallel/test_batch_parallel.py
  22. +3
    -3
      tests/ut/python/parallel/test_batchnorm_batch_parallel.py
  23. +1
    -1
      tests/ut/python/parallel/test_bn_prelu_cell.py
  24. +4
    -4
      tests/ut/python/parallel/test_get_next.py
  25. +35
    -12
      tests/ut/python/parallel/test_initializer_weight_slice.py
  26. +1
    -1
      tests/ut/python/parallel/test_linear.py
  27. +5
    -5
      tests/ut/python/parallel/test_loss_and_optimizer.py
  28. +4
    -4
      tests/ut/python/parallel/test_one_hot_net.py
  29. +4
    -6
      tests/ut/python/parallel/test_operator_model_parallel.py
  30. +2
    -2
      tests/ut/python/parallel/test_optimizer.py
  31. +2
    -2
      tests/ut/python/parallel/test_optimizer_clone_weight.py
  32. +2
    -2
      tests/ut/python/parallel/test_reshape.py
  33. +3
    -0
      tests/ut/python/parallel/test_using_seed_for_initializer.py
  34. +3
    -3
      tests/ut/python/parallel/test_virtual_dataset_3_input.py
  35. +4
    -4
      tests/ut/python/train/test_amp.py

+ 2
- 0
mindspore/ccsrc/frontend/parallel/context.cc View File

@@ -81,6 +81,8 @@ void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_

// Sets whether the full batch is loaded on each device; the value is stored in full_batch_.
void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }

// Records whether any Initializer has been created; the value is stored in has_initializer_.
void ParallelContext::set_has_initializer(bool has_initializer) { has_initializer_ = has_initializer; }

// Stores the cast_before_mirror flag in cast_before_mirror_.
void ParallelContext::set_cast_before_mirror(bool cast_before_mirror) { cast_before_mirror_ = cast_before_mirror; }

// Stores the loss_repeated_mean flag in loss_repeated_mean_.
void ParallelContext::set_loss_repeated_mean(bool loss_repeated_mean) { loss_repeated_mean_ = loss_repeated_mean; }


+ 4
- 0
mindspore/ccsrc/frontend/parallel/context.h View File

@@ -58,6 +58,9 @@ class ParallelContext {
// Setter (defined out of line) and inline getter for the full_batch_ flag.
void set_full_batch(bool full_batch);
bool full_batch() const { return full_batch_; }

// Setter (defined out of line) and inline getter for has_initializer_,
// the flag recording whether any Initializer has been created.
void set_has_initializer(bool has_initializer);
bool has_initializer() const { return has_initializer_; }

// Setter (defined out of line) and inline getter for the cast_before_mirror_ flag.
void set_cast_before_mirror(bool cast_before_mirror);
bool cast_before_mirror() const { return cast_before_mirror_; }

@@ -112,6 +115,7 @@ class ParallelContext {
static std::shared_ptr<ParallelContext> inst_context_;
bool mirror_mean_;
bool full_batch_;
// Whether any Initializer has been created; starts out false via the in-class initializer.
bool has_initializer_ = false;
bool cast_before_mirror_;
bool loss_repeated_mean_;
int32_t device_num_;


+ 2
- 0
mindspore/ccsrc/pipeline/jit/init.cc View File

@@ -193,6 +193,8 @@ PYBIND11_MODULE(_c_expression, m) {
.def("get_strategy_ckpt_save_file", &ParallelContext::strategy_ckpt_save_file, "Get strategy checkpoint save file.")
.def("set_full_batch", &ParallelContext::set_full_batch, "Set whether load full batch on each device.")
.def("get_full_batch", &ParallelContext::full_batch, "Get whether load full batch on each device.")
.def("set_has_initializer", &ParallelContext::set_has_initializer, "Set whether any Initializer has been created.")
.def("get_has_initializer", &ParallelContext::has_initializer, "Get whether any Initializer has been created.")
.def("set_enable_parallel_optimizer", &ParallelContext::set_enable_parallel_optimizer,
"Set enable/disable parallel optimizer.")
.def("get_enable_parallel_optimizer", &ParallelContext::enable_parallel_optimizer,


+ 2
- 1
mindspore/common/api.py View File

@@ -24,7 +24,7 @@ from mindspore import log as logger
from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
from .tensor import Tensor as MsTensor
from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_tensor
from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_tensor, _set_has_initializer
# store ms_function class compiled pipeline cache
ms_compile_cache = {}

@@ -383,6 +383,7 @@ class _Executor:
Str, the full phase of the cell.
Bool, if the graph has been compiled before, return False, else return True.
"""
_set_has_initializer(False)
obj.check_names()
args_names, args_list = _generate_pip_args(obj, *args)
dic = dict(zip(args_names, args_list))


+ 2
- 0
mindspore/common/initializer.py View File

@@ -24,6 +24,7 @@ from mindspore import log as logger
from . import dtype as mstype
from .tensor import Tensor
from .._c_expression import random_normal
from ..parallel._utils import _set_has_initializer

_INITIALIZER_ALIAS = dict()

@@ -42,6 +43,7 @@ class Initializer:
self._kwargs = kwargs
self.shape = None
self.dtype = None
_set_has_initializer(True)

def _initialize(self, *kwargs):
raise NotImplementedError('Must be overridden!')


+ 3
- 0
mindspore/context.py View File

@@ -437,6 +437,8 @@ def set_auto_parallel_context(**kwargs):
If a program has tasks with different parallel modes, then before setting new parallel mode for
next task, interface mindspore.context.reset_auto_parallel_context() needs to be called to reset
the configuration.
The parallel mode must be set or changed before any Initializer is created; otherwise a
RuntimeError will be raised.

Args:
device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
@@ -477,6 +479,7 @@ def set_auto_parallel_context(**kwargs):

Raises:
ValueError: If input key is not attribute in auto parallel context.
RuntimeError: If there is any Initializer created before setting or changing parallel_mode.

Examples:
>>> context.set_auto_parallel_context(device_num=8)


+ 20
- 0
mindspore/parallel/_auto_parallel_context.py View File

@@ -176,8 +176,12 @@ class _AutoParallelContext:

Raises:
ValueError: If parallel mode is not supported.
RuntimeError: If there is any Initializer created before setting or changing parallel_mode.
"""
self.check_context_handle()
if self.get_has_initializer():
self.set_has_initializer(False)
raise RuntimeError("Must set or change parallel mode before any Initializer created.")
ret = self._context_handle.set_parallel_mode(parallel_mode)
if ret is False:
raise ValueError("Parallel mode does not support {}".format(parallel_mode))
@@ -249,6 +253,21 @@ class _AutoParallelContext:
self.check_context_handle()
return self._context_handle.get_full_batch()

def set_has_initializer(self, has_initializer):
"""
Set the flag that records whether any Initializer has been created.

Args:
has_initializer (bool): True if an Initializer has been created, False otherwise.
"""
self.check_context_handle()
# Forward the flag to the underlying context handle.
self._context_handle.set_has_initializer(has_initializer)

def get_has_initializer(self):
"""
Get the flag that records whether any Initializer has been created.

Returns:
bool, the value reported by the context handle's get_has_initializer().
"""
self.check_context_handle()
return self._context_handle.get_has_initializer()

def set_strategy_ckpt_save_file(self, strategy_ckpt_save_file):
"""
Set strategy checkpoint save path.
@@ -543,6 +562,7 @@ def _set_auto_parallel_context(**kwargs):

Raises:
ValueError: If input key is not attribute in auto parallel context.
RuntimeError: If there is any Initializer created before setting or changing parallel_mode.
"""
for key, value in kwargs.items():
if key not in _set_auto_parallel_context_func_map:


+ 13
- 0
mindspore/parallel/_utils.py View File

@@ -32,6 +32,19 @@ def _get_full_batch():
"""Get whether to use full_batch."""
return auto_parallel_context().get_full_batch()

def _get_has_initializer():
"""
Get whether any Initializer has been created.

Returns the result of get_has_initializer() on the object returned by auto_parallel_context().
"""
return auto_parallel_context().get_has_initializer()

def _set_has_initializer(has_initializer):
"""
Set the flag that records whether any Initializer has been created.

Args:
has_initializer (bool): True if an Initializer has been created, False otherwise.
"""
# Forward the flag to the object returned by auto_parallel_context().
auto_parallel_context().set_has_initializer(has_initializer)


def _need_to_full():
"""Check whether to convert input to full shape or tensor."""


+ 1
- 1
tests/st/nccl/test_nccl_lenet.py View File

@@ -78,6 +78,7 @@ def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):


def test_lenet_nccl():
context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
net = LeNet()
net.set_train()

@@ -86,7 +87,6 @@ def test_lenet_nccl():
mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
net_with_criterion = WithLossCell(net, criterion)
context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
train_network = TrainOneStepCell(net_with_criterion, mom_optimizer)
train_network.set_train()
losses = []


+ 2
- 0
tests/ut/python/communication/test_data_parallel_lenet.py View File

@@ -24,6 +24,7 @@ import mindspore.nn as nn
from mindspore import Tensor, Model, ParallelMode
from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P
from mindspore.parallel._utils import _set_has_initializer

_current_dir = os.path.dirname(os.path.realpath(__file__)) + "/../test_data"

@@ -89,3 +90,4 @@ def test_lenet5_train_step_training_pynative():
Model(network=network, loss_fn=loss_fn, optimizer=optimizer)
context.set_context(mode=context.GRAPH_MODE)
context.reset_auto_parallel_context()
_set_has_initializer(False)

+ 2
- 2
tests/ut/python/model/test_mix_precision.py View File

@@ -96,6 +96,8 @@ def test_on_momentum():

def test_data_parallel_with_cast():
"""test_data_parallel_with_cast"""
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8)
predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
label = Tensor(np.zeros([1, 10]).astype(np.float32))
net = LeNet5()
@@ -107,8 +109,6 @@ def test_data_parallel_with_cast():
learning_rate=0.1,
momentum=0.9)
net = WithLossCell(net, loss_fn)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8)
net = TrainOneStepCell(net, optimizer)

_executor.compile(net, predict, label)


+ 2
- 1
tests/ut/python/nn/test_parameter.py View File

@@ -21,7 +21,7 @@ from mindspore import context, Tensor, Parameter, ParameterTuple
from mindspore._checkparam import _check_str_by_regular
from mindspore.common import dtype as mstype
from mindspore.common.initializer import initializer
from mindspore.parallel._utils import _set_has_initializer

def test_parameter_init():
dat = np.array([[1, 2, 3], [2, 3, 4]])
@@ -170,6 +170,7 @@ def test_scalar_parameter_update():


def test_parameter_lazy_init():
_set_has_initializer(False)
# support lazy init in SEMI_AUTO_PARALLEL mode
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)


+ 5
- 2
tests/ut/python/parallel/test_add_relu_redistribution.py View File

@@ -20,6 +20,7 @@ from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.parallel._utils import _set_has_initializer
from tests.ut.python.ops.test_math_ops import VirtualLoss


@@ -60,12 +61,13 @@ def compile_net(net, x, y):


def test_add_relu_stride_slice():
_set_has_initializer(False)
context.set_auto_parallel_context(device_num=8, global_rank=7)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy0 = ((1, 1), (1, 1))
strategy1 = ((8, 1),)
net = Grad(NetWithLoss(AddRelu(strategy0, strategy1)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
@@ -73,12 +75,13 @@ def test_add_relu_stride_slice():


def test_add_relu_all_gather():
_set_has_initializer(False)
context.set_auto_parallel_context(device_num=8, global_rank=7)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy0 = ((8, 1), (8, 1))
strategy1 = ((1, 1),)
net = Grad(NetWithLoss(AddRelu(strategy0, strategy1)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)


+ 13
- 3
tests/ut/python/parallel/test_allreduce_fusion.py View File

@@ -23,6 +23,7 @@ from mindspore.nn.optim.momentum import Momentum
from mindspore.parallel import _cost_model_context as cost_model_context
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.train import Model, ParallelMode
from mindspore.parallel._utils import _set_has_initializer
from tests.dataset_mock import MindData


@@ -105,10 +106,8 @@ def train_common(net):
momentum = 0.9
epoch_size = 2
device_num = 4
context.reset_auto_parallel_context()
auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=device_num,
parameter_broadcast=False)
context.set_auto_parallel_context(device_num=device_num, parameter_broadcast=False)
context.set_context(mode=context.GRAPH_MODE)

predict = Tensor(np.ones([batch_size, 128]), dtype=ms.float32)
@@ -183,9 +182,12 @@ def test_allreduce_fusion_parameters():


def test_allreduce_fusion1():
_set_has_initializer(False)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
allreduce_fusion_dict = train_common(net)
expect_dict = {'backbone2.fc8.weight': 2,
@@ -210,6 +212,8 @@ def test_allreduce_fusion2():
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
cost_model_context.reset_cost_model_context()
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
allreduce_fusion_dict = train_common(net)
expect_dict = {}
@@ -221,6 +225,8 @@ def test_allreduce_fusion3():
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=3)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.3333333)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
net = SimpleDMLNet(DenseNet1(has_bias=True, activation='relu'), DenseNet2(has_bias=False, activation='relu'))
allreduce_fusion_dict = train_common(net)
expect_dict = {'backbone2.fc8.weight': 3,
@@ -247,6 +253,8 @@ def test_allreduce_fusion4():
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
net = SimpleDMLNet(DenseNet2(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
allreduce_fusion_dict = train_common(net)
expect_dict = {'backbone2.fc8.weight': 2,
@@ -276,6 +284,8 @@ def test_allreduce_fusion5():
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.000001)
cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.0000015)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
net = SimpleDMLNet(DenseNet2(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
allreduce_fusion_dict = train_common(net)



+ 2
- 1
tests/ut/python/parallel/test_alltoall.py View File

@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim.momentum import Momentum
from mindspore.ops import operations as P
from mindspore.parallel._utils import _reset_op_id
from mindspore.parallel._utils import _reset_op_id, _set_has_initializer
from mindspore.train import Model, ParallelMode
from tests.dataset_mock import MindData

@@ -90,6 +90,7 @@ def all_to_all_common(strategy1):


def test_all_to_all():
_set_has_initializer(False)
strategy1 = ((8, 1),)
context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
_reset_op_id()


+ 20
- 18
tests/ut/python/parallel/test_arithmetic.py View File

@@ -20,6 +20,7 @@ from mindspore import Parameter, Tensor, context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.parallel._utils import _set_has_initializer
from tests.ut.python.ops.test_math_ops import VirtualLoss


@@ -60,11 +61,12 @@ def test_matmul_sub():
out = self.sub(out, b)
return out

_set_has_initializer(False)
context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -85,10 +87,10 @@ def test_matmul_add():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -109,10 +111,10 @@ def test_matmul_mul():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -133,10 +135,10 @@ def test_matmul_div():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -157,10 +159,10 @@ def test_matmul_greater():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -181,10 +183,10 @@ def test_matmul_add_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -205,10 +207,10 @@ def test_matmul_add_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
@@ -229,10 +231,10 @@ def test_matmul_sub_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -253,10 +255,10 @@ def test_matmul_sub_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
@@ -277,10 +279,10 @@ def test_matmul_mul_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -301,10 +303,10 @@ def test_matmul_mul_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
@@ -325,10 +327,10 @@ def test_matmul_div_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -349,10 +351,10 @@ def test_matmul_div_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
@@ -373,10 +375,10 @@ def test_matmul_greater_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -397,10 +399,10 @@ def test_matmul_greater_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)
@@ -421,10 +423,10 @@ def test_matmul_floordiv():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (4, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -445,10 +447,10 @@ def test_matmul_floordiv_broadcast():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 2), (2, 2))
strategy2 = ((4, 2), (2,))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
@@ -469,10 +471,10 @@ def test_matmul_floordiv_broadcast2():
return out

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 4), (4, 1))
strategy2 = ((4, 1), (1, 2))
net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

x = Tensor(np.ones([64, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 1]), dtype=ms.float32)


+ 1
- 1
tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py View File

@@ -64,10 +64,10 @@ def test_auto_parallel_bn_with_prelu():

size = 8
context.set_auto_parallel_context(device_num=size, global_rank=0)
context.set_auto_parallel_context(parallel_mode="auto_parallel")

x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32)

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net.set_auto_parallel()
_executor.compile(net, x)

+ 1
- 1
tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py View File

@@ -106,8 +106,8 @@ def test_double_subgraphs():
cost_model_context.set_cost_model_context(multi_subgraphs=True)
context.set_context(save_graphs=True)
context.set_auto_parallel_context(device_num=8, global_rank=0)
net = TrainStepWarp(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = TrainStepWarp(NetWithLoss(Net()))
net.set_auto_parallel()

x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)


+ 1
- 1
tests/ut/python/parallel/test_auto_parallel_tuple_depend.py View File

@@ -68,9 +68,9 @@ def test_virtual_dataset_3_input():
out = self.matmul2(out, b)
return out

net = GradWrap(NetWithLoss(Net()))
context.set_auto_parallel_context(parallel_mode="auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
net = GradWrap(NetWithLoss(Net()))
net.set_auto_parallel()
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)


+ 2
- 2
tests/ut/python/parallel/test_auto_parallel_two_bn.py View File

@@ -68,11 +68,11 @@ def test_two_bn():
out = self.block2(out)
return out

net = NetWithLoss(Net())
x = Tensor(np.ones([64, 64]), dtype=ms.float32)
context.set_context(save_graphs=True)
context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
net = NetWithLoss(Net())
x = Tensor(np.ones([64, 64]), dtype=ms.float32)
net.set_auto_parallel()
set_algo_parameters(elementwise_op_strategy_follow=True)
reset_op_id()


+ 1
- 1
tests/ut/python/parallel/test_batch_parallel.py View File

@@ -94,12 +94,12 @@ def test_batch():
return out4

context.set_auto_parallel_context(device_num=8, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((8, 1, 1, 1), (1, 1, 1, 1))
strategy2 = ((1, 1, 1, 8), (1, 1, 1, 8))
strategy3 = ((4, 1, 1, 2), (4, 1, 1, 2))

net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32)


+ 3
- 3
tests/ut/python/parallel/test_batchnorm_batch_parallel.py View File

@@ -118,6 +118,9 @@ def batchnorm_net(num_classes):


def test_batchnorm_batch_parallel():
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
num_classes = 1001
batch_size = 32
learning_rate = 0.1
@@ -134,9 +137,6 @@ def test_batchnorm_batch_parallel():
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)



+ 1
- 1
tests/ut/python/parallel/test_bn_prelu_cell.py View File

@@ -198,6 +198,7 @@ def bn_net():

def bn_common(parallel_mode, train_flag, strategy_loss=None):
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
learning_rate = 0.1
momentum = 0.9
epoch_size = 2
@@ -218,7 +219,6 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):

if parallel_mode == ParallelMode.DATA_PARALLEL:
context.set_auto_parallel_context(parameter_broadcast=True)
context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
model = Model(net, loss, opt)
if train_flag:
model.train(epoch_size, dataset, dataset_sink_mode=False)


+ 4
- 4
tests/ut/python/parallel/test_get_next.py View File

@@ -88,13 +88,13 @@ def test_get_next_semi_auto_parallel():
return x

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
network = Net(strategy1=((1, 4),), strategy2=((4, 1), (1,)))
strategy3 = ((4, 1), (), ())
strategy4 = ((4, 1), (4, 1))
net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2, strategy3=strategy3,
strategy4=strategy4)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
compile_net(net)


@@ -112,13 +112,13 @@ def test_get_next_semi_auto_parallel1():
return x

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
network = Net(strategy1=((1, 4),), strategy2=((4, 1), (1,)))
strategy3 = ((1, 4), (), ())
strategy4 = ((4, 1), (4, 1))
net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2, strategy3=strategy3,
strategy4=strategy4)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
compile_net(net)


@@ -136,10 +136,10 @@ def test_get_next_auto_parallel():
return x

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
network = Net()
net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2)
net = GradWrap(net_with_loss)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
compile_net(net)


@@ -153,6 +153,6 @@ def test_only_one_get_next():
return self.get_next()

context.set_auto_parallel_context(device_num=4, global_rank=0)
net = Net()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = Net()
compile_net(net)

+ 35
- 12
tests/ut/python/parallel/test_initializer_weight_slice.py View File

@@ -13,6 +13,7 @@
# limitations under the License.

import numpy as np
import pytest
from mindspore import context
import mindspore.nn as nn
from mindspore.ops import operations as P
@@ -22,20 +23,19 @@ import mindspore.common.api as me
from mindspore.common.initializer import initializer
from hccl_test.manage.api import Hccl

class Net(nn.Cell):
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)

def check_initializer_weight_slice(init_name="Uniform"):
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
out = self.relu(out)
return out
def construct(self, x):
out = self.matmul(x, self.weight)
out = self.relu(out)
return out

def check_initializer_weight_slice(init_name="Uniform"):
def get_slice(rank):
hccl = Hccl()
rank_save = hccl.rank_id
@@ -77,5 +77,28 @@ def test_initializer_weight_slice():
for init_name in initializers:
check_initializer_weight_slice(init_name)

def test_wrong_order_set_parallel_mode_with_initializer():
# Negative case: the weight comes from initializer("Normal", ...) and the Net is
# built BEFORE the parallel mode is configured, then a RuntimeError is expected.
weight = initializer("Normal", [64, 32], ms.float32)
# strategy1 is handed to Net for its MatMul, strategy2 for its ReLU.
strategy1 = ((2, 1), (4, 1))
strategy2 = ((2, 4),)
net = Net(strategy1, strategy2, weight)
exe = me._executor
x = Tensor(np.ones([32, 32]), dtype=ms.float32)
# NOTE(review): indentation was lost in this view, so it is not visible which of
# the following statements sit inside the pytest.raises block. Presumably the
# set_auto_parallel_context call is the one expected to raise -- confirm against
# the real file.
with pytest.raises(RuntimeError):
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
net.set_auto_parallel()
exe.compile(net, x, auto_parallel_mode=True, phase='train')

def test_wrong_order_set_parallel_mode_without_initializer():
# Counterpart case: the weight is a concrete Tensor of ones (no initializer()
# call), and the Net is again built before the parallel mode is configured.
weight = Tensor(np.ones([64, 32]), ms.float32)
# strategy1 is handed to Net for its MatMul, strategy2 for its ReLU.
strategy1 = ((2, 1), (4, 1))
strategy2 = ((2, 4),)
net = Net(strategy1, strategy2, weight)
exe = me._executor
x = Tensor(np.ones([32, 32]), dtype=ms.float32)
# No pytest.raises here: the test passes only if setting the parallel mode,
# enabling auto-parallel on the net and compiling all complete without raising.
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
net.set_auto_parallel()
exe.compile(net, x, auto_parallel_mode=True, phase='train')

if __name__ == '__main__':
test_initializer_weight_slice()

+ 1
- 1
tests/ut/python/parallel/test_linear.py View File

@@ -58,12 +58,12 @@ def test_linear():
return out

context.set_auto_parallel_context(device_num=16, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy0 = ((2, 4), (2, 4))
strategy1 = ((2, 4), (4,))
strategy2 = ((2, 8),)
strategy3 = ((16, 1), (16, 1))
net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2), strategy3))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

x = Tensor(np.ones([64, 32]), dtype=ms.float32)


+ 5
- 5
tests/ut/python/parallel/test_loss_and_optimizer.py View File

@@ -54,6 +54,7 @@ def test_momentum():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -69,7 +70,6 @@ def test_momentum():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -88,6 +88,7 @@ def test_momentum_with_loss_scale():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -103,7 +104,6 @@ def test_momentum_with_loss_scale():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -122,6 +122,7 @@ def test_momentum_with_dynamic_lr():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -138,7 +139,6 @@ def test_momentum_with_dynamic_lr():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -157,6 +157,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -174,7 +175,6 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -193,6 +193,7 @@ def test_lars():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -209,6 +210,5 @@ def test_lars():
lars_filter=lambda x: 'bn' not in x.name)
net_with_loss = NetWithLoss(net, strategy3)
train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

+ 4
- 4
tests/ut/python/parallel/test_one_hot_net.py View File

@@ -266,11 +266,11 @@ class BNReshapeDenseBNNet(nn.Cell):
def test_bn_reshape_dense_bn_train_loss():
batch_size = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input_, label)
@@ -279,12 +279,12 @@ def test_bn_reshape_dense_bn_train_loss():
def test_semi_one_hot_net_batch():
batch_size = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch())
net = GradWrap(NetWithLoss(net))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input_, label)
@@ -300,10 +300,10 @@ def test_semi_one_hot_net_model():
label = Tensor(np.ones([batch_size]), dtype=ms.int32)
dataset = Dataset(predict, label, 2, input_num=2)

net = SemiAutoOneHotNet(args=Args(), strategy=StrategyModel())
opt = Momentum(net.trainable_params(), learning_rate, momentum)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=16)
context.set_context(mode=context.GRAPH_MODE)
net = SemiAutoOneHotNet(args=Args(), strategy=StrategyModel())
opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, optimizer=opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)

+ 4
- 6
tests/ut/python/parallel/test_operator_model_parallel.py View File

@@ -353,6 +353,8 @@ def test_resnet_operator_batch_parallel():

context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

@@ -363,9 +365,6 @@ def test_resnet_operator_batch_parallel():
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)

@@ -379,6 +378,8 @@ def test_resnet_model_parallel():

context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
predict = Tensor(np.ones([batch_size, 64, 112, 112]), dtype=ms.float32)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

@@ -389,9 +390,6 @@ def test_resnet_model_parallel():
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
context.set_context(mode=context.GRAPH_MODE)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)



+ 2
- 2
tests/ut/python/parallel/test_optimizer.py View File

@@ -45,6 +45,8 @@ class Net(nn.Cell):

def test_dense_gen_graph():
context.set_context(mode=context.GRAPH_MODE)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8)
init()
network = Net(512, 128)

@@ -53,8 +55,6 @@ def test_dense_gen_graph():
learning_rate=0.1,
momentum=0.9)
network = WithLossCell(network, loss_fn)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8)
network = TrainOneStepCell(network, optimizer)

predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01)


+ 2
- 2
tests/ut/python/parallel/test_optimizer_clone_weight.py View File

@@ -54,6 +54,7 @@ def test_optimizer_clone_weight():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -70,7 +71,6 @@ def test_optimizer_clone_weight():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -89,6 +89,7 @@ def test_optimizer_clone_weight2():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -105,6 +106,5 @@ def test_optimizer_clone_weight2():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

+ 2
- 2
tests/ut/python/parallel/test_reshape.py View File

@@ -320,10 +320,10 @@ def reshape_net2(backbone):
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(backbone))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(net, input_)

@@ -530,10 +530,10 @@ def test_bn_reshape_dense_bn_train():
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(net, input_)



+ 3
- 0
tests/ut/python/parallel/test_using_seed_for_initializer.py View File

@@ -18,6 +18,7 @@ from numpy import allclose
import mindspore.common.initializer as init
import mindspore.nn as nn
from mindspore import Parameter
from mindspore.parallel._utils import _set_has_initializer

parameter_shape = [16, 4]

@@ -46,6 +47,7 @@ def test_using_same_seed_for_initializer():
np.random.seed(0)
net2 = ParameterNet()
net2.init_parameters_data()
_set_has_initializer(False)
for key in net1.parameters_dict():
if key not in net2.parameters_dict():
assert False
@@ -60,6 +62,7 @@ def test_using_diffserent_seed_for_initializer():
np.random.seed(1)
net2 = ParameterNet()
net2.init_parameters_data()
_set_has_initializer(False)
for key in net1.parameters_dict():
if key not in net2.parameters_dict():
assert False


+ 3
- 3
tests/ut/python/parallel/test_virtual_dataset_3_input.py View File

@@ -62,13 +62,13 @@ def test_virtual_dataset_3_input():
out = self.matmul2(out, b)
return out

context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy0 = ((2, 1), (2, 1), (2, 1))
strategy1 = ((2, 2), (2, 2))
strategy2 = ((2, 2), (2, 2))
strategy3 = ((2, 4),)
net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2, strategy3)))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 2048]), dtype=ms.float32)
@@ -89,10 +89,10 @@ def test_virtualdataset_cell_3_inputs():
out = self.matmul2(out, b)
return out

net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None))))
context.set_context(save_graphs=True)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None))))
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 2048]), dtype=ms.float32)


+ 4
- 4
tests/ut/python/train/test_amp.py View File

@@ -146,6 +146,10 @@ def test_compile_model_train_O2():
def test_compile_model_train_O2_parallel():
dataset_types = (np.float32, np.float32)
dataset_shapes = ((16, 16), (16, 16))
context.set_auto_parallel_context(
global_rank=0, device_num=8,
mirror_mean=True, parameter_broadcast=True,
parallel_mode=ParallelMode.DATA_PARALLEL)

dataset = MindDataSet(dataset_types, dataset_shapes)

@@ -153,10 +157,6 @@ def test_compile_model_train_O2_parallel():
loss = nn.MSELoss()
optimizer = nn.Momentum(net.trainable_params(), 0.1, 0.9, 0.00004, 1024.0)

context.set_auto_parallel_context(
global_rank=0, device_num=8,
mirror_mean=True, parameter_broadcast=True,
parallel_mode=ParallelMode.DATA_PARALLEL)
init()

model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={"acc"}, amp_level="O2")


Loading…
Cancel
Save