
!4801 Must set or change parallel mode before any Initializer created

Merge pull request !4801 from yihuaijie/dev
tags/v0.7.0-beta
mindspore-ci-bot committed 5 years ago
commit 3d06cbf987
35 changed files with 174 additions and 84 deletions
1. mindspore/ccsrc/frontend/parallel/context.cc (+2, -0)
2. mindspore/ccsrc/frontend/parallel/context.h (+4, -0)
3. mindspore/ccsrc/pipeline/jit/init.cc (+2, -0)
4. mindspore/common/api.py (+2, -1)
5. mindspore/common/initializer.py (+2, -0)
6. mindspore/context.py (+3, -0)
7. mindspore/parallel/_auto_parallel_context.py (+20, -0)
8. mindspore/parallel/_utils.py (+13, -0)
9. tests/st/nccl/test_nccl_lenet.py (+1, -1)
10. tests/ut/python/communication/test_data_parallel_lenet.py (+2, -0)
11. tests/ut/python/model/test_mix_precision.py (+2, -2)
12. tests/ut/python/nn/test_parameter.py (+2, -1)
13. tests/ut/python/parallel/test_add_relu_redistribution.py (+5, -2)
14. tests/ut/python/parallel/test_allreduce_fusion.py (+13, -3)
15. tests/ut/python/parallel/test_alltoall.py (+2, -1)
16. tests/ut/python/parallel/test_arithmetic.py (+20, -18)
17. tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py (+1, -1)
18. tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py (+1, -1)
19. tests/ut/python/parallel/test_auto_parallel_tuple_depend.py (+1, -1)
20. tests/ut/python/parallel/test_auto_parallel_two_bn.py (+2, -2)
21. tests/ut/python/parallel/test_batch_parallel.py (+1, -1)
22. tests/ut/python/parallel/test_batchnorm_batch_parallel.py (+3, -3)
23. tests/ut/python/parallel/test_bn_prelu_cell.py (+1, -1)
24. tests/ut/python/parallel/test_get_next.py (+4, -4)
25. tests/ut/python/parallel/test_initializer_weight_slice.py (+35, -12)
26. tests/ut/python/parallel/test_linear.py (+1, -1)
27. tests/ut/python/parallel/test_loss_and_optimizer.py (+5, -5)
28. tests/ut/python/parallel/test_one_hot_net.py (+4, -4)
29. tests/ut/python/parallel/test_operator_model_parallel.py (+4, -6)
30. tests/ut/python/parallel/test_optimizer.py (+2, -2)
31. tests/ut/python/parallel/test_optimizer_clone_weight.py (+2, -2)
32. tests/ut/python/parallel/test_reshape.py (+2, -2)
33. tests/ut/python/parallel/test_using_seed_for_initializer.py (+3, -0)
34. tests/ut/python/parallel/test_virtual_dataset_3_input.py (+3, -3)
35. tests/ut/python/train/test_amp.py (+4, -4)

mindspore/ccsrc/frontend/parallel/context.cc (+2, -0)

@@ -81,6 +81,8 @@ void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_
 void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }
 
+void ParallelContext::set_has_initializer(bool has_initializer) { has_initializer_ = has_initializer; }
+
 void ParallelContext::set_cast_before_mirror(bool cast_before_mirror) { cast_before_mirror_ = cast_before_mirror; }
 
 void ParallelContext::set_loss_repeated_mean(bool loss_repeated_mean) { loss_repeated_mean_ = loss_repeated_mean; }


mindspore/ccsrc/frontend/parallel/context.h (+4, -0)

@@ -58,6 +58,9 @@ class ParallelContext {
   void set_full_batch(bool full_batch);
   bool full_batch() const { return full_batch_; }
 
+  void set_has_initializer(bool has_initializer);
+  bool has_initializer() const { return has_initializer_; }
+
   void set_cast_before_mirror(bool cast_before_mirror);
   bool cast_before_mirror() const { return cast_before_mirror_; }

@@ -112,6 +115,7 @@ class ParallelContext {
   static std::shared_ptr<ParallelContext> inst_context_;
   bool mirror_mean_;
   bool full_batch_;
+  bool has_initializer_ = false;
   bool cast_before_mirror_;
   bool loss_repeated_mean_;
   int32_t device_num_;


mindspore/ccsrc/pipeline/jit/init.cc (+2, -0)

@@ -193,6 +193,8 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("get_strategy_ckpt_save_file", &ParallelContext::strategy_ckpt_save_file, "Get strategy checkpoint save file.")
     .def("set_full_batch", &ParallelContext::set_full_batch, "Set whether load full batch on each device.")
     .def("get_full_batch", &ParallelContext::full_batch, "Get whether load full batch on each device.")
+    .def("set_has_initializer", &ParallelContext::set_has_initializer, "Set whether any Initializer has been created.")
+    .def("get_has_initializer", &ParallelContext::has_initializer, "Get whether any Initializer has been created.")
     .def("set_enable_parallel_optimizer", &ParallelContext::set_enable_parallel_optimizer,
          "Set enable/disable parallel optimizer.")
     .def("get_enable_parallel_optimizer", &ParallelContext::enable_parallel_optimizer,


mindspore/common/api.py (+2, -1)

@@ -24,7 +24,7 @@ from mindspore import log as logger
 from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_
 from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
 from .tensor import Tensor as MsTensor
-from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_tensor
+from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_tensor, _set_has_initializer
 # store ms_function class compiled pipeline cache
 ms_compile_cache = {}

@@ -383,6 +383,7 @@ class _Executor:
             Str, the full phase of the cell.
            Bool, if the graph has been compiled before, return False, else return True.
         """
+        _set_has_initializer(False)
        obj.check_names()
        args_names, args_list = _generate_pip_args(obj, *args)
        dic = dict(zip(args_names, args_list))


mindspore/common/initializer.py (+2, -0)

@@ -24,6 +24,7 @@ from mindspore import log as logger
 from . import dtype as mstype
 from .tensor import Tensor
 from .._c_expression import random_normal
+from ..parallel._utils import _set_has_initializer
 
 _INITIALIZER_ALIAS = dict()

@@ -42,6 +43,7 @@ class Initializer:
         self._kwargs = kwargs
         self.shape = None
         self.dtype = None
+        _set_has_initializer(True)
 
     def _initialize(self, *kwargs):
         raise NotImplementedError('Must be overridden!')
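Together with the new C++ flag above, this one-line hook means that merely constructing any Initializer subclass (including indirectly, via the initializer() helper) marks the parallel context. A minimal sketch of the side effect, using the private helpers this merge adds in mindspore/parallel/_utils.py below (internal API, shown for illustration only):

    import mindspore as ms
    from mindspore.common.initializer import initializer
    from mindspore.parallel._utils import _get_has_initializer, _set_has_initializer

    _set_has_initializer(False)
    print(_get_has_initializer())  # False: no Initializer constructed yet
    w = initializer("Normal", [64, 32], ms.float32)  # Initializer.__init__ runs here
    print(_get_has_initializer())  # True: the flag was flipped as a side effect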


mindspore/context.py (+3, -0)

@@ -437,6 +437,8 @@ def set_auto_parallel_context(**kwargs):
         If a program has tasks with different parallel modes, then before setting new parallel mode for
         next task, interface mindspore.context.reset_auto_parallel_context() needs to be called to reset
         the configuration.
+        Setting or changing parallel modes must be called before any Initializer created, or RuntimeError
+        will be raised.
 
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.

@@ -477,6 +479,7 @@ def set_auto_parallel_context(**kwargs):
 
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
+        RuntimeError: If there is any Initializer created before setting or changing parallel_mode.
 
     Examples:
         >>> context.set_auto_parallel_context(device_num=8)
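In practice the documented contract is: fix the parallel mode first, build weights second. A minimal sketch of both orders (shapes and device numbers are illustrative):

    import mindspore as ms
    from mindspore import context
    from mindspore.common.initializer import initializer

    # Correct order: the parallel mode is set while no Initializer exists yet.
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)
    weight = initializer("Normal", [64, 32], ms.float32)

    # Wrong order: an Initializer already exists, so changing the mode now raises
    # RuntimeError("Must set or change parallel mode before any Initializer created.")
    # context.set_auto_parallel_context(parallel_mode="auto_parallel")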


mindspore/parallel/_auto_parallel_context.py (+20, -0)

@@ -176,8 +176,12 @@ class _AutoParallelContext:
 
         Raises:
             ValueError: If parallel mode is not supported.
+            RuntimeError: If there is any Initializer created before setting or changing parallel_mode.
         """
         self.check_context_handle()
+        if self.get_has_initializer():
+            self.set_has_initializer(False)
+            raise RuntimeError("Must set or change parallel mode before any Initializer created.")
         ret = self._context_handle.set_parallel_mode(parallel_mode)
         if ret is False:
             raise ValueError("Parallel mode does not support {}".format(parallel_mode))

@@ -249,6 +253,21 @@
         self.check_context_handle()
         return self._context_handle.get_full_batch()
 
+    def set_has_initializer(self, has_initializer):
+        """
+        Set whether any Initializer has been created.
+
+        Args:
+            has_initializer (bool): True if a Initializer created.
+        """
+        self.check_context_handle()
+        self._context_handle.set_has_initializer(has_initializer)
+
+    def get_has_initializer(self):
+        """Get whether any Initializer has been created."""
+        self.check_context_handle()
+        return self._context_handle.get_has_initializer()
+
     def set_strategy_ckpt_save_file(self, strategy_ckpt_save_file):
         """
         Set strategy checkpoint save path.

@@ -543,6 +562,7 @@ def _set_auto_parallel_context(**kwargs):
 
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
+        RuntimeError: If there is any Initializer created before setting or changing parallel_mode.
     """
     for key, value in kwargs.items():
         if key not in _set_auto_parallel_context_func_map:
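Note that set_parallel_mode clears the flag before raising, so one rejected call does not poison the next: after catching the RuntimeError, setting the mode again succeeds. A sketch of that behavior (pytest is used only for the assertion; this mirrors the new unit tests further down):

    import pytest
    import mindspore as ms
    from mindspore import context
    from mindspore.common.initializer import initializer

    initializer("Normal", [8, 8], ms.float32)  # flags has_initializer
    with pytest.raises(RuntimeError):
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    # The failed call reset the flag, so the retry goes through.
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")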


mindspore/parallel/_utils.py (+13, -0)

@@ -32,6 +32,19 @@ def _get_full_batch():
     """Get whether to use full_batch."""
     return auto_parallel_context().get_full_batch()
 
+def _get_has_initializer():
+    """Get whether any Initializer has been created."""
+    return auto_parallel_context().get_has_initializer()
+
+def _set_has_initializer(has_initializer):
+    """
+    Set whether any Initializer has been created.
+
+    Args:
+        has_initializer (bool): True if a Initializer created.
+    """
+    auto_parallel_context().set_has_initializer(has_initializer)
+
 
 def _need_to_full():
     """Check whether to convert input to full shape or tensor."""


tests/st/nccl/test_nccl_lenet.py (+1, -1)

@@ -78,6 +78,7 @@ def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):
 
 
 def test_lenet_nccl():
+    context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
     net = LeNet()
     net.set_train()

@@ -86,7 +87,6 @@ def test_lenet_nccl():
     mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
     criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
-    context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
     train_network = TrainOneStepCell(net_with_criterion, mom_optimizer)
     train_network.set_train()
     losses = []


tests/ut/python/communication/test_data_parallel_lenet.py (+2, -0)

@@ -24,6 +24,7 @@ import mindspore.nn as nn
 from mindspore import Tensor, Model, ParallelMode
 from mindspore.nn.optim import Momentum
 from mindspore.ops import operations as P
+from mindspore.parallel._utils import _set_has_initializer
 
 _current_dir = os.path.dirname(os.path.realpath(__file__)) + "/../test_data"

@@ -89,3 +90,4 @@ def test_lenet5_train_step_training_pynative():
     Model(network=network, loss_fn=loss_fn, optimizer=optimizer)
     context.set_context(mode=context.GRAPH_MODE)
     context.reset_auto_parallel_context()
+    _set_has_initializer(False)

tests/ut/python/model/test_mix_precision.py (+2, -2)

@@ -96,6 +96,8 @@ def test_on_momentum():
 
 def test_data_parallel_with_cast():
     """test_data_parallel_with_cast"""
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8)
     predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
     label = Tensor(np.zeros([1, 10]).astype(np.float32))
     net = LeNet5()

@@ -107,8 +109,6 @@ def test_data_parallel_with_cast():
                         learning_rate=0.1,
                         momentum=0.9)
     net = WithLossCell(net, loss_fn)
-    context.reset_auto_parallel_context()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8)
     net = TrainOneStepCell(net, optimizer)
 
     _executor.compile(net, predict, label)


tests/ut/python/nn/test_parameter.py (+2, -1)

@@ -21,7 +21,7 @@ from mindspore import context, Tensor, Parameter, ParameterTuple
 from mindspore._checkparam import _check_str_by_regular
 from mindspore.common import dtype as mstype
 from mindspore.common.initializer import initializer
+from mindspore.parallel._utils import _set_has_initializer
 
 def test_parameter_init():
     dat = np.array([[1, 2, 3], [2, 3, 4]])

@@ -170,6 +170,7 @@ def test_scalar_parameter_update():
 
 
 def test_parameter_lazy_init():
+    _set_has_initializer(False)
     # support lazy init in SEMI_AUTO_PARALLEL mode
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)


tests/ut/python/parallel/test_add_relu_redistribution.py (+5, -2)

@@ -20,6 +20,7 @@ from mindspore import context
 from mindspore.common.api import _executor
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
+from mindspore.parallel._utils import _set_has_initializer
 from tests.ut.python.ops.test_math_ops import VirtualLoss

@@ -60,12 +61,13 @@ def compile_net(net, x, y):
 
 
 def test_add_relu_stride_slice():
+    _set_has_initializer(False)
     context.set_auto_parallel_context(device_num=8, global_rank=7)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     strategy0 = ((1, 1), (1, 1))
     strategy1 = ((8, 1),)
     net = Grad(NetWithLoss(AddRelu(strategy0, strategy1)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32]), dtype=ms.float32)

@@ -73,12 +75,13 @@ def test_add_relu_stride_slice():
 
 
 def test_add_relu_all_gather():
+    _set_has_initializer(False)
     context.set_auto_parallel_context(device_num=8, global_rank=7)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     strategy0 = ((8, 1), (8, 1))
     strategy1 = ((1, 1),)
     net = Grad(NetWithLoss(AddRelu(strategy0, strategy1)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32]), dtype=ms.float32)


tests/ut/python/parallel/test_allreduce_fusion.py (+13, -3)

@@ -23,6 +23,7 @@ from mindspore.nn.optim.momentum import Momentum
 from mindspore.parallel import _cost_model_context as cost_model_context
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.train import Model, ParallelMode
+from mindspore.parallel._utils import _set_has_initializer
 from tests.dataset_mock import MindData

@@ -105,10 +106,8 @@ def train_common(net):
     momentum = 0.9
     epoch_size = 2
     device_num = 4
-    context.reset_auto_parallel_context()
     auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=device_num,
-                                      parameter_broadcast=False)
+    context.set_auto_parallel_context(device_num=device_num, parameter_broadcast=False)
     context.set_context(mode=context.GRAPH_MODE)
 
     predict = Tensor(np.ones([batch_size, 128]), dtype=ms.float32)

@@ -183,9 +182,12 @@ def test_allreduce_fusion_parameters():
 
 
 def test_allreduce_fusion1():
+    _set_has_initializer(False)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
     net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
     allreduce_fusion_dict = train_common(net)
     expect_dict = {'backbone2.fc8.weight': 2,

@@ -210,6 +212,8 @@ def test_allreduce_fusion2():
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
     cost_model_context.reset_cost_model_context()
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
     net = SimpleDMLNet(DenseNet1(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
     allreduce_fusion_dict = train_common(net)
     expect_dict = {}

@@ -221,6 +225,8 @@ def test_allreduce_fusion3():
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=3)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.3333333)
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
     net = SimpleDMLNet(DenseNet1(has_bias=True, activation='relu'), DenseNet2(has_bias=False, activation='relu'))
     allreduce_fusion_dict = train_common(net)
     expect_dict = {'backbone2.fc8.weight': 3,

@@ -247,6 +253,8 @@ def test_allreduce_fusion4():
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
     net = SimpleDMLNet(DenseNet2(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
     allreduce_fusion_dict = train_common(net)
     expect_dict = {'backbone2.fc8.weight': 2,

@@ -276,6 +284,8 @@ def test_allreduce_fusion5():
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.000001)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.0000015)
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL)
     net = SimpleDMLNet(DenseNet2(has_bias=False, activation=None), DenseNet2(has_bias=False, activation=None))
     allreduce_fusion_dict = train_common(net)




tests/ut/python/parallel/test_alltoall.py (+2, -1)

@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.ops import operations as P
-from mindspore.parallel._utils import _reset_op_id
+from mindspore.parallel._utils import _reset_op_id, _set_has_initializer
 from mindspore.train import Model, ParallelMode
 from tests.dataset_mock import MindData

@@ -90,6 +90,7 @@ def all_to_all_common(strategy1):
 
 
 def test_all_to_all():
+    _set_has_initializer(False)
     strategy1 = ((8, 1),)
     context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
     _reset_op_id()


tests/ut/python/parallel/test_arithmetic.py (+20, -18)

@@ -20,6 +20,7 @@ from mindspore import Parameter, Tensor, context
 from mindspore.common.api import _executor
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
+from mindspore.parallel._utils import _set_has_initializer
 from tests.ut.python.ops.test_math_ops import VirtualLoss

@@ -60,11 +61,12 @@ def test_matmul_sub():
             out = self.sub(out, b)
             return out
 
+    _set_has_initializer(False)
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -85,10 +87,10 @@ def test_matmul_add():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -109,10 +111,10 @@ def test_matmul_mul():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -133,10 +135,10 @@ def test_matmul_div():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -157,10 +159,10 @@ def test_matmul_greater():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -181,10 +183,10 @@ def test_matmul_add_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -205,10 +207,10 @@ def test_matmul_add_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)

@@ -229,10 +231,10 @@ def test_matmul_sub_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -253,10 +255,10 @@ def test_matmul_sub_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)

@@ -277,10 +279,10 @@ def test_matmul_mul_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -301,10 +303,10 @@ def test_matmul_mul_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)

@@ -325,10 +327,10 @@ def test_matmul_div_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -349,10 +351,10 @@ def test_matmul_div_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)

@@ -373,10 +375,10 @@ def test_matmul_greater_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -397,10 +399,10 @@ def test_matmul_greater_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)

@@ -421,10 +423,10 @@ def test_matmul_floordiv():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (4, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -445,10 +447,10 @@ def test_matmul_floordiv_broadcast():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 2), (2, 2))
     strategy2 = ((4, 2), (2,))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -469,10 +471,10 @@ def test_matmul_floordiv_broadcast2():
             return out
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((2, 4), (4, 1))
     strategy2 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 1]), dtype=ms.float32)


tests/ut/python/parallel/test_auto_parallel_BN_PReLU.py (+1, -1)

@@ -64,10 +64,10 @@ def test_auto_parallel_bn_with_prelu():
 
     size = 8
     context.set_auto_parallel_context(device_num=size, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
 
     x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32)
 
     net = GradWrap(NetWithLoss(Net()))
-    context.set_auto_parallel_context(parallel_mode="auto_parallel")
     net.set_auto_parallel()
     _executor.compile(net, x)

tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py (+1, -1)

@@ -106,8 +106,8 @@ def test_double_subgraphs():
     cost_model_context.set_cost_model_context(multi_subgraphs=True)
     context.set_context(save_graphs=True)
     context.set_auto_parallel_context(device_num=8, global_rank=0)
-    net = TrainStepWarp(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net = TrainStepWarp(NetWithLoss(Net()))
     net.set_auto_parallel()
 
     x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)


tests/ut/python/parallel/test_auto_parallel_tuple_depend.py (+1, -1)

@@ -68,9 +68,9 @@ def test_virtual_dataset_3_input():
         out = self.matmul2(out, b)
         return out
 
-    net = GradWrap(NetWithLoss(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    net = GradWrap(NetWithLoss(Net()))
     net.set_auto_parallel()
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)


tests/ut/python/parallel/test_auto_parallel_two_bn.py (+2, -2)

@@ -68,11 +68,11 @@ def test_two_bn():
         out = self.block2(out)
         return out
 
-    net = NetWithLoss(Net())
-    x = Tensor(np.ones([64, 64]), dtype=ms.float32)
     context.set_context(save_graphs=True)
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    net = NetWithLoss(Net())
+    x = Tensor(np.ones([64, 64]), dtype=ms.float32)
     net.set_auto_parallel()
     set_algo_parameters(elementwise_op_strategy_follow=True)
     reset_op_id()


tests/ut/python/parallel/test_batch_parallel.py (+1, -1)

@@ -94,12 +94,12 @@ def test_batch():
         return out4
 
     context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy1 = ((8, 1, 1, 1), (1, 1, 1, 1))
     strategy2 = ((1, 1, 1, 8), (1, 1, 1, 8))
     strategy3 = ((4, 1, 1, 2), (4, 1, 1, 2))
 
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     net.set_auto_parallel()
 
     x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32)


tests/ut/python/parallel/test_batchnorm_batch_parallel.py (+3, -3)

@@ -118,6 +118,9 @@ def batchnorm_net(num_classes):
 
 
 def test_batchnorm_batch_parallel():
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
+    context.set_context(mode=context.GRAPH_MODE)
     num_classes = 1001
     batch_size = 32
     learning_rate = 0.1

@@ -134,9 +137,6 @@ def test_batchnorm_batch_parallel():
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
 
-    context.reset_auto_parallel_context()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
-    context.set_context(mode=context.GRAPH_MODE)
     model = Model(net, loss, opt)
     model.train(epoch_size, dataset, dataset_sink_mode=False)



tests/ut/python/parallel/test_bn_prelu_cell.py (+1, -1)

@@ -198,6 +198,7 @@ def bn_net():
 
def bn_common(parallel_mode, train_flag, strategy_loss=None):
     context.set_context(mode=context.GRAPH_MODE)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
     learning_rate = 0.1
     momentum = 0.9
     epoch_size = 2

@@ -218,7 +219,6 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):
 
     if parallel_mode == ParallelMode.DATA_PARALLEL:
         context.set_auto_parallel_context(parameter_broadcast=True)
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
     model = Model(net, loss, opt)
     if train_flag:
         model.train(epoch_size, dataset, dataset_sink_mode=False)


tests/ut/python/parallel/test_get_next.py (+4, -4)

@@ -88,13 +88,13 @@ def test_get_next_semi_auto_parallel():
         return x
 
     context.set_auto_parallel_context(device_num=4, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     network = Net(strategy1=((1, 4),), strategy2=((4, 1), (1,)))
     strategy3 = ((4, 1), (), ())
     strategy4 = ((4, 1), (4, 1))
     net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2, strategy3=strategy3,
                                 strategy4=strategy4)
     net = GradWrap(net_with_loss)
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     compile_net(net)

@@ -112,13 +112,13 @@ def test_get_next_semi_auto_parallel1():
         return x
 
     context.set_auto_parallel_context(device_num=4, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     network = Net(strategy1=((1, 4),), strategy2=((4, 1), (1,)))
     strategy3 = ((1, 4), (), ())
     strategy4 = ((4, 1), (4, 1))
     net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2, strategy3=strategy3,
                                 strategy4=strategy4)
     net = GradWrap(net_with_loss)
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     compile_net(net)

@@ -136,10 +136,10 @@ def test_get_next_auto_parallel():
         return x
 
     context.set_auto_parallel_context(device_num=4, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
     network = Net()
     net_with_loss = NetWithLoss(network, [ms.float32, ms.int32], [[32, 64], [32]], 2)
     net = GradWrap(net_with_loss)
-    context.set_auto_parallel_context(parallel_mode="auto_parallel")
     compile_net(net)

@@ -153,6 +153,6 @@ def test_only_one_get_next():
         return self.get_next()
 
     context.set_auto_parallel_context(device_num=4, global_rank=0)
-    net = Net()
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    net = Net()
     compile_net(net)

tests/ut/python/parallel/test_initializer_weight_slice.py (+35, -12)

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import numpy as np
+import pytest
 from mindspore import context
 import mindspore.nn as nn
 from mindspore.ops import operations as P

@@ -22,20 +23,19 @@ import mindspore.common.api as me
 from mindspore.common.initializer import initializer
 from hccl_test.manage.api import Hccl
 
+class Net(nn.Cell):
+    def __init__(self, strategy1, strategy2, weight):
+        super().__init__()
+        self.weight = Parameter(weight, "w1")
+        self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
+        self.relu = P.ReLU().set_strategy(strategy2)
+
-def check_initializer_weight_slice(init_name="Uniform"):
-    class Net(nn.Cell):
-        def __init__(self, strategy1, strategy2, weight):
-            super().__init__()
-            self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+    def construct(self, x):
+        out = self.matmul(x, self.weight)
+        out = self.relu(out)
+        return out
-        def construct(self, x):
-            out = self.matmul(x, self.weight)
-            out = self.relu(out)
-            return out
 
+def check_initializer_weight_slice(init_name="Uniform"):
     def get_slice(rank):
         hccl = Hccl()
         rank_save = hccl.rank_id

@@ -77,5 +77,28 @@
     for init_name in initializers:
         check_initializer_weight_slice(init_name)
 
+def test_wrong_order_set_parallel_mode_with_initializer():
+    weight = initializer("Normal", [64, 32], ms.float32)
+    strategy1 = ((2, 1), (4, 1))
+    strategy2 = ((2, 4),)
+    net = Net(strategy1, strategy2, weight)
+    exe = me._executor
+    x = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    with pytest.raises(RuntimeError):
+        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
+    net.set_auto_parallel()
+    exe.compile(net, x, auto_parallel_mode=True, phase='train')
+
+def test_wrong_order_set_parallel_mode_without_initializer():
+    weight = Tensor(np.ones([64, 32]), ms.float32)
+    strategy1 = ((2, 1), (4, 1))
+    strategy2 = ((2, 4),)
+    net = Net(strategy1, strategy2, weight)
+    exe = me._executor
+    x = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
+    net.set_auto_parallel()
+    exe.compile(net, x, auto_parallel_mode=True, phase='train')
+
 if __name__ == '__main__':
     test_initializer_weight_slice()

tests/ut/python/parallel/test_linear.py (+1, -1)

@@ -58,12 +58,12 @@ def test_linear():
         return out
 
     context.set_auto_parallel_context(device_num=16, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     strategy0 = ((2, 4), (2, 4))
     strategy1 = ((2, 4), (4,))
     strategy2 = ((2, 8),)
     strategy3 = ((16, 1), (16, 1))
     net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2), strategy3))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     net.set_auto_parallel()
 
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)


+ 5
- 5
tests/ut/python/parallel/test_loss_and_optimizer.py View File

@@ -54,6 +54,7 @@ def test_momentum():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -69,7 +70,6 @@ def test_momentum():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -88,6 +88,7 @@ def test_momentum_with_loss_scale():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -103,7 +104,6 @@ def test_momentum_with_loss_scale():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -122,6 +122,7 @@ def test_momentum_with_dynamic_lr():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -138,7 +139,6 @@ def test_momentum_with_dynamic_lr():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -157,6 +157,7 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -174,7 +175,6 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

@@ -193,6 +193,7 @@ def test_lars():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
strategy3 = ((4, 1), (4, 1))
@@ -209,6 +210,5 @@ def test_lars():
lars_filter=lambda x: 'bn' not in x.name)
net_with_loss = NetWithLoss(net, strategy3)
train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)
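All five hunks in this file make the same move for the optimizer tests; the detail worth noting is that the diff places the parallel-mode call before the optimizer is built as well, since the optimizer owns Parameters of its own (momentum accumulators, dynamic-LR tensors). A hedged sketch of that flow; the Dense net and hyperparameters stand in for the tests' real helpers:

import mindspore.nn as nn
from mindspore import context
from mindspore.nn import TrainOneStepCell, WithLossCell

# Pin the parallel configuration before net AND optimizer exist.
context.set_auto_parallel_context(device_num=4, global_rank=0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

net = nn.Dense(32, 16)
loss = nn.SoftmaxCrossEntropyWithLogits()
optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)

# Wrapping happens last; no context call may follow the constructors above.
train_net = TrainOneStepCell(WithLossCell(net, loss), optimizer)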

+ 4
- 4
tests/ut/python/parallel/test_one_hot_net.py View File

@@ -266,11 +266,11 @@ class BNReshapeDenseBNNet(nn.Cell):
def test_bn_reshape_dense_bn_train_loss():
batch_size = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input_, label)
@@ -279,12 +279,12 @@ def test_bn_reshape_dense_bn_train_loss():
def test_semi_one_hot_net_batch():
batch_size = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size * 1, 512]).astype(np.float32) * 0.01)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)

net = SemiAutoOneHotNet(args=Args(), strategy=StrategyBatch())
net = GradWrap(NetWithLoss(net))
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()

_executor.compile(net, input_, label)
@@ -300,10 +300,10 @@ def test_semi_one_hot_net_model():
label = Tensor(np.ones([batch_size]), dtype=ms.int32)
dataset = Dataset(predict, label, 2, input_num=2)

- net = SemiAutoOneHotNet(args=Args(), strategy=StrategyModel())
- opt = Momentum(net.trainable_params(), learning_rate, momentum)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=16)
context.set_context(mode=context.GRAPH_MODE)
+ net = SemiAutoOneHotNet(args=Args(), strategy=StrategyModel())
+ opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, optimizer=opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)

+ 4
- 6
tests/ut/python/parallel/test_operator_model_parallel.py View File

@@ -353,6 +353,8 @@ def test_resnet_operator_batch_parallel():


context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
+ context.set_context(mode=context.GRAPH_MODE)
predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)


@@ -363,9 +365,6 @@ def test_resnet_operator_batch_parallel():
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

- context.reset_auto_parallel_context()
- context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
- context.set_context(mode=context.GRAPH_MODE)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)


@@ -379,6 +378,8 @@ def test_resnet_model_parallel():


context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
+ context.set_context(mode=context.GRAPH_MODE)
predict = Tensor(np.ones([batch_size, 64, 112, 112]), dtype=ms.float32)
label = Tensor(np.ones([batch_size]), dtype=ms.int32)


@@ -389,9 +390,6 @@ def test_resnet_model_parallel():
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

- context.reset_auto_parallel_context()
- context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
- context.set_context(mode=context.GRAPH_MODE)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)
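The two functions above show the second flavor of the fix used by the model-level tests: when a test reconfigures parallelism from scratch, it now calls reset_auto_parallel_context() and sets the mode via the ParallelMode enum before net, loss, and optimizer are built. A hedged sketch of that ordering; the Dense net stands in for the test's ResNet, and the ParallelMode import path is assumed for this era of the codebase:

import mindspore.nn as nn
from mindspore import context
from mindspore.nn.optim import Momentum
from mindspore.train.model import Model, ParallelMode  # import path assumed

# Start from a clean slate, then pin the parallel mode up front.
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=8)
context.set_context(mode=context.GRAPH_MODE)

# Only now build net, loss, and optimizer, so every Parameter's
# initializer is created under the final parallel mode.
net = nn.Dense(64, 10)
loss = nn.SoftmaxCrossEntropyWithLogits()
opt = Momentum(net.trainable_params(), 0.1, 0.9)
model = Model(net, loss, opt)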




+ 2
- 2
tests/ut/python/parallel/test_optimizer.py View File

@@ -45,6 +45,8 @@ class Net(nn.Cell):


def test_dense_gen_graph():
context.set_context(mode=context.GRAPH_MODE)
+ context.reset_auto_parallel_context()
+ context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8)
init()
network = Net(512, 128)


@@ -53,8 +55,6 @@ def test_dense_gen_graph():
learning_rate=0.1,
momentum=0.9)
network = WithLossCell(network, loss_fn)
- context.reset_auto_parallel_context()
- context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8)
network = TrainOneStepCell(network, optimizer)


predict = Tensor(np.ones([64, 512]).astype(np.float32) * 0.01)


+ 2
- 2
tests/ut/python/parallel/test_optimizer_clone_weight.py View File

@@ -54,6 +54,7 @@ def test_optimizer_clone_weight():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -70,7 +71,6 @@ def test_optimizer_clone_weight():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)


@@ -89,6 +89,7 @@ def test_optimizer_clone_weight2():
return out

context.set_auto_parallel_context(device_num=4, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

strategy1 = ((2, 1), (2, 1))
strategy2 = ((4, 1),)
@@ -105,6 +106,5 @@ def test_optimizer_clone_weight2():
net_with_loss = NetWithLoss(net, strategy3)

train_net = TrainOneStepCell(net_with_loss, optimizer)
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(train_net, x, b)

+ 2
- 2
tests/ut/python/parallel/test_reshape.py View File

@@ -320,10 +320,10 @@ def reshape_net2(backbone):
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size * device_num, 512, 7, 7]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(backbone))
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(net, input_)


@@ -530,10 +530,10 @@ def test_bn_reshape_dense_bn_train():
batch_size = 16
device_num = 16
context.set_auto_parallel_context(device_num=device_num, global_rank=0)
+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
input_ = Tensor(np.ones([batch_size, 2, 32, 32]).astype(np.float32) * 0.01)

net = GradWrap(NetWithLoss(BNReshapeDenseBNNet()))
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

compile_net(net, input_)




+ 3
- 0
tests/ut/python/parallel/test_using_seed_for_initializer.py View File

@@ -18,6 +18,7 @@ from numpy import allclose
import mindspore.common.initializer as init
import mindspore.nn as nn
from mindspore import Parameter
+ from mindspore.parallel._utils import _set_has_initializer

parameter_shape = [16, 4]


@@ -46,6 +47,7 @@ def test_using_same_seed_for_initializer():
np.random.seed(0)
net2 = ParameterNet()
net2.init_parameters_data()
+ _set_has_initializer(False)
for key in net1.parameters_dict():
    if key not in net2.parameters_dict():
        assert False
@@ -60,6 +62,7 @@ def test_using_diffserent_seed_for_initializer():
np.random.seed(1)
net2 = ParameterNet()
net2.init_parameters_data()
+ _set_has_initializer(False)
for key in net1.parameters_dict():
    if key not in net2.parameters_dict():
        assert False
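These hunks lean on the PR's new module-level flag: creating any Initializer records that fact, and later parallel-mode changes are rejected, so a test that builds several nets in a row clears the flag with the private helper imported above. A rough sketch of the pattern; _set_has_initializer is internal API from this PR, ParameterNet is the Cell defined in this test file, and the tensor access in the final check is an assumption:

import numpy as np
from mindspore.parallel._utils import _set_has_initializer  # private helper from this PR

np.random.seed(0)
net1 = ParameterNet()
net1.init_parameters_data()
_set_has_initializer(False)     # clear the "an Initializer was created" flag

np.random.seed(0)
net2 = ParameterNet()
net2.init_parameters_data()
_set_has_initializer(False)

# same seed -> identical parameter data
for key in net1.parameters_dict():
    assert np.allclose(net1.parameters_dict()[key].data.asnumpy(),
                       net2.parameters_dict()[key].data.asnumpy())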


+ 3
- 3
tests/ut/python/parallel/test_virtual_dataset_3_input.py View File

@@ -62,13 +62,13 @@ def test_virtual_dataset_3_input():
out = self.matmul2(out, b)
return out

+ context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+ context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy0 = ((2, 1), (2, 1), (2, 1))
strategy1 = ((2, 2), (2, 2))
strategy2 = ((2, 2), (2, 2))
strategy3 = ((2, 4),)
net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2, strategy3)))
- context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
- context.set_auto_parallel_context(device_num=8, global_rank=0)
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 2048]), dtype=ms.float32)
@@ -89,10 +89,10 @@ def test_virtualdataset_cell_3_inputs():
out = self.matmul2(out, b)
return out

- net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None))))
context.set_context(save_graphs=True)
context.set_auto_parallel_context(parallel_mode="auto_parallel")
context.set_auto_parallel_context(device_num=8, global_rank=0)
+ net = GradWrap(VirtualDatasetCellTriple(NetWithLoss(Net(None, None, None))))
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([32, 64]), dtype=ms.float32)
b = Tensor(np.ones([64, 2048]), dtype=ms.float32)
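The moved line in the last hunk (net construction pushed below the context calls) illustrates the new contract most directly: building the net first would trip the check. A hedged sketch of what the old ordering now runs into; the exact exception type and message are assumptions, not quoted from the patch:

import mindspore.nn as nn
from mindspore import context

net = nn.Dense(32, 16)   # building the net instantiates Initializers

try:
    # The has_initializer flag added by this PR is already set here,
    # so changing the parallel mode afterwards is rejected.
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
except RuntimeError as err:
    print("set parallel mode before creating any Initializer:", err)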


+ 4
- 4
tests/ut/python/train/test_amp.py View File

@@ -146,6 +146,10 @@ def test_compile_model_train_O2():
def test_compile_model_train_O2_parallel():
dataset_types = (np.float32, np.float32)
dataset_shapes = ((16, 16), (16, 16))
+ context.set_auto_parallel_context(
+     global_rank=0, device_num=8,
+     mirror_mean=True, parameter_broadcast=True,
+     parallel_mode=ParallelMode.DATA_PARALLEL)


dataset = MindDataSet(dataset_types, dataset_shapes)


@@ -153,10 +157,6 @@ def test_compile_model_train_O2_parallel():
loss = nn.MSELoss()
optimizer = nn.Momentum(net.trainable_params(), 0.1, 0.9, 0.00004, 1024.0)

- context.set_auto_parallel_context(
-     global_rank=0, device_num=8,
-     mirror_mean=True, parameter_broadcast=True,
-     parallel_mode=ParallelMode.DATA_PARALLEL)
init()

model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={"acc"}, amp_level="O2")
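Taken together, the test updates converge on one script-level ordering, sketched below for a data-parallel AMP run. The Dense net is a placeholder, the ParallelMode import path is assumed for this era, and amp_level="O2" is taken from the test above:

import mindspore.nn as nn
from mindspore import context
from mindspore.communication.management import init
from mindspore.train.model import Model, ParallelMode  # import path assumed

# 1) context first: graph mode plus the final parallel configuration
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(
    global_rank=0, device_num=8,
    mirror_mean=True, parameter_broadcast=True,
    parallel_mode=ParallelMode.DATA_PARALLEL)

# 2) communication init, still before any Parameter/Initializer exists
init()

# 3) only now: network, loss, optimizer, Model
net = nn.Dense(16, 16)
loss = nn.MSELoss()
optimizer = nn.Momentum(net.trainable_params(), 0.1, 0.9)
model = Model(net, loss_fn=loss, optimizer=optimizer, amp_level="O2")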

