Browse Source

Add distributed parallel operators for ReduceAll and ReduceProd

feature/build-system-rewrite
Bert0108 4 years ago
parent
commit
dfc92f1791
7 changed files with 406 additions and 47 deletions
  1. +1
    -0
      mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h
  2. +2
    -0
      mindspore/ccsrc/frontend/parallel/dynamic_creator.h
  3. +6
    -1
      mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
  4. +68
    -19
      mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc
  5. +24
    -1
      mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h
  6. +1
    -1
      mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc
  7. +304
    -25
      tests/ut/python/parallel/test_reduce_method_info.py

+ 1
- 0
mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h View File

@@ -752,6 +752,7 @@ class ReduceSumCost : public OperatorCost {
bool cross_batch_ = false;
};
using ReduceMethodCost = ReduceSumCost;
using ReduceProdCost = ReduceSumCost;

class ReduceMeanCost : public ReduceSumCost {
public:


+ 2
- 0
mindspore/ccsrc/frontend/parallel/dynamic_creator.h View File

@@ -215,6 +215,8 @@ REGISTER(IOUInfo);
REGISTER(RandomChoiceWithMaskInfo);
REGISTER(CropAndResizeInfo);
REGISTER(ROIAlignInfo);
REGISTER(ReduceProdInfo);
REGISTER(ReduceAllInfo);
} // namespace parallel
} // namespace mindspore



+ 6
- 1
mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h View File

@@ -102,7 +102,9 @@ constexpr char REDUCE_OP_SUM[] = "sum";
constexpr char STRATEGY_GEN_MODE[] = "strategy_gen_mode";
constexpr char REDUCE_OP_MAX[] = "max";
constexpr char REDUCE_OP_MIN[] = "min";
constexpr char REDUCE_OP_ANY[] = "any";
constexpr char REDUCE_OP_ANY[] = "sum";
constexpr char REDUCE_OP_ALL[] = "prod";
constexpr char REDUCE_OP_PROD[] = "prod";
constexpr char OP_PATH[] = "mindspore.ops.operations";
constexpr char INNER_OP_PATH[] = "mindspore.ops.operations._inner_ops";
constexpr char FUNCTIONAL_OP_PATH[] = "mindspore.ops.functional";
@@ -328,6 +330,9 @@ constexpr char REDUCE_MAX[] = "ReduceMax";
constexpr char REDUCE_MIN[] = "ReduceMin";
constexpr char REDUCE_SUM[] = "ReduceSum";
constexpr char REDUCE_MEAN[] = "ReduceMean";
constexpr char REDUCE_PROD[] = "ReduceProd";
constexpr char REDUCE_ALL[] = "ReduceAll";
constexpr char REDUCE_ANY[] = "ReduceAny";
constexpr char ARGMAXWITHVALUE[] = "ArgMaxWithValue";
constexpr char ARGMINWITHVALUE[] = "ArgMinWithValue";
constexpr char CONV2D[] = "Conv2D";


+ 68
- 19
mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc View File

@@ -280,6 +280,74 @@ Status ReduceMeanInfo::InferForwardCommunication() {
return SUCCESS;
}

// Builds the forward communication sequence for a boolean reduction.
// The tensor is cast to Int32, all-reduced across the forward group with
// reduce_method_ (presumably because AllReduce does not accept Bool tensors
// directly — confirm against the collective backend), then cast back to Bool.
ForwardOp ReduceAnyInfo::CreateForwardOp(const std::vector<Group> &forward_group) {
// Create Cast to Int32 op so the collective can operate on an integer tensor.
Operator op0 = CreateCastOp(kInt32);

// Create AllReduce op over the first forward group; reduce_method_ selects the
// collective reduction (set by this class or a subclass such as ReduceAllInfo).
Operator op1 = CreateAllReduceOp(reduce_method_, forward_group[0].name());
std::string group_name = forward_group[0].name();
MS_LOG(INFO) << "The group of forward all reduce is " << group_name << ", method is " << reduce_method_;

// Create Cast back to Bool op to restore the logical result type.
Operator op2 = CreateCastOp(kBool);

// The three ops run in order: cast -> allreduce -> cast.
ForwardOp forward_op = {op0, op1, op2};

return forward_op;
}

// Determines whether a forward AllReduce is required and, if so, creates the
// communication group and installs the cast/allreduce/cast forward op.
// A tensor-map (group_creat_map) is built that keeps only the dimensions NOT
// being reduced-and-split; CreateGroupByTensorMap then derives the device
// group from it. Returns SUCCESS, or FAILED if group creation fails.
Status ReduceAnyInfo::InferForwardCommunication() {
Dimensions stra = strategy_->GetInputDim().at(0);
// Pure data-parallel with cross_batch set: each device keeps its own partial
// result, so no forward communication is inserted.
if (cross_batch_ && IsDataParallelStrategy(stra, stage_id_)) {
MS_LOG(INFO) << name_ << ": cross_batch is True, don't need to InferForwardCommunication";
return SUCCESS;
}
forward_op_.clear();
std::vector<int64_t> dim_list = reduce_dim();
size_t size = stra.size();
// Tensor map used to create the communication group; built below by judging
// whether each reduce dim is partitioned.
Shape group_creat_map;

// If there is repeated calculation and repeated_calc_num_ was inserted at the
// FIRST dimension of the device matrix, the first dimension of the map must
// reference it explicitly.
if ((dev_matrix_shape_.size() > size) && !repeated_num_in_dev_matrix_right_) {
group_creat_map.push_back(SizeToInt(dev_matrix_shape_.size() - size_t(1)));
}

// Keep every dimension that is either not reduced or not split; skip
// dimensions that are both reduced and split (those need the AllReduce).
for (size_t index = 0; index < size; ++index) {
auto pos =
std::find_if(dim_list.begin(), dim_list.end(), [index](const int64_t &dim) { return SizeToLong(index) == dim; });
if (pos != dim_list.end() && stra[index] != 1) {
continue;
}
// Tensor-map convention: dimension index counts down from size-1.
group_creat_map.push_back(SizeToLong(size) - SizeToLong(index) - 1);
}

// If there is repeated calculation and repeated_calc_num_ was inserted at the
// LAST dimension of the device matrix, shift every mapped entry up by one and
// append 0 so the map stays aligned with the device matrix.
if (repeated_num_in_dev_matrix_right_ && (repeated_calc_num_ > 1)) {
for (auto &ele : group_creat_map) {
if (ele == MAP_NONE) {
continue;
}
ele += 1;
}
group_creat_map.push_back(0);
}

std::vector<Group> forward_group;
if (CreateGroupByTensorMap(group_creat_map, &forward_group) != SUCCESS) {
ReportError(name_ + ": Create group failed.");
return FAILED;
}
// An empty group means no cross-device reduction is needed for this strategy.
if (!forward_group.empty()) {
forward_op_ = CreateForwardOp(forward_group);
}

return SUCCESS;
}

Status ReduceMethod::InferMirrorOps() {
mirror_ops_.clear();
Shape input_tensor_map = inputs_tensor_map_.at(0);
@@ -520,24 +588,5 @@ std::vector<StrategyPtr> ArgMaxWithValueInfo::GenerateOpStrategies(int64_t stage

return sp_vector;
}

// NOTE(review): in this commit's diff this definition appears on the removed
// side of the hunk (the header drops the CheckStrategy override), meaning the
// restriction below was lifted in favor of handling split reduce dims via
// InferForwardCommunication — confirm against the full file.
// Validates the strategy after the base-class check and additionally rejects
// any strategy that splits a reduced dimension.
Status ReduceAnyInfo::CheckStrategy(const StrategyPtr &strategy) {
if (ReduceMethod::CheckStrategy(strategy) != SUCCESS) {
MS_LOG(ERROR) << name_ << ": checking strategy failed.";
return FAILED;
}
auto dim_list = ReduceMethod::reduce_dim();
Dimensions stra = strategy->GetInputDim().at(0);
// Fail if any dimension is both in the reduce-dim list and split (> 1).
for (size_t index = 0; index < stra.size(); ++index) {
auto pos =
std::find_if(dim_list.begin(), dim_list.end(), [index](const int64_t &dim) { return SizeToLong(index) == dim; });
if (pos != dim_list.end() && stra[index] != 1) {
MS_LOG(ERROR) << name_
<< ": checking strategy failed. ReduceAny operator does not support reduced dimension split.";
return FAILED;
}
}
return SUCCESS;
}
} // namespace parallel
} // namespace mindspore

+ 24
- 1
mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h View File

@@ -131,7 +131,8 @@ class ReduceAnyInfo : public ReduceMethod {
~ReduceAnyInfo() override = default;

protected:
Status CheckStrategy(const StrategyPtr &strategy) override;
Status InferForwardCommunication() override;
ForwardOp CreateForwardOp(const std::vector<Group> &forward_group);
};

class ReduceMinInfo : public ReduceMethod {
@@ -144,6 +145,28 @@ class ReduceMinInfo : public ReduceMethod {

~ReduceMinInfo() override = default;
};

// Operator-parallel info for ReduceProd. Reuses the generic ReduceMethod
// machinery; only the collective reduce method (REDUCE_OP_PROD) and the cost
// model (ReduceProdCost, an alias of ReduceSumCost) are specialized.
class ReduceProdInfo : public ReduceMethod {
public:
ReduceProdInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
const PrimitiveAttrs &attrs)
: ReduceMethod(name, inputs_shape, outputs_shape, attrs, std::make_shared<ReduceProdCost>()) {
reduce_method_ = REDUCE_OP_PROD;
}

~ReduceProdInfo() override = default;
};

// Operator-parallel info for ReduceAll. Inherits ReduceAnyInfo's
// cast -> AllReduce -> cast forward communication and only swaps the
// collective method to REDUCE_OP_ALL (declared as "prod" in ops_utils.h:
// a product of 0/1 values is 1 iff all are true).
class ReduceAllInfo : public ReduceAnyInfo {
public:
ReduceAllInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
const PrimitiveAttrs &attrs)
: ReduceAnyInfo(name, inputs_shape, outputs_shape, attrs) {
reduce_method_ = REDUCE_OP_ALL;
}

~ReduceAllInfo() override = default;
};
} // namespace parallel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_REDUCE_SUM_INFO_H_

+ 1
- 1
mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc View File

@@ -174,7 +174,7 @@ bool IsSplittableOperator(const std::string &op_name) {
UNSORTED_SEGMENT_MIN, REPEAT_ELEMENTS, TENSOR_DOT, RANGE, UNIFORM_CANDIDATE_SAMPLER, SLICE, SELECT, GATHERD,
UNSORTED_SEGMENT_MAX, GATHER_ND, TOPK, SCATTER_UPDATE, VIRTUAL_OUTPUT, CONV2D_BACK_PROP_INPUT, CONV2D_TRANSPOSE,
MATMUL_DDS, DSD_MATMUL, UNIFORMREAL, RESIZE_BILINEAR, RESIZE_NEAREST_NEIGHBOR, CUMSUM, FAST_GELU, IOU,
BOUNDING_BOX_ENCODE, RANDOM_CHOICE_WITH_MASK, CROP_AND_RESIZE, ROI_ALIGN};
BOUNDING_BOX_ENCODE, RANDOM_CHOICE_WITH_MASK, CROP_AND_RESIZE, ROI_ALIGN, REDUCE_PROD, REDUCE_ANY, REDUCE_ALL};
// clang-format on

auto iter = splittable_op.find(op_name);


+ 304
- 25
tests/ut/python/parallel/test_reduce_method_info.py View File

@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''Reduce method ut'''
import numpy as np
import pytest
import mindspore as ms
import mindspore.nn as nn
from mindspore import Tensor
@@ -81,9 +80,14 @@ def compile_net(net, x, y, b):

# model_parallel test
def test_sum_mul():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the non-reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -108,9 +112,14 @@ def test_sum_mul():


def test_sum_mul2():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -135,9 +144,14 @@ def test_sum_mul2():


def test_sum_mul3():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the non-reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -162,9 +176,14 @@ def test_sum_mul3():


def test_sum_mul4():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the reduced axes, keep_dims is True
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -189,9 +208,14 @@ def test_sum_mul4():


def test_sum_mul5():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the reduced axes, keep_dims is True
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

@@ -212,9 +236,14 @@ def test_sum_mul5():


def test_sum_mul6():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the non-reduced axes, keep_dims is True
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

@@ -235,9 +264,14 @@ def test_sum_mul6():


def test_sum_mul7():
"""
Feature: test ReduceSum model parallel strategy
Description: partition the reduced axes, keep_dims is True
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

@@ -258,9 +292,14 @@ def test_sum_mul7():


def test_max_mul():
"""
Feature: test ReduceMax model parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -285,9 +324,14 @@ def test_max_mul():


def test_min_mul():
"""
Feature: test ReduceMin model parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_min = P.ReduceMin(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -312,9 +356,14 @@ def test_min_mul():


def test_reduce_mean_mul_float32():
"""
Feature: test ReduceMean model parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -341,7 +390,7 @@ def test_reduce_mean_mul_float32():

class ArgMaxWithValueNet(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(ArgMaxWithValueNet, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -355,7 +404,7 @@ class ArgMaxWithValueNet(nn.Cell):

class ArgMinWithValueNet(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(ArgMinWithValueNet, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
@@ -391,6 +440,11 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel():


def test_arg_max_with_value_mul_semi():
"""
Feature: test ArgMaxWithValue semi parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = ((1, 4, 2), (1, 4, 2))
strategy2 = ((4, 1, 1),)
@@ -401,6 +455,11 @@ def test_arg_max_with_value_mul_semi():


def test_arg_max_with_value_mul_auto():
"""
Feature: test ArgMaxWithValue auto parallel strategy
Description: don't set the strategy, keep_dims is False
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = None
strategy2 = None
@@ -411,6 +470,11 @@ def test_arg_max_with_value_mul_auto():


def test_arg_min_with_value_mul_semi_axis_parallel():
"""
Feature: test ArgMinWithValue semi parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = ((1, 4, 2), (1, 4, 2))
strategy2 = ((4, 1, 2),)
@@ -421,6 +485,11 @@ def test_arg_min_with_value_mul_semi_axis_parallel():


def test_arg_min_with_value_mul_semi():
"""
Feature: test ArgMinWithValue model parallel strategy
Description: partition the non-reduced axes, keep_dims is False
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = ((1, 4, 2), (1, 4, 2))
strategy2 = ((4, 1, 1),)
@@ -431,6 +500,11 @@ def test_arg_min_with_value_mul_semi():


def test_arg_min_with_value_mul_auto():
"""
Feature: test ArgMinWithValue auto parallel strategy
Description: don't set the strategy, keep_dims is False
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = None
strategy2 = None
@@ -442,7 +516,7 @@ def test_arg_min_with_value_mul_auto():

class ArgMinWithValueNet2(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(ArgMinWithValueNet2, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).shard(strategy2)
self.relu = P.ReLU().shard(strategy3)
@@ -465,6 +539,11 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2():


def test_arg_min_with_value_mul_semi2():
"""
Feature: test ArgMinWithValue semi parallel strategy
Description: partition the non-reduced axes, keep_dims is True
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = ((1, 4, 2), (1, 4, 2))
strategy2 = ((4, 1, 1),)
@@ -475,6 +554,11 @@ def test_arg_min_with_value_mul_semi2():


def test_arg_min_with_value_mul_auto2():
"""
Feature: test ArgMinWithValue auto parallel strategy
Description: don't set the strategy, keep_dims is True
Expectation: compile success
"""
context.set_auto_parallel_context(device_num=8, global_rank=0)
strategy1 = None
strategy2 = None
@@ -485,12 +569,18 @@ def test_arg_min_with_value_mul_auto2():


def test_cross_batch():
"""
Feature: test ReduceMean semi parallel strategy with cross_batch
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True)
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy3) \
.add_prim_attr("cross_batch", True)

def construct(self, x, y):
out = self.mul1(x, y)
@@ -511,12 +601,18 @@ def test_cross_batch():


def test_cross_batch2():
"""
Feature: test ReduceSum semi parallel strategy with cross_batch
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True)
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy3) \
.add_prim_attr("cross_batch", True)

def construct(self, x, y):
out = self.mul1(x, y)
@@ -537,9 +633,14 @@ def test_cross_batch2():


def test_cross_batch_auto():
"""
Feature: test ReduceSum auto parallel strategy with cross_batch
Description: don't set the strategy, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul()
self.reduce_mean = P.ReduceMean(keep_dims=False)
self.reduce_sum = P.ReduceSum(keep_dims=False).add_prim_attr("cross_batch", True)
@@ -560,9 +661,14 @@ def test_cross_batch_auto():


def test_max_empty_tuple():
"""
Feature: test ReduceMax semi parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
super(Net, self).__init__()
self.mul = P.Mul().shard(strategy1)
self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2)
self.add = P.Add().shard(strategy3)
@@ -588,9 +694,14 @@ def test_max_empty_tuple():


def test_any_mul():
"""
Feature: test ReduceAny semi parallel strategy
Description: partition the reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_any = P.ReduceAny(keep_dims=False).shard(strategy2)
self.cast = P.Cast()
@@ -609,14 +720,18 @@ def test_any_mul():

x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
with pytest.raises(RuntimeError):
compile_net_no_bias(net, x, y)
compile_net_no_bias(net, x, y)


def test_any_mul2():
"""
Feature: test ReduceAny semi parallel strategy
Description: partition the non-reduced axes, keep_dims is False
Expectation: compile success
"""
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
super(Net, self).__init__()
self.mul1 = P.Mul().shard(strategy1)
self.reduce_any = P.ReduceAny(keep_dims=False).shard(strategy2)
self.cast = P.Cast()
@@ -636,3 +751,167 @@ def test_any_mul2():
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
compile_net_no_bias(net, x, y)

def test_all_mul():
    """
    Feature: ReduceAll operator under semi auto parallel
    Description: the reduced axis (axis 1) is sharded, keep_dims is False
    Expectation: compile success
    """
    class AllNet(nn.Cell):
        def __init__(self, mul_stra, all_stra):
            super(AllNet, self).__init__()
            self.mul1 = P.Mul().shard(mul_stra)
            self.reduce_all = P.ReduceAll(keep_dims=False).shard(all_stra)
            self.cast = P.Cast()

        def construct(self, x, y):
            prod = self.mul1(x, y)
            flags = self.cast(prod, ms.bool_)
            return self.reduce_all(flags, 1)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    mul_stra = ((1, 8, 1), (1, 8, 1))
    all_stra = ((1, 8, 1),)
    net = GradWrapNoBias(NetWithLossNoBias(AllNet(mul_stra, all_stra)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

    x = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    y = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    compile_net_no_bias(net, x, y)


def test_all_mul2():
    """
    Feature: ReduceAll operator under semi auto parallel
    Description: only non-reduced axes are sharded (reduce over axis -1), keep_dims is False
    Expectation: compile success
    """
    class AllNet(nn.Cell):
        def __init__(self, mul_stra, all_stra):
            super(AllNet, self).__init__()
            self.mul1 = P.Mul().shard(mul_stra)
            self.reduce_all = P.ReduceAll(keep_dims=False).shard(all_stra)
            self.cast = P.Cast()

        def construct(self, x, y):
            prod = self.mul1(x, y)
            flags = self.cast(prod, ms.bool_)
            return self.reduce_all(flags, -1)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    mul_stra = ((8, 1, 1), (8, 1, 1))
    all_stra = ((8, 1, 1),)
    net = GradWrapNoBias(NetWithLossNoBias(AllNet(mul_stra, all_stra)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

    x = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    y = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    compile_net_no_bias(net, x, y)

def test_prod_mul():
    """
    Feature: ReduceProd operator model parallel
    Description: the reduced axis (axis 0) is sharded, keep_dims is False
    Expectation: compile success
    """
    class ProdNet(nn.Cell):
        def __init__(self, mul_stra, prod_stra):
            super(ProdNet, self).__init__()
            self.mul1 = P.Mul().shard(mul_stra)
            self.reduce_prod = P.ReduceProd(keep_dims=False).shard(prod_stra)

        def construct(self, x, y):
            product = self.mul1(x, y)
            return self.reduce_prod(product, 0)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    mul_stra = ((1, 1, 8), (1, 1, 8))
    prod_stra = ((2, 4, 1),)
    net = GradWrapNoBias(NetWithLossNoBias(ProdNet(mul_stra, prod_stra)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

    x = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    y = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    compile_net_no_bias(net, x, y)

def test_prod_mul2():
    """
    Feature: ReduceProd operator model parallel
    Description: only non-reduced axes are sharded (reduce over axis -1), keep_dims is False
    Expectation: compile success
    """
    class ProdNet(nn.Cell):
        def __init__(self, mul_stra, prod_stra):
            super(ProdNet, self).__init__()
            self.mul1 = P.Mul().shard(mul_stra)
            self.reduce_prod = P.ReduceProd(keep_dims=False).shard(prod_stra)

        def construct(self, x, y):
            product = self.mul1(x, y)
            return self.reduce_prod(product, -1)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    mul_stra = ((1, 8, 1), (1, 8, 1))
    prod_stra = ((2, 4, 1),)
    net = GradWrapNoBias(NetWithLossNoBias(ProdNet(mul_stra, prod_stra)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

    x = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    y = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    compile_net_no_bias(net, x, y)

def test_prod_mul3():
    """
    Feature: ReduceProd operator model parallel
    Description: the reduced axis (axis 0) is sharded, keep_dims is True
    Expectation: compile success
    """
    class ProdNet(nn.Cell):
        def __init__(self, mul_stra, prod_stra):
            super(ProdNet, self).__init__()
            self.mul = P.Mul().shard(mul_stra)
            self.reduce_prod = P.ReduceProd(keep_dims=True).shard(prod_stra)

        def construct(self, x, y):
            product = self.mul(x, y)
            return self.reduce_prod(product, 0)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    mul_stra = ((1, 1, 8), (1, 1, 8))
    prod_stra = ((8, 1, 1),)
    net = GradWrapNoBias(NetWithLossNoBias(ProdNet(mul_stra, prod_stra)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

    x = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    y = Tensor(np.ones((128, 32, 64)), dtype=ms.float32)
    compile_net_no_bias(net, x, y)

def test_prod_mul_auto():
    """
    Feature: ReduceProd operator under auto parallel
    Description: no strategy is set, keep_dims is True
    Expectation: compile success
    """
    class ProdNet(nn.Cell):
        def __init__(self, mul_stra, prod_stra):
            super(ProdNet, self).__init__()
            self.mul1 = P.Mul().shard(mul_stra)
            self.reduce_prod = P.ReduceProd(keep_dims=True).shard(prod_stra)

        def construct(self, x, y):
            product = self.mul1(x, y)
            return self.reduce_prod(product, 0)

    context.set_auto_parallel_context(device_num=8, global_rank=0)
    # None strategies let the auto-parallel searcher pick the sharding.
    net = GradWrapNoBias(NetWithLossNoBias(ProdNet(None, None)))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    gen_inputs_and_compile_net_no_bias(net)

Loading…
Cancel
Save