|
|
|
@@ -11,9 +11,8 @@ |
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
# See the License for the specific language governing permissions and |
|
|
|
# limitations under the License. |
|
|
|
|
|
|
|
"""Unit tests for reduce methods."""
|
|
|
import numpy as np |
|
|
|
import pytest |
|
|
|
import mindspore as ms |
|
|
|
import mindspore.nn as nn |
|
|
|
from mindspore import Tensor |
|
|
|
@@ -81,9 +80,14 @@ def compile_net(net, x, y, b): |
|
|
|
|
|
|
|
# model_parallel test |
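# A note on the shard strategies used below: shard() takes one tuple per
# operator input, and each inner tuple gives the slice count for the matching
# tensor dimension. A minimal sketch, assuming 8 devices and the [128, 32, 64]
# inputs used throughout this file:
#
#   strategy = ((1, 8, 1), (1, 8, 1))  # split dim 1 of both Mul inputs 8 ways
#   mul = P.Mul().shard(strategy)      # each device computes a [128, 4, 64] slice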
|
|
|
def test_sum_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -108,9 +112,14 @@ def test_sum_mul(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul2(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -135,9 +144,14 @@ def test_sum_mul2(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul3(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -162,9 +176,14 @@ def test_sum_mul3(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul4(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -189,9 +208,14 @@ def test_sum_mul4(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul5(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) |
|
|
|
|
|
|
|
@@ -212,9 +236,14 @@ def test_sum_mul5(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul6(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) |
|
|
|
|
|
|
|
@@ -235,9 +264,14 @@ def test_sum_mul6(): |
|
|
|
|
|
|
|
|
|
|
|
def test_sum_mul7(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) |
|
|
|
|
|
|
|
@@ -258,9 +292,14 @@ def test_sum_mul7(): |
|
|
|
|
|
|
|
|
|
|
|
def test_max_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceMax model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -285,9 +324,14 @@ def test_max_mul(): |
|
|
|
|
|
|
|
|
|
|
|
def test_min_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceMin model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_min = P.ReduceMin(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -312,9 +356,14 @@ def test_min_mul(): |
|
|
|
|
|
|
|
|
|
|
|
def test_reduce_mean_mul_float32(): |
|
|
|
""" |
|
|
|
Feature: test ReduceMean model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -341,7 +390,7 @@ def test_reduce_mean_mul_float32(): |
|
|
|
|
|
|
|
class ArgMaxWithValueNet(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(ArgMaxWithValueNet, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -355,7 +404,7 @@ class ArgMaxWithValueNet(nn.Cell): |
|
|
|
|
|
|
|
class ArgMinWithValueNet(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(ArgMinWithValueNet, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).shard(strategy2) |
|
|
|
self.mul2 = P.Mul().shard(strategy3) |
|
|
|
@@ -391,6 +440,11 @@ def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_max_with_value_mul_semi(): |
|
|
|
""" |
|
|
|
Feature: test ArgMaxWithValue semi parallel strategy |
|
|
|
    Description: partition the non-reduced axes, keep_dims is False
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 4, 2), (1, 4, 2)) |
|
|
|
strategy2 = ((4, 1, 1),) |
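    # strategy1 slices dims 1 and 2 of both Mul inputs (4 * 2 = 8 slices);
    # strategy2 slices only dim 0 of the ArgMaxWithValue input, a non-reduced
    # axis, since the operator reduces axis -1.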
|
|
|
@@ -401,6 +455,11 @@ def test_arg_max_with_value_mul_semi(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_max_with_value_mul_auto(): |
|
|
|
""" |
|
|
|
Feature: test ArgMaxWithValue auto parallel strategy |
|
|
|
    Description: the strategy is not set, keep_dims is False
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = None |
|
|
|
strategy2 = None |
|
|
|
@@ -411,6 +470,11 @@ def test_arg_max_with_value_mul_auto(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_min_with_value_mul_semi_axis_parallel(): |
|
|
|
""" |
|
|
|
Feature: test ArgMinWithValue semi parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 4, 2), (1, 4, 2)) |
|
|
|
strategy2 = ((4, 1, 2),) |
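    # Here strategy2 also slices dim 2, the very axis ArgMinWithValue reduces,
    # so the reduction itself spans devices.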
|
|
|
@@ -421,6 +485,11 @@ def test_arg_min_with_value_mul_semi_axis_parallel(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_min_with_value_mul_semi(): |
|
|
|
""" |
|
|
|
    Feature: test ArgMinWithValue semi parallel strategy
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 4, 2), (1, 4, 2)) |
|
|
|
strategy2 = ((4, 1, 1),) |
|
|
|
@@ -431,6 +500,11 @@ def test_arg_min_with_value_mul_semi(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_min_with_value_mul_auto(): |
|
|
|
""" |
|
|
|
Feature: test ArgMinWithValue auto parallel strategy |
|
|
|
    Description: the strategy is not set, keep_dims is False
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = None |
|
|
|
strategy2 = None |
|
|
|
@@ -442,7 +516,7 @@ def test_arg_min_with_value_mul_auto(): |
|
|
|
|
|
|
|
class ArgMinWithValueNet2(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(ArgMinWithValueNet2, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).shard(strategy2) |
|
|
|
self.relu = P.ReLU().shard(strategy3) |
|
|
|
@@ -465,6 +539,11 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_min_with_value_mul_semi2(): |
|
|
|
""" |
|
|
|
Feature: test ArgMinWithValue semi parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 4, 2), (1, 4, 2)) |
|
|
|
strategy2 = ((4, 1, 1),) |
|
|
|
@@ -475,6 +554,11 @@ def test_arg_min_with_value_mul_semi2(): |
|
|
|
|
|
|
|
|
|
|
|
def test_arg_min_with_value_mul_auto2(): |
|
|
|
""" |
|
|
|
Feature: test ArgMinWithValue auto parallel strategy |
|
|
|
    Description: the strategy is not set, keep_dims is True
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = None |
|
|
|
strategy2 = None |
|
|
|
@@ -485,12 +569,18 @@ def test_arg_min_with_value_mul_auto2(): |
|
|
|
|
|
|
|
|
|
|
|
def test_cross_batch(): |
|
|
|
""" |
|
|
|
Feature: test ReduceMean semi parallel strategy with cross_batch |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) |
|
|
|
|
|
|
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy3) \ |
|
|
|
.add_prim_attr("cross_batch", True) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
@@ -511,12 +601,18 @@ def test_cross_batch(): |
|
|
|
|
|
|
|
|
|
|
|
def test_cross_batch2(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum semi parallel strategy with cross_batch |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2) |
|
|
|
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy3) \ |
|
|
|
.add_prim_attr("cross_batch", True) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
@@ -537,9 +633,14 @@ def test_cross_batch2(): |
|
|
|
|
|
|
|
|
|
|
|
def test_cross_batch_auto(): |
|
|
|
""" |
|
|
|
Feature: test ReduceSum auto parallel strategy with cross_batch |
|
|
|
    Description: the strategy is not set, keep_dims is False
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul() |
|
|
|
self.reduce_mean = P.ReduceMean(keep_dims=False) |
|
|
|
self.reduce_sum = P.ReduceSum(keep_dims=False).add_prim_attr("cross_batch", True) |
|
|
|
@@ -560,9 +661,14 @@ def test_cross_batch_auto(): |
|
|
|
|
|
|
|
|
|
|
|
def test_max_empty_tuple(): |
|
|
|
""" |
|
|
|
Feature: test ReduceMax semi parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2, strategy3): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul = P.Mul().shard(strategy1) |
|
|
|
self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2) |
|
|
|
self.add = P.Add().shard(strategy3) |
|
|
|
@@ -588,9 +694,14 @@ def test_max_empty_tuple(): |
|
|
|
|
|
|
|
|
|
|
|
def test_any_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceAny semi parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_any = P.ReduceAny(keep_dims=False).shard(strategy2) |
|
|
|
self.cast = P.Cast() |
|
|
|
@@ -609,14 +720,18 @@ def test_any_mul(): |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
    compile_net_no_bias(net, x, y)
|
|
|
|
|
|
|
|
|
|
|
def test_any_mul2(): |
|
|
|
""" |
|
|
|
Feature: test ReduceAny semi parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_any = P.ReduceAny(keep_dims=False).shard(strategy2) |
|
|
|
self.cast = P.Cast() |
|
|
|
@@ -636,3 +751,167 @@ def test_any_mul2(): |
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
def test_all_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceAll semi parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_all = P.ReduceAll(keep_dims=False).shard(strategy2) |
|
|
|
self.cast = P.Cast() |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
out = self.cast(out, ms.bool_) |
|
|
|
out = self.reduce_all(out, 1) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 8, 1), (1, 8, 1)) |
|
|
|
strategy2 = ((1, 8, 1),) |
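    # Both strategies slice dim 1, the axis reduce_all runs over, so the
    # logical AND must be combined across all 8 devices.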
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
|
|
|
|
def test_all_mul2(): |
|
|
|
""" |
|
|
|
Feature: test ReduceAll semi parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_all = P.ReduceAll(keep_dims=False).shard(strategy2) |
|
|
|
self.cast = P.Cast() |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
out = self.cast(out, ms.bool_) |
|
|
|
out = self.reduce_all(out, -1) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((8, 1, 1), (8, 1, 1)) |
|
|
|
strategy2 = ((8, 1, 1),) |
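    # Only dim 0 is sliced here, so each device reduces its own [16, 32, 64]
    # slice over the last axis with no cross-device combination.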
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
def test_prod_mul(): |
|
|
|
""" |
|
|
|
Feature: test ReduceProd model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_prod = P.ReduceProd(keep_dims=False).shard(strategy2) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
out = self.reduce_prod(out, 0) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 1, 8), (1, 1, 8)) |
|
|
|
strategy2 = ((2, 4, 1),) |
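    # strategy1 slices dim 2 while strategy2 slices dims 0 and 1, so a tensor
    # redistribution is inserted between Mul and ReduceProd; dim 0 is also the
    # reduced axis, so partial products must be multiplied across devices.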
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
def test_prod_mul2(): |
|
|
|
""" |
|
|
|
Feature: test ReduceProd model parallel strategy |
|
|
|
Description: partition the non-reduced axes, keep_dims is False |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_prod = P.ReduceProd(keep_dims=False).shard(strategy2) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
out = self.reduce_prod(out, -1) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 8, 1), (1, 8, 1)) |
|
|
|
strategy2 = ((2, 4, 1),) |
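    # The layouts again disagree, so a redistribution is inserted; the
    # reduction runs over dim -1, which stays whole under ((2, 4, 1),), so the
    # partial products need no cross-device combination.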
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
def test_prod_mul3(): |
|
|
|
""" |
|
|
|
Feature: test ReduceProd model parallel strategy |
|
|
|
Description: partition the reduced axes, keep_dims is True |
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, stra_mul, stra_prod): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul = P.Mul().shard(stra_mul) |
|
|
|
self.reduce_prod = P.ReduceProd(keep_dims=True).shard(stra_prod) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul(x, y) |
|
|
|
out = self.reduce_prod(out, 0) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = ((1, 1, 8), (1, 1, 8)) |
|
|
|
strategy2 = ((8, 1, 1),) |
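    # The reduced axis (dim 0) is sliced across all 8 devices, and with
    # keep_dims=True the combined output keeps a leading axis of size 1.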
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") |
|
|
|
|
|
|
|
x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32) |
|
|
|
compile_net_no_bias(net, x, y) |
|
|
|
|
|
|
|
def test_prod_mul_auto(): |
|
|
|
""" |
|
|
|
Feature: test ReduceProd auto parallel strategy |
|
|
|
    Description: the strategy is not set, keep_dims is True
|
|
|
Expectation: compile success |
|
|
|
""" |
|
|
|
class Net(nn.Cell): |
|
|
|
def __init__(self, strategy1, strategy2): |
|
|
|
super(Net, self).__init__() |
|
|
|
self.mul1 = P.Mul().shard(strategy1) |
|
|
|
self.reduce_prod = P.ReduceProd(keep_dims=True).shard(strategy2) |
|
|
|
|
|
|
|
def construct(self, x, y): |
|
|
|
out = self.mul1(x, y) |
|
|
|
out = self.reduce_prod(out, 0) |
|
|
|
return out |
|
|
|
|
|
|
|
context.set_auto_parallel_context(device_num=8, global_rank=0) |
|
|
|
strategy1 = None |
|
|
|
strategy2 = None |
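    # shard(None) leaves the strategy unset, so the auto_parallel search picks
    # the partitioning instead of the test fixing it by hand.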
|
|
|
net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2))) |
|
|
|
context.set_auto_parallel_context(parallel_mode="auto_parallel") |
|
|
|
gen_inputs_and_compile_net_no_bias(net) |