@@ -493,15 +493,35 @@ def test_assign_sub():
                                                     1.1, dtype=np.float32)),
                                              name="assignsub_weight")

-        def construct(self, x, y, z):
+        def construct(self, x):
             out = self.mul(x, self.mul_weight)
             out = self.assign_sub(self.assignsub_weight, out)
             return out

+    class SubNetWithLoss(nn.Cell):
+        def __init__(self, network):
+            super(SubNetWithLoss, self).__init__()
+            self.loss = VirtualLoss()
+            self.network = network
+
+        def construct(self, x):
+            predict = self.network(x,)
+            return self.loss(predict)
+
+    class SubGradWrap(nn.Cell):
+        def __init__(self, network):
+            super(SubGradWrap, self).__init__()
+            self.network = network
+
+        def construct(self, x):
+            return C.grad_all(self.network)(x)
+
+    def compile_sub_net(net, x):
+        net.set_auto_parallel()
+        _executor.compile(net, x)
+
     context.set_auto_parallel_context(device_num=64, global_rank=15)
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-    net = GradWrap(NetWithLoss(Net()))
+    net = SubGradWrap(SubNetWithLoss(Net()))
     x = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    y = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    z = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    compile_net(net, x, y, z)
+    compile_sub_net(net, x)
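The hunk above establishes the pattern the rest of this change set follows: when a test drops unused inputs, it also gets wrapper classes whose construct signatures match the new arity exactly, because a graph-mode Cell must be called with precisely its declared inputs. For reference, a minimal self-contained sketch of the compile-only flow, using only APIs that already appear in this patch (VirtualLoss is the suite's stand-in loss; nothing here is executed on a device):

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    from mindspore import Tensor, context
    from mindspore.common.api import _executor
    from mindspore.ops import composite as C
    from tests.ut.python.ops.test_math_ops import VirtualLoss

    class SubNetWithLoss(nn.Cell):
        def __init__(self, network):
            super(SubNetWithLoss, self).__init__()
            self.loss = VirtualLoss()        # test-only stand-in for a real loss
            self.network = network

        def construct(self, x):
            return self.loss(self.network(x))

    class SubGradWrap(nn.Cell):
        def __init__(self, network):
            super(SubGradWrap, self).__init__()
            self.network = network

        def construct(self, x):
            return C.grad_all(self.network)(x)   # gradients w.r.t. every declared input

    def compile_sub_net(net, x):
        net.set_auto_parallel()                  # mark the cell for parallel strategy handling
        _executor.compile(net, x)                # trace and compile only; no execution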
@@ -20,7 +20,6 @@ from mindspore import Tensor, Parameter
 from mindspore import context
 from mindspore.common import dtype as mstype
 from mindspore.common.api import _executor
-from mindspore.ops import composite as C
 from mindspore.ops import operations as P
 from mindspore.parallel import set_algo_parameters
 from mindspore.parallel._utils import _reset_op_id as reset_op_id
@@ -33,8 +32,8 @@ class NetWithLoss(nn.Cell):
         self.loss = VirtualLoss()
         self.network = network

-    def construct(self, x, y, z, w):
-        predict = self.network(x, y, z, w)
+    def construct(self, x, y):
+        predict = self.network(x, y)
         return self.loss(predict)
@@ -49,9 +48,9 @@ def test_common_parameter():
             self.cast1 = P.Cast()
             self.cast2 = P.Cast()

-        def construct(self, x, y, z, w):
+        def construct(self, x, y):
             m1_result = self.matmul1(x, self.cast1(self.weight1, mstype.float32))
-            m2_result = self.matmul2(z, self.cast2(self.weight1, mstype.float32))
+            m2_result = self.matmul2(y, self.cast2(self.weight1, mstype.float32))
             m3_result = self.matmul3(m2_result, m1_result)
             return m3_result
@@ -62,15 +61,13 @@ def test_common_parameter():
     set_algo_parameters(elementwise_op_strategy_follow=True)
     x = Tensor(np.ones([64, 64]), dtype=ms.float32)
     y = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    z = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    w = Tensor(np.ones([64, 64]), dtype=ms.float32)

     net = NetWithLoss(Net())
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
     net.set_auto_parallel()
     reset_op_id()

-    _executor.compile(net, x, y, z, w, phase='train')
+    _executor.compile(net, x, y, phase='train')
     strategies = _executor._get_strategy(net)
     expected_strategies = {'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]],
                            'Default/network-Net/MatMul-op3': [[8, 1], [1, 1]],
@@ -135,7 +135,11 @@ def test_dataset_interface_sens_shape_not_equal_loss():
     sens = Tensor(np.ones([256, 1024]), dtype=ms.float32)
     try:
         loss_scale_manager_sens(strategy1, sens)
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
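The narrowed handlers above (and the identical hunks later in this patch) replace a bare except BaseException. A sketch of an equivalent single clause, not what this patch does, catches the same three types as a tuple:

    try:
        loss_scale_manager_sens(strategy1, sens)
    except (ValueError, TypeError, RuntimeError):
        pass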
@@ -153,7 +157,7 @@ def test_input_not_in_parameter_layotu_dict():
             self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
             self.transpose1 = P.Transpose().set_strategy(strategy1)

-        def construct(self, x, b):
+        def construct(self, x):
             x = self.matmul(x, self.matmul_weight)
             x = self.transpose1(x, (1, 0))
             return x
@@ -163,7 +167,6 @@ def test_input_not_in_parameter_layotu_dict():
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=device_num)
     predict = Tensor(np.ones([32 * device_num, 128]), dtype=ms.float32)
-    b = Tensor(np.ones([32 * device_num, 128]), dtype=ms.float32)
     net = Net(strategy1)
     net.set_train()
-    net(predict, b)
+    net(predict)
@@ -28,13 +28,13 @@ class GradWrap(nn.Cell):
         super(GradWrap, self).__init__()
         self.network = network

-    def construct(self, x, y, bias):
-        return C.grad_all(self.network)(x, y, bias)
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)


-def compile_net(net, x, y, bias):
+def compile_net(net, x, y):
     net.set_auto_parallel()
-    _executor.compile(net, x, y, bias)
+    _executor.compile(net, x, y)


 def test_sum_as_loss_float16():
@@ -44,7 +44,7 @@ def test_sum_as_loss_float16():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -57,8 +57,7 @@ def test_sum_as_loss_float16():
     x = Tensor(np.ones([64, 32]), dtype=ms.float16)
     y = Tensor(np.ones([64, 32]), dtype=ms.float16)
-    bias = Tensor(np.ones([64]), dtype=ms.float16)
-    compile_net(net, x, y, bias)
+    compile_net(net, x, y)


 def test_sum_as_loss_float32():
@@ -68,7 +67,7 @@ def test_sum_as_loss_float32():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -81,8 +80,7 @@ def test_sum_as_loss_float32():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    bias = Tensor(np.ones([64]), dtype=ms.float32)
-    compile_net(net, x, y, bias)
+    compile_net(net, x, y)


 def test_sum_as_loss_int32():
@@ -92,7 +90,7 @@ def test_sum_as_loss_int32():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -105,5 +103,4 @@ def test_sum_as_loss_int32():
     x = Tensor(np.ones([64, 32]), dtype=ms.int32)
     y = Tensor(np.ones([64, 32]), dtype=ms.int32)
-    bias = Tensor(np.ones([64]), dtype=ms.int32)
-    compile_net(net, x, y, bias)
+    compile_net(net, x, y)
@@ -104,7 +104,11 @@ def test_onehot_batch_parallel_invalid_strategy():
     strategy4 = ((16, 1), (16, 1))
     try:
         compile_graph(strategy1, strategy2, strategy3, strategy4)
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
@@ -144,7 +148,11 @@ def test_onehot_batch_parallel_invalid_strategy_axis0():
     strategy4 = ((16, 1), (16, 1))
     try:
         compile_graph(strategy1, strategy2, strategy3, strategy4, onthot_axis=0)
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
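One caveat with the try/except/pass idiom in these tests: the test still passes when no exception is raised at all. A stricter sketch, assuming pytest (which drives this suite) is importable, asserts that one of the expected errors actually occurs:

    import pytest

    with pytest.raises((ValueError, TypeError, RuntimeError)):
        compile_graph(strategy1, strategy2, strategy3, strategy4)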
@@ -24,6 +24,17 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss


+class NetWithLossNoBias(nn.Cell):
+    def __init__(self, network):
+        super(NetWithLossNoBias, self).__init__()
+        self.loss = VirtualLoss()
+        self.network = network
+
+    def construct(self, x, y):
+        predict = self.network(x, y)
+        return self.loss(predict)
+
+
 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()
@@ -35,6 +46,15 @@ class NetWithLoss(nn.Cell):
         return self.loss(predict)


+class GradWrapNoBias(nn.Cell):
+    def __init__(self, network):
+        super(GradWrapNoBias, self).__init__()
+        self.network = network
+
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)
+
+
 class GradWrap(nn.Cell):
     def __init__(self, network):
         super(GradWrap, self).__init__()
@@ -44,6 +64,11 @@ class GradWrap(nn.Cell):
         return C.grad_all(self.network)(x, y, b)


+def compile_net_no_bias(net, x, y):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y)
+
+
 def compile_net(net, x, y, b):
     net.set_auto_parallel()
     _executor.compile(net, x, y, b)
@@ -165,7 +190,7 @@ def test_sum_mul5():
             self.mul1 = P.Mul().set_strategy(strategy1)
             self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_sum(out, 0)
             return out
@@ -173,13 +198,12 @@ def test_sum_mul5():
     context.set_auto_parallel_context(device_num=64, global_rank=0)
     strategy1 = ((1, 8, 8), (1, 8, 8))
     strategy2 = ((2, 4, 1),)
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
+    net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

     x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([1, 32, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_sum_mul6():
@@ -189,7 +213,7 @@ def test_sum_mul6():
             self.mul1 = P.Mul().set_strategy(strategy1)
             self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_sum(out, 1)
             return out
@@ -197,13 +221,12 @@ def test_sum_mul6():
     context.set_auto_parallel_context(device_num=64, global_rank=0)
     strategy1 = ((1, 8, 8), (1, 8, 8))
     strategy2 = ((2, 1, 4),)
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
+    net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

     x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([128, 1, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_sum_mul7():
@@ -213,7 +236,7 @@ def test_sum_mul7():
             self.mul1 = P.Mul().set_strategy(strategy1)
             self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -221,13 +244,12 @@ def test_sum_mul7():
     context.set_auto_parallel_context(device_num=64, global_rank=0)
     strategy1 = ((1, 8, 8), (1, 8, 8))
     strategy2 = ((2, 4, 1),)
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
+    net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

     x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([1, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_max_mul():
@@ -347,6 +369,12 @@ def gen_inputs_and_compile_net(net):
     compile_net(net, x, y, b)


+def gen_inputs_and_compile_net_no_bias(net):
+    x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)
+    y = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)
+    compile_net_no_bias(net, x, y)
+
+
 def tobefixed_test_arg_max_with_value_mul_semi_axis_parallel():
     context.set_auto_parallel_context(device_num=8, global_rank=0)
     strategy1 = ((1, 4, 2), (1, 4, 2))
@@ -414,7 +442,7 @@ class ArgMinWithValueNet2(nn.Cell):
         self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).set_strategy(strategy2)
         self.relu = P.ReLU().set_strategy(strategy3)

-    def construct(self, x, y, b):
+    def construct(self, x, y):
         out = self.mul1(x, y)
         _, out = self.arg_min_with_value(out)
         out = self.relu(out)
@@ -426,9 +454,9 @@ def tobefixed_test_arg_min_with_value_mul_semi_axis_parallel2():
     strategy1 = ((1, 4, 2), (1, 4, 2))
     strategy2 = ((4, 1, 2),)
     strategy3 = ((2, 4, 1),)
-    net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
+    net = GradWrapNoBias(NetWithLossNoBias(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-    gen_inputs_and_compile_net(net)
+    gen_inputs_and_compile_net_no_bias(net)


 def test_arg_min_with_value_mul_semi2():
@@ -436,9 +464,9 @@ def test_arg_min_with_value_mul_semi2():
     strategy1 = ((1, 4, 2), (1, 4, 2))
     strategy2 = ((4, 1, 1),)
     strategy3 = ((2, 4, 1),)
-    net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
+    net = GradWrapNoBias(NetWithLossNoBias(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-    gen_inputs_and_compile_net(net)
+    gen_inputs_and_compile_net_no_bias(net)


 def test_arg_min_with_value_mul_auto2():
@@ -446,9 +474,9 @@ def test_arg_min_with_value_mul_auto2():
     strategy1 = None
     strategy2 = None
     strategy3 = None
-    net = GradWrap(NetWithLoss(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
+    net = GradWrapNoBias(NetWithLossNoBias(ArgMinWithValueNet2(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")
-    gen_inputs_and_compile_net(net)
+    gen_inputs_and_compile_net_no_bias(net)


 def test_cross_batch():
@@ -459,7 +487,7 @@ def test_cross_batch():
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2)
             self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_sum(out, -1)
             out = self.reduce_mean(out, 0)
@@ -469,13 +497,12 @@ def test_cross_batch():
     strategy1 = ((4, 2), (4, 2))
     strategy2 = ((2, 1),)
     strategy3 = ((8,),)
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
+    net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

     x = Tensor(np.ones([32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_cross_batch2():
@@ -486,7 +513,7 @@ def test_cross_batch2():
             self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_mean(out, -1)
             out = self.reduce_sum(out, 0)
@@ -496,13 +523,12 @@ def test_cross_batch2():
     strategy1 = ((4, 2), (4, 2))
     strategy2 = ((2, 1),)
     strategy3 = ((8,),)
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
+    net = GradWrapNoBias(NetWithLossNoBias(Net(strategy1, strategy2, strategy3)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")

     x = Tensor(np.ones([32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_cross_batch_auto():
@@ -513,20 +539,19 @@ def test_cross_batch_auto():
             self.reduce_mean = P.ReduceMean(keep_dims=False)
             self.reduce_sum = P.ReduceSum(keep_dims=False).add_prim_attr("cross_batch", True)

-        def construct(self, x, y, b):
+        def construct(self, x, y):
             out = self.mul1(x, y)
             out = self.reduce_mean(out, -1)
             out = self.reduce_sum(out, 0)
             return out

     context.set_auto_parallel_context(device_num=8, global_rank=0)
-    net = GradWrap(NetWithLoss(Net()))
+    net = GradWrapNoBias(NetWithLossNoBias(Net()))
     context.set_auto_parallel_context(parallel_mode="auto_parallel")

     x = Tensor(np.ones([32, 64]), dtype=ms.float32)
     y = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    b = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    compile_net(net, x, y, b)
+    compile_net_no_bias(net, x, y)


 def test_max_empty_tuple():
@@ -114,7 +114,11 @@ def test_reshape1_strategy_1():
     strategy_loss = ((8, 1), (8, 1))
     try:
         reshape_common(ParallelMode.SEMI_AUTO_PARALLEL, strategy0, strategy1, strategy2, strategy_loss)
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
@@ -125,7 +129,11 @@ def test_reshape1_strategy_2():
     strategy_loss = ((8, 1), (8, 1))
     try:
         reshape_common(ParallelMode.AUTO_PARALLEL, strategy0, strategy1, strategy2, strategy_loss)
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
@@ -347,14 +355,22 @@ def test_reshape_net3_2():
 def test_reshape_net4_1():
     try:
         reshape_net2(ReshapeNet4(((1, 8), (8, 1))))
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass


 def test_reshape_net4_2():
     try:
         reshape_net2(ReshapeNet4(((1, 8), (8, 2))))
-    except BaseException:
+    except ValueError:
+        pass
+    except TypeError:
+        pass
+    except RuntimeError:
         pass
@@ -29,8 +29,8 @@ class GradWrap(nn.Cell):
         super(GradWrap, self).__init__()
         self.network = network

-    def construct(self, x, y, bias):
-        return C.grad_all(self.network)(x, y, bias)
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)


 def test_sum_as_loss():
@@ -41,7 +41,7 @@ def test_sum_as_loss():
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
             self.mul = P.Mul().set_strategy(strategy=((), ()))

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             out = self.mul(out, F.scalar_to_array(2.0))
@@ -57,5 +57,4 @@ def test_sum_as_loss():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    bias = Tensor(np.ones([64]), dtype=ms.float32)
-    _executor.compile(net, x, y, bias)
+    _executor.compile(net, x, y)
@@ -52,11 +52,21 @@ class GradWrap3(nn.Cell):
     def construct(self, x, y, bias):
         return C.grad_all(self.network)(x, y, bias)


+class GradWrap4(nn.Cell):
+    def __init__(self, network):
+        super(GradWrap4, self).__init__()
+        self.network = network
+
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)
+
+
 def compile_net(net, x, y, b):
     net.set_auto_parallel()
     _executor.compile(net, x, y, b)


+def compile_net_no_bias(net, x, y):
+    net.set_auto_parallel()
+    _executor.compile(net, x, y)
+
+
 def test_no_grad():
     class Net(nn.Cell):
@@ -144,7 +154,7 @@ def test_grad_sens_scalar_broadcast():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -152,10 +162,9 @@ def test_grad_sens_scalar_broadcast():
     context.set_auto_parallel_context(device_num=16, global_rank=0)
     strategy0 = ((4, 1), (4, 1))
     strategy1 = ((4, 1),)
-    net = GradWrap3(Net(strategy0, strategy1))
+    net = GradWrap4(Net(strategy0, strategy1))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    bias = Tensor(np.ones([64]), dtype=ms.float32)
-    compile_net(net, x, y, bias)
+    compile_net_no_bias(net, x, y)
@@ -28,13 +28,13 @@ class GradWrap(nn.Cell):
         super(GradWrap, self).__init__()
         self.network = network

-    def construct(self, x, y, bias):
-        return C.grad_all(self.network)(x, y, bias)
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)


-def compile_net(net, x, y, bias):
+def compile_net(net, x, y):
     net.set_auto_parallel()
-    _executor.compile(net, x, y, bias)
+    _executor.compile(net, x, y)


 def test_sum_as_loss():
@@ -44,7 +44,7 @@ def test_sum_as_loss():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -57,8 +57,7 @@ def test_sum_as_loss():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    bias = Tensor(np.ones([64]), dtype=ms.float32)
-    compile_net(net, x, y, bias)
+    compile_net(net, x, y)


 def test_sum_as_loss2():
@@ -68,7 +67,7 @@ def test_sum_as_loss2():
             self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
             self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)

-        def construct(self, x, y, bias):
+        def construct(self, x, y):
             out = self.fc_nobias(x, y)
             out = self.reduce_sum(out, (0, 1))
             return out
@@ -81,5 +80,4 @@ def test_sum_as_loss2():
     x = Tensor(np.ones([64, 32]), dtype=ms.float32)
     y = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    bias = Tensor(np.ones([64]), dtype=ms.float32)
-    compile_net(net, x, y, bias)
+    compile_net(net, x, y)