Merge pull request !5011 from riemann_penn/remove_global_grad_ops
@@ -21,7 +21,6 @@ Pre-defined combination of operators.
 from .base import GradOperation, HyperMap, Map, MultitypeFuncGraph, add_flags, \
-    grad, grad_all, grad_all_with_sens, grad_by_list, grad_by_list_with_sens, grad_with_sens, \
     core, env_get, tail, zip_operation
 from .clip_ops import clip_by_value
 from .multitype_ops.add_impl import hyper_add

@@ -31,12 +30,6 @@ from .random_ops import set_seed, normal, uniform, gamma, poisson, multinomial
 __all__ = [
-    'grad',
-    'grad_by_list_with_sens',
-    'grad_all',
-    'grad_by_list',
-    'grad_all_with_sens',
-    'grad_with_sens',
     'env_get',
     'core',
     'add_flags',

@@ -163,14 +163,6 @@ class GradOperation(GradOperation_):
         return self.grad_fn

-grad = GradOperation('grad')
-grad_all = GradOperation('get_all', get_all=True)
-grad_by_list = GradOperation('get_by_list', get_by_list=True)
-grad_with_sens = GradOperation('grad_with_sens', sens_param=True)
-grad_all_with_sens = GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
-grad_by_list_with_sens = GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)

 class MultitypeFuncGraph(MultitypeFuncGraph_):
     """
     Generate multiply graph.
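This hunk is the heart of the change: the six pre-built gradient functionals are removed from `mindspore.ops.composite`, so every caller now constructs the `GradOperation` it needs. A minimal sketch of the replacement pattern, restricted to constructor calls that appear verbatim in this diff (the leading name string is the 1.0-era `GradOperation` signature):

```python
from mindspore.ops import composite as C

# Local stand-ins for the removed module-level singletons.
grad = C.GradOperation('grad')                                       # grad w.r.t. the first input
grad_all = C.GradOperation('get_all', get_all=True)                  # grads w.r.t. all inputs
grad_by_list = C.GradOperation('get_by_list', get_by_list=True)      # grads w.r.t. a ParameterTuple
grad_with_sens = C.GradOperation('grad_with_sens', sens_param=True)  # caller supplies output sensitivity
grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)
```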
@@ -268,6 +268,7 @@ class HookBackward(PrimitiveWithInfer):
         >>> def hook_fn(grad_out):
         >>>     print(grad_out)
         >>>
+        >>> grad_all = GradOperation('get_all', get_all=True)
         >>> hook = P.HookBackward(hook_fn)
         >>>
         >>> def hook_test(x, y):

@@ -277,7 +278,7 @@ class HookBackward(PrimitiveWithInfer):
         >>>     return z
         >>>
         >>> def backward(x, y):
-        >>>     return C.grad_all(hook_test)(x, y)
+        >>>     return grad_all(hook_test)(x, y)
         >>>
         >>> backward(1, 2)
     """

@@ -23,6 +23,9 @@ from mindspore import Tensor
 from mindspore.common.api import _executor

+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 class InputBackward(nn.Cell):
     """ InputBackward definition """

@@ -30,7 +33,7 @@ class InputBackward(nn.Cell):
         super(InputBackward, self).__init__()
         self.network = network
         self.network.set_train()
-        self.grad = C.grad_all_with_sens
+        self.grad = grad_all_with_sens
         self.c1 = c1
         self.c2 = c2
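Because the wrapper above is built with `sens_param=True`, the gradient function it produces takes one extra trailing argument: the sensitivity (initial gradient) seeded into the backward pass. A hedged call sketch, with `Square` as a hypothetical one-input cell (the same pattern as the `test_grad_net` hunk later in this diff):

```python
import numpy as np
from mindspore import Tensor, nn
from mindspore.ops import composite as C

grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

class Square(nn.Cell):  # hypothetical cell, not part of this PR
    def construct(self, x):
        return x * x

x = Tensor(np.array([1.0, 4.0, 9.0], dtype=np.float32))
sens = Tensor(np.ones(3, dtype=np.float32))  # must match the output's shape and dtype
dx = grad_all_with_sens(Square())(x, sens)   # tuple with one grad per input: (2 * x * sens,)
```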
@@ -26,6 +26,9 @@ from mindspore.common.api import _executor
 context.set_context(mode=context.GRAPH_MODE)

+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 batch_size = 1
 channel = 1
 height = 32

@@ -38,7 +41,7 @@ class LeNetGrad(nn.Cell):
     def __init__(self, network):
         super(LeNetGrad, self).__init__()
-        self.grad_op = C.grad_all_with_sens
+        self.grad_op = grad_all_with_sens
         self.network = network

     def construct(self, x, sens):

@@ -28,6 +28,10 @@ from mindspore.ops import operations as P
 # context.set_context(save_graphs=True)

+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
+grad_all = C.GradOperation('get_all', get_all=True)

 def test_while_forward():
     class MyWhileNet(nn.Cell):
         def __init__(self):

@@ -70,7 +74,7 @@ def test_while_grad():
             self.net = net

         def construct(self, *inputs):
-            return C.grad_all(self.net)(*inputs)
+            return grad_all(self.net)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -157,7 +161,7 @@ def test_while_with_param_grad():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -222,7 +226,7 @@ def test_while_opt_endless():
            self.net = net

         def construct(self, *inputs):
-            return C.grad_all(self.net)(*inputs)
+            return grad_all(self.net)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -285,7 +289,7 @@ def test_while_with_param_grad_with_const_branch():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -325,7 +329,7 @@ def test_for_while_with_param_grad_with_const_branch():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -362,7 +366,7 @@ def test_for_while_with_param_grad_basic():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -399,7 +403,7 @@ def test_for_while_with_param_grad_normal():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -433,7 +437,7 @@ def test_while_with_param_basic_grad():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -467,7 +471,7 @@ def test_while_with_param_basic_grad_mul():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -502,7 +506,7 @@ def test_while_with_param_basic_grad_two():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -538,7 +542,7 @@ def test_while_with_param_basic_grad_three():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -575,7 +579,7 @@ def test_while_if_with_param_grad():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -608,7 +612,7 @@ def test_while_with_param_grad_not_enter_while():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, a, b, c):
-            return C.grad_by_list(self.net, self.weights)(a, b, c)
+            return grad_by_list(self.net, self.weights)(a, b, c)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     while_net = MyWhileNet()

@@ -670,7 +674,7 @@ def test_with_param_if_by_if_grad_inputs():
            self.net = net

         def construct(self, *inputs):
-            return C.grad_all(self.net)(*inputs)
+            return grad_all(self.net)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     if_net = MyIfByIfNet()

@@ -704,7 +708,7 @@ def test_with_param_if_by_if_grad_parameter():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, *inputs):
-            return C.grad_by_list(self.net, self.weights)(*inputs)
+            return grad_by_list(self.net, self.weights)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     if_net = MyIfByIfNet()

@@ -736,7 +740,7 @@ def test_with_param_if_by_if_grad_param_excute_null():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, *inputs):
-            return C.grad_by_list(self.net, self.weights)(*inputs)
+            return grad_by_list(self.net, self.weights)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     if_net = MyIfByIfNet()

@@ -770,7 +774,7 @@ def test_if_by_if_return_inside_grad():
            self.weights = ParameterTuple(net.trainable_params())

         def construct(self, *inputs):
-            return C.grad_by_list(self.net, self.weights)(*inputs)
+            return grad_by_list(self.net, self.weights)(*inputs)

     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
     if_net = MyIfByIfNet()
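These control-flow hunks all apply the same mechanical rewrite, test after test. The wrapper pattern they share, written once as a self-contained sketch (`GradNet` mirrors the tests above; the wrapped network is whatever cell is under test):

```python
from mindspore import ParameterTuple, nn
from mindspore.ops import composite as C

grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
grad_all = C.GradOperation('get_all', get_all=True)

class GradNet(nn.Cell):
    """Returns gradients w.r.t. the wrapped network's trainable Parameters."""
    def __init__(self, net):
        super(GradNet, self).__init__()
        self.net = net
        self.weights = ParameterTuple(net.trainable_params())

    def construct(self, *inputs):
        # One gradient per Parameter in self.weights; use grad_all(self.net)
        # instead when gradients w.r.t. the inputs are wanted.
        return grad_by_list(self.net, self.weights)(*inputs)
```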
@@ -25,12 +25,15 @@ from mindspore.common.api import _executor
 context.set_context(mode=context.GRAPH_MODE)

+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 class MeanAggregatorGrad(nn.Cell):
     """Backward of MeanAggregator"""

     def __init__(self, network):
         super(MeanAggregatorGrad, self).__init__()
-        self.grad_op = C.grad_all_with_sens
+        self.grad_op = grad_all_with_sens
         self.network = network

     def construct(self, x, sens):

@@ -28,6 +28,10 @@ from mindspore.ops import operations as P
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

+grad_all = C.GradOperation('get_all', get_all=True)

 class MulAdd(nn.Cell):
     def construct(self, x, y):
         return 2 * x + y

@@ -43,7 +47,7 @@ def test_grad_mul_add():
     mul_add = MulAdd()
     x = Tensor(1, dtype=ms.int32)
     y = Tensor(2, dtype=ms.int32)
-    assert C.grad_all(mul_add)(x, y) == (2, 4)
+    assert grad_all(mul_add)(x, y) == (2, 4)

 class InlineMulADD(nn.Cell):

@@ -62,7 +66,7 @@ def test_grad_inline_mul_add():
     inline_mul_add = InlineMulADD()
     x = Tensor(1, dtype=ms.int32)
     y = Tensor(2, dtype=ms.int32)
-    assert C.grad_all(inline_mul_add)(x, y) == (3, 6)
+    assert grad_all(inline_mul_add)(x, y) == (3, 6)

 class WithParameter(nn.Cell):

@@ -84,7 +88,7 @@ class WithParameter(nn.Cell):
 def test_with_param():
     with_param = WithParameter()
     with pytest.raises(RuntimeError):
-        C.grad_all(with_param)(1, 2)
+        grad_all(with_param)(1, 2)

 class WithNoBprop(nn.Cell):

@@ -98,7 +102,7 @@ def test_with_no_bprop():
     with_no_bprop = WithNoBprop()
     x = Tensor(1, dtype=ms.int32)
     y = Tensor(2, dtype=ms.int32)
-    assert C.grad_all(with_no_bprop)(x, y) == (2, 1)
+    assert grad_all(with_no_bprop)(x, y) == (2, 1)

 @pytest.mark.level0
 @pytest.mark.platform_x86_ascend_training

@@ -118,10 +122,10 @@ def test_grad_in_bprop_1():
             self.f = GradInBprop_1()

         def construct(self, x, y):
-            return self.f(x, y), C.grad_all(self.f)(x, y)
+            return self.f(x, y), grad_all(self.f)(x, y)

         def bprop(self, x, y, out, dout):
-            grads = C.grad_all(self.f)(x, y)
+            grads = grad_all(self.f)(x, y)
             return out[1][0], grads[1]

     class GradInBprop_3(nn.Cell):

@@ -133,8 +137,8 @@ def test_grad_in_bprop_1():
             return self.f(x, y)

     grad_in_bprop = GradInBprop_3()
-    grads = C.grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
-                                      Tensor(np.ones([2, 2]).astype(np.float32)))
+    grads = grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
+                                    Tensor(np.ones([2, 2]).astype(np.float32)))
     assert (grads[0].asnumpy() == np.ones([2, 2]).astype(np.float32)).all()
     assert (grads[1].asnumpy() == np.zeros([2, 2]).astype(np.float32)).all()

@@ -159,10 +163,10 @@ def test_grad_in_bprop_2():
             self.f = GradInBprop_1()

         def construct(self, x, y):
-            return self.f(x, y), C.grad_all(self.f)(x, y)
+            return self.f(x, y), grad_all(self.f)(x, y)

         def bprop(self, x, y, out, dout):
-            grads = C.grad_all(self.f)(x, y)
+            grads = grad_all(self.f)(x, y)
             return out[1][0], grads[1]

     class GradInBprop_3(nn.Cell):

@@ -174,8 +178,8 @@ def test_grad_in_bprop_2():
             return self.f(x, y)

     grad_in_bprop = GradInBprop_3()
-    grads = C.grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
-                                      Tensor(np.ones([2, 2]).astype(np.float32)))
+    grads = grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
+                                    Tensor(np.ones([2, 2]).astype(np.float32)))
     assert (grads[0].asnumpy() == np.ones([2, 2]).astype(np.float32)).all()
     assert (grads[1].asnumpy() == np.array([[2, 2], [2, 2]]).astype(np.float32)).all()

@@ -197,10 +201,10 @@ def test_grad_in_bprop_3():
             self.f = GradInBprop_1()

         def construct(self, x, y):
-            return self.f(x, y), C.grad_all(self.f)(x, y)
+            return self.f(x, y), grad_all(self.f)(x, y)

         def bprop(self, x, y, out, dout):
-            grads = C.grad_all(self.f)(x, y)
+            grads = grad_all(self.f)(x, y)
             return out[1][0], grads[1]

     class GradInBprop_3(nn.Cell):

@@ -215,8 +219,8 @@ def test_grad_in_bprop_3():
             return x + y + y + out[0], x + x + y + y + dout[0]

     grad_in_bprop = GradInBprop_3()
-    grads = C.grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
-                                      Tensor(np.ones([2, 2]).astype(np.float32)))
+    grads = grad_all(grad_in_bprop)(Tensor(np.ones([2, 2]).astype(np.float32)),
+                                    Tensor(np.ones([2, 2]).astype(np.float32)))
     assert (grads[0].asnumpy() == np.array([[4, 4], [4, 4]]).astype(np.float32)).all()
     assert (grads[1].asnumpy() == np.array([[5, 5], [5, 5]]).astype(np.float32)).all()

@@ -238,7 +242,7 @@ class OneInputBprop(nn.Cell):
 def test_grad_one_input_bprop():
     net = OneInputBprop()
     input1 = Tensor(np.ones([2, 2]).astype(np.float32))
-    grad = C.grad_all(net)(input1)
+    grad = grad_all(net)(input1)
     assert (grad[0].asnumpy() == np.array([5, 5]).astype(np.float32)).all()

@@ -253,10 +257,10 @@ class InlineBpropTwoInput(nn.Cell):
         self.f = TwoInput()

     def construct(self, x, y):
-        return self.f(x, y), C.grad_all(self.f)(x, y)
+        return self.f(x, y), grad_all(self.f)(x, y)

     def bprop(self, x, y, out, dout):
-        grads = C.grad_all(self.f)(x, y)
+        grads = grad_all(self.f)(x, y)
         return grads[0] * 2, grads[1] * 2

 @pytest.mark.level0

@@ -266,7 +270,7 @@ def test_grad_inline_bprop_two_input():
     net = InlineBpropTwoInput()
     input1 = Tensor(np.ones([2, 2]).astype(np.float32))
     input2 = Tensor(np.ones([2, 2]).astype(np.float32))
-    grads = C.grad_all(net)(input1, input2)
+    grads = grad_all(net)(input1, input2)
     assert (grads[0].asnumpy() == np.array([2, 2]).astype(np.float32)).all()
     assert (grads[1].asnumpy() == np.array([2, 2]).astype(np.float32)).all()
     assert len(grads) == 2

@@ -328,7 +332,7 @@ def test_grad_inline_bprop_multi_input():
     input1 = Tensor(np.ones([2, 2]).astype(np.float32))
     input2 = Tensor(np.ones([2, 2]).astype(np.float32))
     net.init_parameters_data()
-    grads = C.grad_all(net)(input1, input2)
+    grads = grad_all(net)(input1, input2)
     assert (grads[0].asnumpy() == np.array([[12, 12], [12, 12]]).astype(np.float32)).all()
     assert (grads[1].asnumpy() == np.array([[19, 19], [19, 19]]).astype(np.float32)).all()
     assert len(grads) == 2

@@ -378,7 +382,7 @@ def test_grad_mul_add_with_wrong_output_num():
     context.set_context(check_bprop=True)
     mul_add = MulAddWithWrongOutputNum()
     with pytest.raises(TypeError):
-        C.grad_all(mul_add)(1, 2)
+        grad_all(mul_add)(1, 2)

 class MulAddWithWrongOutputType(nn.Cell):

@@ -395,7 +399,7 @@ def test_grad_mul_add_with_wrong_output_type():
     context.set_context(check_bprop=True)
     mul_add = MulAddWithWrongOutputType()
     with pytest.raises(TypeError):
-        C.grad_all(mul_add)(1, Tensor(np.ones([2, 2])))
+        grad_all(mul_add)(1, Tensor(np.ones([2, 2])))

 class MulAddWithWrongOutputShape(nn.Cell):

@@ -416,4 +420,4 @@ def test_grad_mul_add_with_wrong_output_shape():
     context.set_context(check_bprop=True)
     mul_add = MulAddWithWrongOutputShape()
     with pytest.raises(TypeError):
-        C.grad_all(mul_add)(1, Tensor(np.ones([2, 2])))
+        grad_all(mul_add)(1, Tensor(np.ones([2, 2])))

@@ -22,6 +22,10 @@ from mindspore import Tensor
 from mindspore.ops import composite as C

 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

+grad_with_sens = C.GradOperation('grad_with_sens', sens_param=True)

 class Net(nn.Cell):
     """Net definition"""

@@ -52,6 +56,6 @@ def test_grad_net():
     x = np.array([1.0, 4.0, 9.0]).astype(np.float32)
     sens = np.array([1.0, 1.0, 1.0]).astype(np.float32)
     square = Net()
-    dx = C.grad_with_sens(square)(Tensor(x), Tensor(sens))
+    dx = grad_with_sens(square)(Tensor(x), Tensor(sens))
     expect = np.array([2.0, 8.0, 18.0]).astype(np.float32)
     assert (dx.asnumpy() == expect).all()

@@ -30,6 +30,9 @@ from mindspore.common.initializer import TruncatedNormal
 context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")

+grad_all = C.GradOperation('get_all', get_all=True)

 def weight_variable():
     """weight initial"""
     return TruncatedNormal(0.02)

@@ -121,9 +124,6 @@ class test_custom_cell_base():
 class MulAdd(nn.Cell):
-    def __init__(self):
-        super(MulAdd, self).__init__()
-
     def construct(self, x, y):
         return 2 * x + y

@@ -181,8 +181,8 @@ def test_pynative_custom_bprop_and_Cell_MulAdd():
     custom_cell = test_custom_cell_base()
     mul_add = custom_cell.test_custom_cell_function(MulAdd())
     mul_add.bprop_debug = True
-    C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32))
-    assert C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) == \
+    grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32))
+    assert grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) == \
           (Tensor(1.0, mstype.float32), Tensor(2.0, mstype.float32))

@@ -194,5 +194,5 @@ def test_pynative_custom_bprop_and_Cell_Ms_Cell():
     custom_cell = test_custom_cell_base()
     ms_Cell = custom_cell.test_custom_cell_function(Ms_Cell())
     ms_Cell.bprop_debug = True
-    assert C.grad_all(ms_Cell)(Tensor(1, mstype.float32)) == (Tensor(1.0, mstype.float32),)
+    assert grad_all(ms_Cell)(Tensor(1, mstype.float32)) == (Tensor(1.0, mstype.float32),)

@@ -29,6 +29,9 @@ from mindspore.ops import operations as P
 np.random.seed(1)

+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)

 def weight_variable():
     """weight initial"""
     return TruncatedNormal(0.02)

@@ -122,7 +125,7 @@ class GradWrap(nn.Cell):
     def construct(self, x, label):
         weights = self.weights
-        return C.grad_by_list(self.network, weights)(x, label)
+        return grad_by_list(self.network, weights)(x, label)

 @pytest.mark.level0

@@ -40,6 +40,9 @@ np.random.seed(1)
 ds.config.set_seed(1)

+grad_by_list = CP.GradOperation('get_by_list', get_by_list=True)

 def weight_variable(shape):
     return initializer('XavierUniform', shape=shape, dtype=mstype.float32)

@@ -389,7 +392,7 @@ class GradWrap(Cell):
     def construct(self, x, label):
         weights = self.weights
-        return CP.grad_by_list(self.network, weights)(x, label)
+        return grad_by_list(self.network, weights)(x, label)

 @pytest.mark.level0

@@ -24,6 +24,9 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.ops import composite as C

+grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)

 def setup_module():
     context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")

@@ -319,9 +322,6 @@ def test_setitem_by_mixed_tensors_2():
 class TensorGetItemByMixedTensorsTypeError(Cell):
-    def __init__(self):
-        super(TensorGetItemByMixedTensorsTypeError, self).__init__()
-
     def construct(self, x, index_0, index_1):
         ret = x[index_0, index_1, 0:3, ..., 0:5, [1, 2, 3, 4]]
         return ret

@@ -667,7 +667,7 @@ def test_setitem_grad():
             self.weights = ParameterTuple(net.trainable_params())

         def construct(self, x, y, sens):
-            return C.grad_by_list_with_sens(self.net, self.weights)(x, y, sens)
+            return grad_by_list_with_sens(self.net, self.weights)(x, y, sens)

     net = GradNet(Net())
     x = Tensor(np.ones([4, 4, 5]).astype(np.float32), mstype.float32)
     y = Tensor(np.array([3]).astype(np.float32), mstype.float32)

@@ -676,27 +676,18 @@ def test_setitem_grad():
 class TensorAssignWithSliceError1(Cell):
-    def __init__(self):
-        super(TensorAssignWithSliceError1, self).__init__()
-
     def construct(self, a, b):
         a[1:3:-1, ::] = b
         return a

 class TensorAssignWithSliceError2(Cell):
-    def __init__(self):
-        super(TensorAssignWithSliceError2, self).__init__()
-
     def construct(self, a, b):
         a[1:3:-1] = b
         return a

 class TensorAssignWithSlice2(Cell):
-    def __init__(self):
-        super(TensorAssignWithSlice2, self).__init__()
-
     def construct(self, a, b, ck):
         a[1:5] = b
         a[3:4] = 5

@@ -864,18 +855,12 @@ def test_tensor_assign_exception():
 class TensorAssignWithTupleEllipsis2(Cell):
-    def __init__(self):
-        super(TensorAssignWithTupleEllipsis2, self).__init__()
-
     def construct(self, a, b):
         a[1:, ..., ::] = b
         return a

 class TensorAssignWithTupleEllipsis(Cell):
-    def __init__(self):
-        super(TensorAssignWithTupleEllipsis, self).__init__()
-
     def construct(self, a, b):
         a[:2, ...] = 1.0
         a[1:, ...] = b

@@ -883,9 +868,6 @@ class TensorAssignWithTupleEllipsis(Cell):
 class TensorAssignWithEllipsis(Cell):
-    def __init__(self):
-        super(TensorAssignWithEllipsis, self).__init__()
-
     def construct(self, a, b):
         a[...] = 1
         a[...] = b

@@ -893,9 +875,6 @@ class TensorAssignWithEllipsis(Cell):
 class TensorAssignWithInteger(Cell):
-    def __init__(self):
-        super(TensorAssignWithInteger, self).__init__()
-
     def construct(self, a, b, ck):
         a[1] = 1
         a[0] = b

@@ -904,9 +883,6 @@ class TensorAssignWithInteger(Cell):
 class TensorAssignWithTupleInteger(Cell):
-    def __init__(self):
-        super(TensorAssignWithTupleInteger, self).__init__()
-
     def construct(self, a, b, ck):
         a[(1)] = 1
         a[(1)] = b

@@ -930,9 +906,6 @@ class TensorAssignWithBoolTensorIndex(Cell):
 class TensorAssignWithBoolTensorIndexError(Cell):
-    def __init__(self):
-        super(TensorAssignWithBoolTensorIndexError, self).__init__()
-
     def construct(self, a, b, c, u_tensor):
         a[b][c] = u_tensor
         return a

@@ -955,9 +928,6 @@ class TensorAssignWithBoolTensorIndex2(Cell):
 class TensorAssignWithBoolTensorIndex2Error(Cell):
-    def __init__(self):
-        super(TensorAssignWithBoolTensorIndex2Error, self).__init__()
-
     def construct(self, a, u_tensor):
         a[a > 8][a > 5] = u_tensor
         return a

@@ -31,6 +31,9 @@ from tests.mindspore_test_framework.pipeline.forward.compile_forward \
 context.set_context(mode=context.GRAPH_MODE)

+grad_all = C.GradOperation('get_all', get_all=True)

 def test_list_equal():
     class Net(nn.Cell):
         def __init__(self, z: list):

@@ -303,7 +306,7 @@ def test_grad_make_list():
             self.net = net

         def construct(self, *inputs):
-            return C.grad_all(self.net)(*inputs)
+            return grad_all(self.net)(*inputs)

     while_net = MyWhileNet()
     net = GradNet(while_net)

@@ -18,8 +18,11 @@ import numpy as np
 from mindspore import Parameter, ParameterTuple, Tensor
 from mindspore.nn import Cell
 from mindspore.nn.optim import Optimizer
-from mindspore.ops.composite import grad_by_list
 from mindspore.ops.operations import BiasAdd, MatMul
+import mindspore.ops.composite as C
+
+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)

 class Net(Cell):

@@ -28,6 +28,9 @@ from ....mindspore_test_framework.pipeline.forward.compile_forward \
     import pipeline_for_compile_forward_ge_graph_for_case_by_case_config

+grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)

 class DisOrderTest1(nn.Cell):
     """ DisOrderTest1 definition """

@@ -72,7 +75,7 @@ class GradNetWrap(nn.Cell):
         self.weights = ParameterTuple(net.get_parameters())

     def construct(self, x, sens):
-        return C.grad_by_list_with_sens(self.net, self.weights)(x, sens)
+        return grad_by_list_with_sens(self.net, self.weights)(x, sens)

 test_case_ops = [
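The two flags compose: `get_by_list=True` selects gradients with respect to a `ParameterTuple`, and `sens_param=True` appends the trailing sensitivity argument, which is why `GradNetWrap.construct` above forwards `(x, sens)`. A hedged sketch of the call shape (all names illustrative):

```python
from mindspore import ParameterTuple
from mindspore.ops import composite as C

grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens',
                                         get_by_list=True, sens_param=True)

def param_grads(net, x, sens):
    """Per-parameter grads of net at x, seeded with sensitivity `sens`.

    `sens` must match the shape and dtype of net(x).
    """
    weights = ParameterTuple(net.trainable_params())
    return grad_by_list_with_sens(net, weights)(x, sens)
```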
@@ -30,6 +30,11 @@ from mindspore.common import ms_function
 context.set_context(mode=context.GRAPH_MODE)

+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
+grad_all = C.GradOperation('get_all', get_all=True)
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 def cond_data_test(x_init, y_init):
     class Net(nn.Cell):
         def __init__(self):

@@ -401,9 +406,9 @@ def test_switch_layer():
     index = Tensor(0, dtype=mstype.int32)
     net = SwitchLayerCell()
     net(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
-                                                                Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
+                                                              Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))

 def test_index_to_switch_layer():

@@ -439,9 +444,9 @@ def test_index_to_switch_layer():
     index = Tensor(0, dtype=mstype.int32)
     net = SwitchLayerCell()
     net(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
-                                                                Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
+                                                              Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))

 def test_parser_switch_layer_switch_in_bprop():

@@ -477,7 +482,7 @@ def test_parser_switch_layer_switch_in_bprop():
     input1 = Tensor(np.ones([2, 2]).astype(np.float32))
     grad = Tensor(np.random.randn(2, 2).astype(np.float32))
     i = Tensor(1, mstype.int32)
-    grad_net = C.grad_all_with_sens(net)
+    grad_net = grad_all_with_sens(net)
     grad_net(i, input1, grad)

@@ -520,7 +525,7 @@ def test_parser_switch_layer_inputs_tuple():
     input2 = Tensor(np.random.randn(2, 3, 4, 5).astype(np.float32))
     i = Tensor(1, mstype.int32)
     grad = Tensor(np.random.randn(2, 3, 4, 5).astype(np.float32))
-    back_net = C.grad_all_with_sens(net)
+    back_net = grad_all_with_sens(net)
     back_out = back_net(i, input1, input2, grad)

@@ -539,9 +544,9 @@ def test_switch_layer_with_single_prim():
     index = Tensor(0, dtype=mstype.int32)
     net = SwitchLayerCell()
     net(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
-                                                                Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
-    C.grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_by_list(net, ParameterTuple(net.trainable_params()))(index,
+                                                              Tensor(np.full([128, 96], 0.6, dtype=np.float32)))
+    grad_all(net)(index, Tensor(np.full([128, 96], 0.6, dtype=np.float32)))

 def test_switch_layer_env_eliminate():

@@ -38,6 +38,8 @@ context.set_context(mode=context.GRAPH_MODE)
 # W0613: unused-argument
 # W0231: super-init-not-called

+grad = C.GradOperation('grad')

 def test_multiply():
     """ test_multiply """
     input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]))

@@ -200,7 +202,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, b):
-        return C.grad(self.network)(x, y, b)
+        return grad(self.network)(x, y, b)

 class MatMulNet(nn.Cell):

@@ -236,7 +238,7 @@ class GradWrapSub(nn.Cell):
         self.network = network

     def construct(self, x, y):
-        return C.grad(self.network)(x, y)
+        return grad(self.network)(x, y)

 class SubNet(nn.Cell):

@@ -315,7 +317,7 @@ class GradWrapCumSum(nn.Cell):
         self.network = network

     def construct(self, input_):
-        return C.grad(self.network)(input_)
+        return grad(self.network)(input_)

 class NetCumSum(nn.Cell):
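For contrast with the `get_all=True` form, the bare `GradOperation('grad')` defined in this file differentiates with respect to the first input only. A hedged sketch with a hypothetical two-input cell:

```python
import numpy as np
from mindspore import Tensor, nn
from mindspore.ops import composite as C

grad = C.GradOperation('grad')

class Mul(nn.Cell):  # hypothetical cell, not part of this PR
    def construct(self, x, y):
        return x * y

x = Tensor(np.array([3.0], dtype=np.float32))
y = Tensor(np.array([5.0], dtype=np.float32))
dx = grad(Mul())(x, y)  # grad w.r.t. x only: equals y
```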
@@ -34,6 +34,9 @@ from ....mindspore_test_framework.pipeline.forward.compile_forward \
 run_opt = C.MultitypeFuncGraph("run_opt")

+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)

 @run_opt.register("Function", "Tensor", "Tensor", "Tensor",
                   "Tensor", "Tensor",
                   "Tensor")

@@ -83,7 +86,7 @@ class TrainStepWrap(nn.Cell):
     def construct(self, x, label):
         weights = self.weights
-        grads = C.grad_by_list(self.network, weights)(x, label)
+        grads = grad_by_list(self.network, weights)(x, label)
         return self.optimizer(grads)

@@ -45,6 +45,10 @@ def conv1x1(in_channels, out_channels, stride=1, padding=0):
                      kernel_size=1, stride=stride, padding=padding)

+grad = C.GradOperation('grad')
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 class ResidualBlock(nn.Cell):
     """
     residual Block
     """

@@ -169,7 +173,7 @@ class SoftMaxGrad(nn.Cell):
         self.network = network

     def construct(self, x):
-        return C.grad(self.network)(x)
+        return grad(self.network)(x)

 class DropoutGrad(nn.Cell):

@@ -180,7 +184,7 @@ class DropoutGrad(nn.Cell):
         self.network = network

     def construct(self, x):
-        return C.grad(self.network)(x)
+        return grad(self.network)(x)

 class ScalarSummaryNet(nn.Cell):

@@ -255,7 +259,7 @@ class Grad(nn.Cell):
         self.network.set_train()

     def construct(self, x, label):
-        return C.grad(self.network)(x, label)
+        return grad(self.network)(x, label)

 class BatchnormNet(nn.Cell):

@@ -418,7 +422,7 @@ class GradWrapUnfold(nn.Cell):
         self.sens = Tensor(np.ones([1, 4, 2, 2], np.float32))

     def construct(self, x):
-        return C.grad_all_with_sens(self.network)(x, self.sens)
+        return grad_all_with_sens(self.network)(x, self.sens)

 class UnfoldNetValid(nn.Cell):

@@ -34,12 +34,16 @@ from ....mindspore_test_framework.pipeline.forward.compile_forward \
 from ....mindspore_test_framework.pipeline.gradient.compile_gradient \
     import pipeline_for_compile_grad_ge_graph_for_case_by_case_config

+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

 class InputBackward(nn.Cell):
     def __init__(self, network):
         super(InputBackward, self).__init__()
         self.network = network
         self.network.set_train()
-        self.grad = C.grad_all_with_sens
+        self.grad = grad_all_with_sens

     def construct(self, x1, x2, x3, sens):
         return self.grad(self.network)(x1, x2, x3, sens)

@@ -23,6 +23,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class AddRelu(nn.Cell):
     def __init__(self, strategy0=None, strategy1=None):
         super(AddRelu, self).__init__()

@@ -51,7 +54,7 @@ class Grad(nn.Cell):
         self.network = network

     def construct(self, x, y):
-        return C.grad_all(self.network)(x, y)
+        return grad_all(self.network)(x, y)

 def compile_net(net, x, y):

@@ -23,6 +23,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -40,7 +43,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, b):
-        return C.grad_all(self.network)(x, y, b)
+        return grad_all(self.network)(x, y, b)

 def compile_net(net, x, y, b):

@@ -514,7 +517,7 @@ def test_assign_sub():
         self.network = network

     def construct(self, x):
-        return C.grad_all(self.network)(x)
+        return grad_all(self.network)(x)

 def compile_sub_net(net, x):
     net.set_auto_parallel()

@@ -27,6 +27,9 @@ from mindspore.common.parameter import Parameter
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -44,7 +47,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x):
-        return C.grad_all(self.network)(x)
+        return grad_all(self.network)(x)

 def compile_net(net, x):

@@ -23,6 +23,9 @@ from mindspore.ops import composite as C
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -45,7 +48,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x):
-        return C.grad_all(self.network)(x)
+        return grad_all(self.network)(x)

 # model_parallel test

@@ -27,6 +27,9 @@ from tests.ut.python.ops.test_math_ops import VirtualLoss
 context.set_context(mode=context.GRAPH_MODE)

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -44,7 +47,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, b):
-        return C.grad_all(self.network)(x, y, b)
+        return grad_all(self.network)(x, y, b)

 def compile_net(net, x, y, b, phase):

@@ -25,6 +25,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -42,7 +45,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x):
-        return C.grad_all(self.network)(x)
+        return grad_all(self.network)(x)

 # model_parallel test

@@ -26,6 +26,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -43,7 +46,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, z, w):
-        return C.grad_all(self.network)(x, y, z, w)
+        return grad_all(self.network)(x, y, z, w)

 # model_parallel test

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -41,7 +44,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, z):
-        return C.grad_all(self.network)(x, y, z)
+        return grad_all(self.network)(x, y, z)

 # model_parallel test

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -41,7 +44,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, z, w, a):
-        return C.grad_all(self.network)(x, y, z, w, a)
+        return grad_all(self.network)(x, y, z, w, a)

 # model_parallel test

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -41,7 +44,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, z, w, a, b, c):
-        return C.grad_all(self.network)(x, y, z, w, a, b, c)
+        return grad_all(self.network)(x, y, z, w, a, b, c)

 # model_parallel test

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -41,7 +44,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, b):
-        return C.grad_all(self.network)(x, y, b)
+        return grad_all(self.network)(x, y, b)

 # model_parallel test

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -41,7 +44,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, z, w, b):
-        return C.grad_all(self.network)(x, y, z, w, b)
+        return grad_all(self.network)(x, y, z, w, b)

 def compile_net(net, x, y, z, w, b):

@@ -25,6 +25,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()

@@ -42,7 +45,7 @@ class GradWrap(nn.Cell):
         self.network = network

     def construct(self, x, y, b):
-        return C.grad_all(self.network)(x, y, b)
+        return grad_all(self.network)(x, y, b)

 def test_auto_parallel_l2normalize():

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from tests.ut.python.ops.test_math_ops import VirtualLoss

+grad_all = C.GradOperation('get_all', get_all=True)

 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -26,6 +26,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -43,7 +46,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -30,6 +30,9 @@ from tests.ut.python.ops.test_math_ops import VirtualLoss | |||||
| context.set_context(mode=context.GRAPH_MODE) | context.set_context(mode=context.GRAPH_MODE) | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class Dataset(MindData): | class Dataset(MindData): | ||||
| def __init__(self, predict, label, length=3): | def __init__(self, predict, label, length=3): | ||||
| super(Dataset, self).__init__(size=length) | super(Dataset, self).__init__(size=length) | ||||
| @@ -68,7 +71,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def test_auto_parallel_arithmetic(): | def test_auto_parallel_arithmetic(): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, z, w, b): | def construct(self, x, y, z, w, b): | ||||
| return C.grad_all(self.network)(x, y, z, w, b) | |||||
| return grad_all(self.network)(x, y, z, w, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x): | def construct(self, x): | ||||
| return C.grad_all(self.network)(x) | |||||
| return grad_all(self.network)(x) | |||||
| def test_reshape_matmul(): | def test_reshape_matmul(): | ||||
| @@ -211,7 +214,7 @@ def test_reshape_auto_5(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -261,7 +264,7 @@ def test_reshape_auto_6(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def test_softmax_cross_entropy_loss_auto_parallel(): | def test_softmax_cross_entropy_loss_auto_parallel(): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x): | def construct(self, x): | ||||
| return C.grad_all(self.network)(x) | |||||
| return grad_all(self.network)(x) | |||||
| class CustomDense(nn.Cell): | class CustomDense(nn.Cell): | ||||
| @@ -25,6 +25,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # core dump, step_auto_parallel should SetInputs for transpose axis | # core dump, step_auto_parallel should SetInputs for transpose axis | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops.operations.comm_ops import _VirtualDataset | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def bn_with_initialize(out_channels): | def bn_with_initialize(out_channels): | ||||
| @@ -27,6 +27,9 @@ from mindspore.parallel._utils import _reset_op_id as reset_op_id | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -44,7 +47,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, z, w, a): | def construct(self, x, y, z, w, a): | ||||
| return C.grad_all(self.network)(x, y, z, w, a) | |||||
| return grad_all(self.network)(x, y, z, w, a) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -26,6 +26,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -43,7 +46,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class CustomMatMul(nn.Cell): | class CustomMatMul(nn.Cell): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, w1, w2): | def construct(self, x, w1, w2): | ||||
| return C.grad_all(self.network)(x, w1, w2) | |||||
| return grad_all(self.network)(x, w1, w2) | |||||
| class NetConv(nn.Cell): | class NetConv(nn.Cell): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def test_matmul_add(): | def test_matmul_add(): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -23,13 +23,16 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def compile_net(net, x, y): | def compile_net(net, x, y): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -22,13 +22,17 @@ from mindspore.ops import composite as C | |||||
| from mindspore import Tensor, context | from mindspore import Tensor, context | ||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops import operations as P | |||||
| context.set_context(mode=context.GRAPH_MODE) | context.set_context(mode=context.GRAPH_MODE) | ||||
| grad_by_list = C.GradOperation('get_by_list', get_by_list=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, types, shapes, output_num, strategy3=None, strategy4=None, axis=-1): | def __init__(self, network, types, shapes, output_num, strategy3=None, strategy4=None, axis=-1): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -49,7 +52,7 @@ class GradWrap(nn.Cell): | |||||
| self.weights = ParameterTuple(network.trainable_params()) | self.weights = ParameterTuple(network.trainable_params()) | ||||
| def construct(self): | def construct(self): | ||||
| return C.grad_by_list(self.network, self.weights)() | |||||
| return grad_by_list(self.network, self.weights)() | |||||
| def compile_net(net): | def compile_net(net): | ||||
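Editor's note: get_by_list=True is the variant for weight gradients. As the GradWrap above shows, a ParameterTuple of the network's trainable parameters is passed as the second argument and one gradient per parameter comes back. A sketch under the same assumptions (the wrapper name WeightGradSketch is illustrative):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor, ParameterTuple
    from mindspore.ops import composite as C

    grad_by_list = C.GradOperation('get_by_list', get_by_list=True)

    class WeightGradSketch(nn.Cell):
        def __init__(self, network):
            super(WeightGradSketch, self).__init__()
            self.network = network
            # Gradients come back in the order of this tuple.
            self.weights = ParameterTuple(network.trainable_params())

        def construct(self, x):
            return grad_by_list(self.network, self.weights)(x)

    net = WeightGradSketch(nn.Dense(3, 2))
    grads = net(Tensor(np.ones([1, 3], np.float32)))  # (d/dweight, d/dbias)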
| @@ -23,6 +23,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, strategy3): | def __init__(self, network, strategy3): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, bias, label): | def construct(self, x, y, bias, label): | ||||
| return C.grad_all(self.network)(x, y, bias, label) | |||||
| return grad_all(self.network)(x, y, bias, label) | |||||
| def test_linear(): | def test_linear(): | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def loop_config(size): | def loop_config(size): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -26,6 +26,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -43,7 +46,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def compile_net(net, x, y): | def compile_net(net, x, y): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b, z): | def construct(self, x, y, b, z): | ||||
| return C.grad_all(self.network)(x, y, b, z) | |||||
| return grad_all(self.network)(x, y, b, z) | |||||
| class Net1(nn.Cell): | class Net1(nn.Cell): | ||||
| @@ -29,6 +29,10 @@ from mindspore.train import Model, ParallelMode | |||||
| from tests.dataset_mock import MindData | from tests.dataset_mock import MindData | ||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| device_num = 16 | device_num = 16 | ||||
| device_id = 2 | device_id = 2 | ||||
| @@ -233,7 +237,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, b): | def construct(self, x, b): | ||||
| return C.grad_all(self.network)(x, b) | |||||
| return grad_all(self.network)(x, b) | |||||
| def bn_with_initialize(out_channels): | def bn_with_initialize(out_channels): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_by_list = C.GradOperation('get_by_list', get_by_list=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, strategy3): | def __init__(self, network, strategy3): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class OneStepCell(nn.Cell): | |||||
| def construct(self, data, label): | def construct(self, data, label): | ||||
| weights = self.weights | weights = self.weights | ||||
| grads = C.grad_by_list(self.network, weights)(data, label) | |||||
| grads = grad_by_list(self.network, weights)(data, label) | |||||
| return grads | return grads | ||||
| @@ -26,6 +26,9 @@ from mindspore.ops.operations.comm_ops import _VirtualDataset | |||||
| context.set_context(mode=context.GRAPH_MODE) | context.set_context(mode=context.GRAPH_MODE) | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, strategy3, strategy4, axis): | def __init__(self, network, strategy3, strategy4, axis): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -49,7 +52,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def compile_net(net, x, y): | def compile_net(net, x, y): | ||||
| @@ -140,7 +143,7 @@ def test_prelu_parallel_success3(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, w): | def construct(self, x, y, w): | ||||
| return C.grad_all(self.network)(x, y, w) | |||||
| return grad_all(self.network)(x, y, w) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self, strategy1, strategy2): | def __init__(self, strategy1, strategy2): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLossNoBias(nn.Cell): | class NetWithLossNoBias(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLossNoBias, self).__init__() | super(NetWithLossNoBias, self).__init__() | ||||
| @@ -52,7 +55,7 @@ class GradWrapNoBias(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| @@ -61,7 +64,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net_no_bias(net, x, y): | def compile_net_no_bias(net, x, y): | ||||
| @@ -36,6 +36,9 @@ context.set_context(mode=context.GRAPH_MODE) | |||||
| context.reset_auto_parallel_context() | context.reset_auto_parallel_context() | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class Dataset(MindData): | class Dataset(MindData): | ||||
| def __init__(self, predict, label, length=3, input_num=2): | def __init__(self, predict, label, length=3, input_num=2): | ||||
| super(Dataset, self).__init__(size=length) | super(Dataset, self).__init__(size=length) | ||||
| @@ -194,7 +197,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x): | def construct(self, x): | ||||
| return C.grad_all(self.network)(x) | |||||
| return grad_all(self.network)(x) | |||||
| class ReshapeNet1(nn.Cell): | class ReshapeNet1(nn.Cell): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| @@ -24,13 +24,16 @@ from mindspore.ops import functional as F | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def test_sum_as_loss(): | def test_sum_as_loss(): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, strategy3=None): | def __init__(self, network, strategy3=None): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -40,7 +43,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -26,13 +26,16 @@ from mindspore.common.api import _executor | |||||
| from mindspore.nn import TrainOneStepCell, Adam | from mindspore.nn import TrainOneStepCell, Adam | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x): | def construct(self, x): | ||||
| return C.grad_all(self.network)(x) | |||||
| return grad_all(self.network)(x) | |||||
| def test_bprop_with_sparse_feature_allreduce(): | def test_bprop_with_sparse_feature_allreduce(): | ||||
| context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="hybrid_parallel") | context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="hybrid_parallel") | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| @@ -24,13 +24,17 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b, sens): | def construct(self, x, y, b, sens): | ||||
| return C.grad_all_with_sens(self.network)(x, y, b, sens) | |||||
| return grad_all_with_sens(self.network)(x, y, b, sens) | |||||
| class GradWrap2(nn.Cell): | class GradWrap2(nn.Cell): | ||||
| @@ -41,7 +45,7 @@ class GradWrap2(nn.Cell): | |||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| loss = self.network(x, y, b) | loss = self.network(x, y, b) | ||||
| sens = P.Fill()(mstype.float32, P.Shape()(loss), 1.0) | sens = P.Fill()(mstype.float32, P.Shape()(loss), 1.0) | ||||
| return C.grad_all_with_sens(self.network)(x, y, b, sens) | |||||
| return grad_all_with_sens(self.network)(x, y, b, sens) | |||||
| class GradWrap3(nn.Cell): | class GradWrap3(nn.Cell): | ||||
| @@ -50,7 +54,7 @@ class GradWrap3(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, bias): | def construct(self, x, y, bias): | ||||
| return C.grad_all(self.network)(x, y, bias) | |||||
| return grad_all(self.network)(x, y, bias) | |||||
| class GradWrap4(nn.Cell): | class GradWrap4(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| @@ -58,7 +62,7 @@ class GradWrap4(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| net.set_auto_parallel() | net.set_auto_parallel() | ||||
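Editor's note: with sens_param=True the caller appends the output sensitivity (the initial back-propagated gradient) as the last argument. GradWrap above takes it as an explicit input, while GradWrap2 synthesizes it with P.Fill so it matches the loss shape. A standalone sketch (the ProdNet cell and the scaling value 10.0 are illustrative):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor
    from mindspore.ops import composite as C

    grad_all_with_sens = C.GradOperation('grad_all_with_sens',
                                         get_all=True, sens_param=True)

    class ProdNet(nn.Cell):
        def construct(self, x, y):
            return x * y

    x = Tensor(np.array([2.0], np.float32))
    y = Tensor(np.array([3.0], np.float32))
    # sens scales the gradient flowing into backprop; all-ones reproduces plain grad_all.
    sens = Tensor(np.array([10.0], np.float32))
    dx, dy = grad_all_with_sens(ProdNet())(x, y, sens)  # (10 * y, 10 * x)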
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b, a): | def construct(self, x, y, b, a): | ||||
| return C.grad_all(self.network)(x, y, b, a) | |||||
| return grad_all(self.network)(x, y, b, a) | |||||
| def test_two_matmul(): | def test_two_matmul(): | ||||
| @@ -25,6 +25,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| # model_parallel test | # model_parallel test | ||||
| def test_six_matmul_save(): | def test_six_matmul_save(): | ||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| @@ -43,7 +46,7 @@ def test_six_matmul_save(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x1, x6): | def construct(self, x1, x6): | ||||
| return C.grad_all(self.network)(x1, x6) | |||||
| return grad_all(self.network)(x1, x6) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6): | def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6): | ||||
| @@ -105,7 +108,7 @@ def test_six_matmul_load(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x1, x6, x7): | def construct(self, x1, x6, x7): | ||||
| return C.grad_all(self.network)(x1, x6, x7) | |||||
| return grad_all(self.network)(x1, x6, x7) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7): | def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7): | ||||
| @@ -167,7 +170,7 @@ def test_six_matmul_save_auto(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x1, x6): | def construct(self, x1, x6): | ||||
| return C.grad_all(self.network)(x1, x6) | |||||
| return grad_all(self.network)(x1, x6) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -223,7 +226,7 @@ def test_six_matmul_load_auto(): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x1, x6, x7): | def construct(self, x1, x6, x7): | ||||
| return C.grad_all(self.network)(x1, x6, x7) | |||||
| return grad_all(self.network)(x1, x6, x7) | |||||
| class Net(nn.Cell): | class Net(nn.Cell): | ||||
| def __init__(self, strategy1, strategy3, strategy4, strategy5): | def __init__(self, strategy1, strategy3, strategy4, strategy5): | ||||
| @@ -23,13 +23,16 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class GradWrap(nn.Cell): | class GradWrap(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(GradWrap, self).__init__() | super(GradWrap, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.network)(x, y) | |||||
| return grad_all(self.network)(x, y) | |||||
| def compile_net(net, x, y): | def compile_net(net, x, y): | ||||
| @@ -24,6 +24,9 @@ from mindspore.ops import operations as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -41,7 +44,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| def compile_net(net, x, y, b): | def compile_net(net, x, y, b): | ||||
| @@ -23,6 +23,9 @@ from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| grad_by_list = C.GradOperation('get_by_list', get_by_list=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network, strategy3): | def __init__(self, network, strategy3): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -42,7 +45,7 @@ class OneStepCell(nn.Cell): | |||||
| def construct(self, data, label): | def construct(self, data, label): | ||||
| weights = self.weights | weights = self.weights | ||||
| grads = C.grad_by_list(self.network, weights)(data, label) | |||||
| grads = grad_by_list(self.network, weights)(data, label) | |||||
| return grads | return grads | ||||
| @@ -26,6 +26,9 @@ from mindspore.ops.operations.comm_ops import _VirtualDataset | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | from tests.ut.python.ops.test_math_ops import VirtualLoss | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| class NetWithLoss(nn.Cell): | class NetWithLoss(nn.Cell): | ||||
| def __init__(self, network): | def __init__(self, network): | ||||
| super(NetWithLoss, self).__init__() | super(NetWithLoss, self).__init__() | ||||
| @@ -43,7 +46,7 @@ class GradWrap(nn.Cell): | |||||
| self.network = network | self.network = network | ||||
| def construct(self, x, y, b): | def construct(self, x, y, b): | ||||
| return C.grad_all(self.network)(x, y, b) | |||||
| return grad_all(self.network)(x, y, b) | |||||
| # model_parallel test | # model_parallel test | ||||
| @@ -23,6 +23,10 @@ from mindspore.ops import operations as P | |||||
| context.set_context(mode=context.GRAPH_MODE, save_graphs=True) | context.set_context(mode=context.GRAPH_MODE, save_graphs=True) | ||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True) | |||||
| def test_parser_three_default_mixed_args_subnet(): | def test_parser_three_default_mixed_args_subnet(): | ||||
| class SubNetDefaultMixedArgs(Cell): | class SubNetDefaultMixedArgs(Cell): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -216,7 +220,7 @@ def test_net_vargs_expand(): | |||||
| super(InputBackward, self).__init__() | super(InputBackward, self).__init__() | ||||
| self.network = network | self.network = network | ||||
| self.network.set_train() | self.network.set_train() | ||||
| self.grad = C.grad_all_with_sens | |||||
| self.grad = grad_all_with_sens | |||||
| self.c1 = c1 | self.c1 = c1 | ||||
| self.c2 = c2 | self.c2 = c2 | ||||
| @@ -25,6 +25,13 @@ from mindspore.ops import operations as P | |||||
| context.set_context(mode=context.GRAPH_MODE, save_graphs=True) | context.set_context(mode=context.GRAPH_MODE, save_graphs=True) | ||||
| grad_by_list = C.GradOperation('get_by_list', get_by_list=True) | |||||
| grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True) | |||||
| grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True) | |||||
| grad_all = C.GradOperation('get_all', get_all=True) | |||||
| grad_with_sens = C.GradOperation('grad_with_sens', sens_param=True) | |||||
| def test_net_vargs_expand(): | def test_net_vargs_expand(): | ||||
| class AddNet(Cell): | class AddNet(Cell): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -39,7 +46,7 @@ def test_net_vargs_expand(): | |||||
| y = Tensor(np.random.normal(0, 1, [3, 4, 5]).astype(np.float32)) | y = Tensor(np.random.normal(0, 1, [3, 4, 5]).astype(np.float32)) | ||||
| sens = Tensor(np.random.normal(0, 1, [3, 4, 5]).astype(np.float32)) | sens = Tensor(np.random.normal(0, 1, [3, 4, 5]).astype(np.float32)) | ||||
| net = AddNet() | net = AddNet() | ||||
| _ = C.grad_all_with_sens(net, net.trainable_params())(x, y, sens) | |||||
| _ = grad_all_with_sens(net, net.trainable_params())(x, y, sens) | |||||
| class VarNet(Cell): | class VarNet(Cell): | ||||
| @@ -104,7 +111,7 @@ def test_all_var_args_grad_with_sens(): | |||||
| self.net = net | self.net = net | ||||
| def construct(self, *inputs): | def construct(self, *inputs): | ||||
| return C.grad_by_list_with_sens(self.net, self.weights)(*inputs) | |||||
| return grad_by_list_with_sens(self.net, self.weights)(*inputs) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
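Editor's note: the varargs tests exercise the same instances through *inputs expansion; grad_by_list_with_sens combines both flags, so the unpacked call carries the data arguments plus the trailing sens. A sketch of that shape (the cell, shapes, and names are illustrative):

    import numpy as np
    import mindspore.common.dtype as mstype
    from mindspore import Tensor, ParameterTuple
    from mindspore.nn import Cell, Dense
    from mindspore.ops import composite as C

    grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens',
                                             get_by_list=True, sens_param=True)

    class VarArgGrad(Cell):
        def __init__(self, net):
            super(VarArgGrad, self).__init__()
            self.net = net
            self.weights = ParameterTuple(net.trainable_params())

        def construct(self, *inputs):
            # inputs = (x, ..., sens); the trailing element is consumed as sens.
            return grad_by_list_with_sens(self.net, self.weights)(*inputs)

    net = VarArgGrad(Dense(3, 3))
    x = Tensor(np.ones([1, 3]), dtype=mstype.float32)
    sens = Tensor(np.ones([1, 3]), dtype=mstype.float32)
    grads = net(x, sens)  # one gradient per trainable parameter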
| @@ -122,7 +129,7 @@ def test_grad_list_var_args(): | |||||
| self.net = net | self.net = net | ||||
| def construct(self, *inputs): | def construct(self, *inputs): | ||||
| return C.grad_by_list(self.net, self.weights)(*inputs) | |||||
| return grad_by_list(self.net, self.weights)(*inputs) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| @@ -139,7 +146,7 @@ def test_grad_all_var_args(): | |||||
| self.net = net | self.net = net | ||||
| def construct(self, *inputs): | def construct(self, *inputs): | ||||
| return C.grad_all(self.net)(*inputs) | |||||
| return grad_all(self.net)(*inputs) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| @@ -156,7 +163,7 @@ def test_grad_all_var_args_with_sens(): | |||||
| self.net = net | self.net = net | ||||
| def construct(self, *inputs): | def construct(self, *inputs): | ||||
| return C.grad_all_with_sens(self.net)(*inputs) | |||||
| return grad_all_with_sens(self.net)(*inputs) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| @@ -174,7 +181,7 @@ def test_grad_var_args_with_sens(): | |||||
| self.net = net | self.net = net | ||||
| def construct(self, *inputs): | def construct(self, *inputs): | ||||
| return C.grad_with_sens(self.net)(*inputs) | |||||
| return grad_with_sens(self.net)(*inputs) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| @@ -233,7 +240,7 @@ def test_var_args_grad(): | |||||
| self.weights = ParameterTuple(net.trainable_params()) | self.weights = ParameterTuple(net.trainable_params()) | ||||
| def construct(self, x, y, sens): | def construct(self, x, y, sens): | ||||
| return C.grad_by_list_with_sens(self.net, self.weights)(x, y, sens) | |||||
| return grad_by_list_with_sens(self.net, self.weights)(x, y, sens) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| @@ -268,7 +275,7 @@ def test_var_args_positional(): | |||||
| self.weights = ParameterTuple(net.trainable_params()) | self.weights = ParameterTuple(net.trainable_params()) | ||||
| def construct(self, x, y): | def construct(self, x, y): | ||||
| return C.grad_all(self.net)(x, y) | |||||
| return grad_all(self.net)(x, y) | |||||
| x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
| y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) | ||||
@@ -37,6 +37,9 @@ from ...ut_filter import non_graph_engine
 # W0613: unused-argument
+grad_all = C.GradOperation('get_all', get_all=True)
 log = logging.getLogger("test")
 log.setLevel(level=logging.ERROR)
 context.set_context(mode=context.GRAPH_MODE)
@@ -176,7 +179,7 @@ def test_bprop_with_wrong_output_num():
             return BpropWithWrongOutputNum()(x, y)
     with pytest.raises(ValueError):
-        C.grad_all(BpropWithWrongOutputNumCell())(1, 2)
+        grad_all(BpropWithWrongOutputNumCell())(1, 2)
 def test_bprop_with_wrong_output_type():
     context.set_context(check_bprop=True)
@@ -211,7 +214,7 @@ def test_bprop_with_wrong_output_type():
             return BpropWithWrongOutputType()(x)
     with pytest.raises(TypeError):
-        C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32)))
+        grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32)))
 def test_bprop_with_wrong_output_shape():
@@ -250,4 +253,4 @@ def test_bprop_with_wrong_output_shape():
     with pytest.raises(ValueError):
         net = BpropWithWrongOutputShapeCell()
         net.set_grad()
-        C.grad_all(net)(Tensor(np.ones([64, 10]).astype(np.int32)))
+        grad_all(net)(Tensor(np.ones([64, 10]).astype(np.int32)))

@@ -22,20 +22,24 @@ from mindspore.common.api import ms_function
 from mindspore.common.dtype import get_py_obj_dtype
 from mindspore.ops import composite as C
 from mindspore.ops import functional as F
-from mindspore.ops.composite import grad_all_with_sens
 from ...ut_filter import non_graph_engine
 # pylint: disable=unused-argument
 def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE)
+grad = C.GradOperation('grad')
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 def mul(x, y):
     return x * y
 @ms_function
 def mainf(x, y):
-    return C.grad(mul)(x, y)
+    return grad(mul)(x, y)
 @non_graph_engine
@@ -94,7 +98,7 @@ def test_scalar_cast_grad():
     @ms_function
     def grad_fx_cast(input_x):
-        return C.grad(fx_cast)(input_x)
+        return grad(fx_cast)(input_x)
     gfn = grad_fx_cast(input_x)
     expect_dx = 1
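
For reference, GradOperation('grad') with default flags differentiates with respect to the first input only. A minimal sketch under that assumption; mul mirrors the diff's own helper, while d_mul_dx is an illustrative name:

    from mindspore.common.api import ms_function
    from mindspore.ops import composite as C

    grad = C.GradOperation('grad')  # default flags: gradient of the first input

    def mul(x, y):
        return x * y

    @ms_function
    def d_mul_dx(x, y):
        # d(x * y)/dx = y, so d_mul_dx(2, 3) is expected to be 3.
        return grad(mul)(x, y)
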
@@ -35,6 +35,12 @@ def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE)
+grad = C.GradOperation('grad')
+grad_all = C.GradOperation('get_all', get_all=True)
+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 @ms_function
 def while_upper_bound(upper):
     rval = 2
@@ -109,12 +115,12 @@ def add_mul(x, y):
 def mainf(x, y):
     """ mainf """
-    return C.grad_all(mul)(x, y)
+    return grad_all(mul)(x, y)
 def grad_add_mul(x, y):
     """ grad_add_mul """
-    return C.grad_all(add_mul)(x, y)
+    return grad_all(add_mul)(x, y)
 @ms_function
@@ -269,7 +275,7 @@ def rec(x):
 @ms_function
 def grad_rec(input_x):
-    return C.grad(rec)(input_x)
+    return grad(rec)(input_x)
 def test_grad_rec():
     """ test_grad_rec """
@@ -300,7 +306,7 @@ def test_while2():
 def test_grad_while2():
     @ms_function
     def df_t2_while(input_x, input_y):
-        return C.grad(t2_while)(input_x, input_y)
+        return grad(t2_while)(input_x, input_y)
     assert df_t2_while(2, 3) == 3
@@ -313,7 +319,7 @@ def if_test(a, b):
 def grad_if(x, y):
     """ grad_if """
-    return C.grad_all(if_test)(x, y)
+    return grad_all(if_test)(x, y)
 def test_grad_if():
@@ -333,7 +339,7 @@ def test_dont_unroll_while():
     @ms_function()
     def invoke_while(x, y):
-        return C.grad(dont_unroll_while)(x, y)
+        return grad(dont_unroll_while)(x, y)
     res = invoke_while(2, 3)
     assert res == 3
@@ -418,7 +424,7 @@ def _while(x):
 def grad_while(x):
     """ grad_while """
-    return C.grad_all(_while)(x)
+    return grad_all(_while)(x)
 def test_grad_while():
@@ -442,7 +448,7 @@ def test_factorial():
 def test_grad_factorial():
     @ms_function
     def df_factorial(x):
-        return C.grad(factorial)(x)
+        return grad(factorial)(x)
     assert df_factorial(3) == 11
@@ -520,7 +526,7 @@ def _for(x):
 @ms_function
 def grad_for(x):
     """ grad_for """
-    return C.grad_all(_for)(x)
+    return grad_all(_for)(x)
 def test_grad_for():
@@ -792,7 +798,7 @@ def multi_outputs(x, y):
 def test_grad_multi_outputs():
     @ms_function
     def df_multi_outputs(x, y):
-        return C.grad_all_with_sens(multi_outputs)(x, y, (1, 1))
+        return grad_all_with_sens(multi_outputs)(x, y, (1, 1))
     assert df_multi_outputs(2, 3) == (4, 4)
@@ -820,7 +826,7 @@ def grad_refactor_simple_1(x, y):
 def test_grad_refactor_simple_1():
-    assert C.grad_all(grad_refactor_simple_1)(Tensor(2, dtype=ms.int32), Tensor(1, dtype=ms.int32)) == (4, 2)
+    assert grad_all(grad_refactor_simple_1)(Tensor(2, dtype=ms.int32), Tensor(1, dtype=ms.int32)) == (4, 2)
 def grad_refactor_simple_2(x, y, z):
@@ -832,7 +838,7 @@ def test_grad_refactor_simple_2():
     x = Tensor(2, dtype=ms.int32)
     y = Tensor(3, dtype=ms.int32)
     z = Tensor(0, dtype=ms.int32)
-    assert C.grad_all(grad_refactor_simple_2)(x, y, z) == (7, 4, 7)
+    assert grad_all(grad_refactor_simple_2)(x, y, z) == (7, 4, 7)
 def grad_refactor_1(a, b):
@@ -845,7 +851,7 @@ def grad_refactor_1(a, b):
 def test_grad_refactor_1():
-    assert C.grad_all(grad_refactor_1)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (3, 2)
+    assert grad_all(grad_refactor_1)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (3, 2)
 def grad_refactor_2(a, b):
@@ -858,7 +864,7 @@ def grad_refactor_2(a, b):
 def test_grad_refactor_2():
-    assert C.grad_all(grad_refactor_2)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (27, 54)
+    assert grad_all(grad_refactor_2)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (27, 54)
 def grad_refactor_3(a):
@@ -871,7 +877,7 @@ def grad_refactor_3(a):
 def test_grad_refactor_3():
     @ms_function
     def df_refactor_3(x):
-        return C.grad_all(grad_refactor_3)(x)
+        return grad_all(grad_refactor_3)(x)
     assert df_refactor_3(3) == (3,)
@@ -883,7 +889,7 @@ def grad_refactor_4(a):
 def test_grad_refactor_4():
-    assert C.grad_all(grad_refactor_4)(Tensor(4, dtype=ms.int32)) == (3,)
+    assert grad_all(grad_refactor_4)(Tensor(4, dtype=ms.int32)) == (3,)
 def grad_refactor_5(a):
@@ -896,7 +902,7 @@ def grad_refactor_5(a):
 def test_grad_refactor_5():
     @ms_function
     def df_refactor_5(x):
-        return C.grad_all(grad_refactor_5)(x)
+        return grad_all(grad_refactor_5)(x)
     assert df_refactor_5(1) == (1,)
@@ -908,7 +914,7 @@ def grad_refactor_6(a, b):
 def test_grad_refactor_6():
-    assert C.grad_all(grad_refactor_6)(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) == (3, 1)
+    assert grad_all(grad_refactor_6)(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) == (3, 1)
 def grad_refactor_while(x):
@@ -922,7 +928,7 @@ def grad_refactor_while(x):
 def test_grad_refactor_9():
     @ms_function
     def df_refactor_while(input_x):
-        return C.grad_all(grad_refactor_while)(input_x)
+        return grad_all(grad_refactor_while)(input_x)
     assert df_refactor_while(3) == (6,)
@@ -938,7 +944,7 @@ def grad_refactor__while_1(x):
 def test_grad_refactor_10():
     """ test_grad_while """
-    assert C.grad_all(grad_refactor__while_1)(Tensor(5, dtype=ms.int32)) == (60,)
+    assert grad_all(grad_refactor__while_1)(Tensor(5, dtype=ms.int32)) == (60,)
 def test_grad_refactor_11():
@@ -952,7 +958,7 @@ def test_grad_refactor_11():
             return x * y * y
     net = Net()
-    C.grad_all(net)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.ones([2]).astype(np.float32)))
+    grad_all(net)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.ones([2]).astype(np.float32)))
 def test_grad_refactor_12():
@@ -967,7 +973,7 @@ def test_grad_refactor_12():
             return x * self.z * y
     net = Net()
-    C.grad_all(net)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.zeros([2]).astype(np.float32)))
+    grad_all(net)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.zeros([2]).astype(np.float32)))
 def test_grad_refactor_13():
@@ -983,7 +989,7 @@ def test_grad_refactor_13():
     net = Net()
     weights = ParameterTuple(net.trainable_params())
-    C.grad_by_list(net, weights)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.zeros([2]).astype(np.float32)))
+    grad_by_list(net, weights)(Tensor(np.ones([2]).astype(np.float32)), Tensor(np.zeros([2]).astype(np.float32)))
 def grad_refactor_14(a, b):
@@ -1006,7 +1012,7 @@ def grad_refactor_14(a, b):
 def test_grad_refactor_14():
     @ms_function
     def df_refactor_14(x, y):
-        return C.grad_all(grad_refactor_14)(x, y)
+        return grad_all(grad_refactor_14)(x, y)
     assert df_refactor_14(2, 3) == (3, 9)
@@ -1029,7 +1035,7 @@ def test_grad_if_defer_inline():
     network = IfDeferInline([128, 96])
     network.add_flags(defer_inline=False)
     inp = Tensor(np.ones([128, 96]).astype(np.float32))
-    grads = C.grad_all(network)(inp)
+    grads = grad_all(network)(inp)
     assert np.all(grads[0].asnumpy() == np.full([128, 96], 0.6, dtype=np.float32))
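
The refactor asserts above all rely on the same contract: with get_all=True, the call returns a tuple holding one gradient per positional input, in input order. A minimal sketch of that contract; f is illustrative, not from the diff:

    import mindspore as ms
    from mindspore import Tensor
    from mindspore.ops import composite as C

    grad_all = C.GradOperation('get_all', get_all=True)

    def f(a, b):
        return a * b  # df/da = b, df/db = a

    # Expected: a tuple in input order, here (3, 2).
    dx, dy = grad_all(f)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32))
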
@@ -15,9 +15,13 @@
 """ test_high_order_grad """
 from mindspore import context
 from mindspore.common.api import ms_function
-from mindspore.ops.composite import grad, grad_all, grad_all_with_sens
+import mindspore.ops.composite as C
+grad = C.GradOperation('grad')
+grad_all = C.GradOperation('get_all', get_all=True)
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE, check_bprop=False)
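
Higher-order derivatives need no extra machinery with these instances: wrapping one differentiated function in another application of grad composes them. A minimal sketch with an illustrative function, not taken from the test file:

    from mindspore.ops import composite as C

    grad = C.GradOperation('grad')

    def single(x):
        return 3 * x * x  # f'(x) = 6x, f''(x) = 6

    def first_derivative(x):
        return grad(single)(x)

    def second_derivative(x):
        # Differentiating the first derivative composes two GradOperation calls.
        return grad(first_derivative)(x)
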
@@ -28,6 +28,9 @@ var_hook_done = False
 cell_bprop_done = False
+grad_all = C.GradOperation('get_all', get_all=True)
 def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
     """weight initial for conv layer"""
     weight = weight_variable()
@@ -175,7 +178,7 @@ def test_custom_bprop():
     mul_add.bprop_debug = True
     x = Tensor(np.array([1, 2, 3]).astype(np.int32))
     y = Tensor(np.array([2, 3, 4]).astype(np.int32))
-    C.grad_all(mul_add)(x, y)
+    grad_all(mul_add)(x, y)
     assert bprop_debug
@@ -190,7 +193,7 @@ def test_grad_all():
     net = Net()
     x = Tensor(np.array([1, 2, 3]).astype(np.int32))
     y = Tensor(np.array([2, 3, 4]).astype(np.int32))
-    res = C.grad_all(net)(x, y)
+    res = grad_all(net)(x, y)
     print(res)
 def test_check_input():

@@ -20,6 +20,9 @@ from mindspore import Tensor, nn
 from mindspore.ops import composite as C
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 def test_float_tensor_and_int_add():
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     y = 2
@@ -139,7 +142,7 @@ def test_float_tensor_and_bool_tensors_add_grad():
             self.net = net
         def construct(self, x, y, sens):
-            return C.grad_all_with_sens(self.net)(x, y, sens)
+            return grad_all_with_sens(self.net)(x, y, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     y = Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_))
@@ -167,7 +170,7 @@ def test_float_tensor_and_int_tensors_sub_grad():
             self.net = net
         def construct(self, x, y, sens):
-            return C.grad_all_with_sens(self.net)(x, y, sens)
+            return grad_all_with_sens(self.net)(x, y, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32))
@@ -195,7 +198,7 @@ def test_float16_tensor_and_float32_tensors_sub_grad():
             self.net = net
         def construct(self, x, y, sens):
-            return C.grad_all_with_sens(self.net)(x, y, sens)
+            return grad_all_with_sens(self.net)(x, y, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.int32))
     y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32))
@@ -223,7 +226,7 @@ def test_float_tensor_and_int_add_grad():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -248,7 +251,7 @@ def test_int8_tensor_and_uint8_tensors_add_grad():
             self.net = net
         def construct(self, x, y, sens):
-            return C.grad_all_with_sens(self.net)(x, y, sens)
+            return grad_all_with_sens(self.net)(x, y, sens)
     x = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int8))
     y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8))
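
The sens argument threaded through the wrappers above seeds the backward pass: each returned gradient is the sensitivity multiplied by the local derivative. A minimal sketch assuming float32 throughout; add is an illustrative function:

    import numpy as np
    from mindspore import Tensor
    from mindspore.ops import composite as C

    grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)

    def add(x, y):
        return x + y

    x = Tensor(np.ones([2, 3]).astype(np.float32))
    y = Tensor(np.ones([2, 3]).astype(np.float32))
    sens = Tensor(np.full([2, 3], 2.0).astype(np.float32))

    # d(x + y)/dx = 1 elementwise, so both gradients should equal `sens`.
    dx, dy = grad_all_with_sens(add)(x, y, sens)
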
@@ -26,6 +26,10 @@ from ....mindspore_test_framework.utils.bprop_util import bprop
 from ....mindspore_test_framework.utils.debug_util import PrintShapeTypeCell, PrintGradShapeTypeCell
+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
+grad_all = C.GradOperation('get_all', get_all=True)
 def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE)
@@ -48,7 +52,7 @@ def test_InsertGradientOf_1():
     @ms_function
     def f(x, y):
-        return C.grad_all(stop_test)(x, y)
+        return grad_all(stop_test)(x, y)
     print("stop_gradient:", f(1, 2))
@@ -83,7 +87,7 @@ def test_InsertGradientOf_2():
     @ms_function
     def fd(x, y):
-        return C.grad_all(clip_test)(x, y)
+        return grad_all(clip_test)(x, y)
     print("forward: ", f(1.1, 0.1))
     print("clip_gradient:", fd(1.1, 0.1))
@@ -111,7 +115,7 @@ def test_InsertGradientOf_3():
         return c
     def f(x, y):
-        return C.grad_all(debug_test)(x, y)
+        return grad_all(debug_test)(x, y)
     print("debug_gradient:", f(Tensor(1.0), Tensor(2.0)))
@@ -145,7 +149,7 @@ def test_cell_assign():
             self.weights = mindspore.ParameterTuple(net.get_parameters())
         def construct(self, x, y):
-            return C.grad_by_list(self.net, self.weights)(x, y)
+            return grad_by_list(self.net, self.weights)(x, y)
     class Mul(nn.Cell):
         def __init__(self):

@@ -24,6 +24,9 @@ from mindspore.ops import operations as P
 from ..ut_filter import non_graph_engine
+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
 def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE)
@@ -38,7 +41,7 @@ class GradWrap(nn.Cell):
     def construct(self, x, label):
         weights = self.weights
-        return C.grad_by_list(self.network, weights)(x, label)
+        return grad_by_list(self.network, weights)(x, label)
 @non_graph_engine

@@ -31,6 +31,10 @@ from ..ut_filter import non_graph_engine
 from ....mindspore_test_framework.utils.bprop_util import bprop
+grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
+grad_all = C.GradOperation('get_all', get_all=True)
 def setup_module(module):
     context.set_context(mode=context.PYNATIVE_MODE)
@@ -85,19 +89,19 @@ def stop_test4(x, y):
 @ms_function
 def grad_stop_test(x, y):
     """ grad_stop_test """
-    return C.grad_all(stop_test2)(x, y)
+    return grad_all(stop_test2)(x, y)
 @ms_function
 def grad_stop_test1(x, y):
     """ grad_stop_test1 """
-    return C.grad_all(stop_test3)(x, y)
+    return grad_all(stop_test3)(x, y)
 @ms_function
 def grad_stop_test5(x, y):
     """ grad_stop_test5 """
-    return C.grad_all(stop_test5)(x, y)
+    return grad_all(stop_test5)(x, y)
 def test_stop():
@@ -126,7 +130,7 @@ class GradWrap(nn.Cell):
     @ms_function
     def construct(self, x, label):
         weights = self.weights
-        return C.grad_by_list(self.network, weights)(x, label)
+        return grad_by_list(self.network, weights)(x, label)
 @non_graph_engine
@@ -256,7 +260,7 @@ def test_stop_gradient_4():
     def stop_test(x):
         return stop_gradient(x)
-    assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,)
+    assert grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,)
 def test_stop_gradient_5():
@@ -266,7 +270,7 @@ def test_stop_gradient_5():
         ret = x + y
         return ret
-    assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,)
+    assert grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,)
 def test_stop_gradient_6():
@@ -275,7 +279,7 @@ def test_stop_gradient_6():
         ret = stop_gradient(ret)
         return ret
-    assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (0, 0)
+    assert grad_all(stop_test)(Tensor(1, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (0, 0)
 class PrimWithMultiOutputs(PrimitiveWithInfer):
@@ -436,5 +440,5 @@ def test_stop_print():
         self.printm(y)
         return x, y
-    C.grad_all(StopPrint())(Tensor(np.ones([2]).astype(np.float32)),
-                            Tensor(np.ones([2]).astype(np.float32)))
+    grad_all(StopPrint())(Tensor(np.ones([2]).astype(np.float32)),
+                          Tensor(np.ones([2]).astype(np.float32)))

@@ -21,6 +21,9 @@ from mindspore import dtype as mstype
 from mindspore.ops import composite as C
+grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 def test_user_define_bprop_check_ok():
     class Net(nn.Cell):
         def __init__(self):
@@ -40,7 +43,7 @@ def test_user_define_bprop_check_ok():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -72,7 +75,7 @@ def test_user_define_bprop_no_check_dtype():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -104,7 +107,7 @@ def test_user_define_bprop_check_shape():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -135,7 +138,7 @@ def test_user_define_bprop_check_dtype():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -167,7 +170,7 @@ def test_user_define_bprop_check_parameter():
             self.net = net
         def construct(self, x, sens):
-            return C.grad_all_with_sens(self.net)(x, sens)
+            return grad_all_with_sens(self.net)(x, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32))
@@ -198,7 +201,7 @@ def test_user_define_bprop_check_number():
             self.net = net
         def construct(self, x, y, sens):
-            return C.grad_all_with_sens(self.net)(x, y, sens)
+            return grad_all_with_sens(self.net)(x, y, sens)
     x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
     y = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
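
The checks exercised above enforce that, with check_bprop enabled, a user-defined bprop returns exactly one gradient per construct input, matching each input's shape and dtype. A minimal sketch of a conforming cell; MulCell is an illustrative name, not from the diff:

    import mindspore.nn as nn
    from mindspore import context

    context.set_context(check_bprop=True)

    class MulCell(nn.Cell):
        """Illustrative cell with a user-defined bprop that passes the checks."""
        def construct(self, x, y):
            return x * y

        def bprop(self, x, y, out, dout):
            # One gradient per construct input, same shape and dtype as the inputs.
            return dout * y, dout * x
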