Remove the `name` argument from `GradOperation`

5 years ago · 1a54785fe2
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -117,7 +117,7 @@ class WithGradCell(Cell):
        self.network = network
        self.loss_fn = loss_fn
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=(sens is not None))
        self.grad = C.GradOperation(get_by_list=True, sens_param=(sens is not None))
        self.sens = sens
        if loss_fn is None:
            self.network_with_loss = network
@@ -182,7 +182,7 @@ class TrainOneStepCell(Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -269,7 +269,7 @@ class DistributedGradReducer(Cell):
        >>>         self.network.add_flags(defer_inline=True)
        >>>         self.weights = optimizer.parameters
        >>>         self.optimizer = optimizer
        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        >>>         self.sens = sens
        >>>         self.reducer_flag = False
        >>>         self.grad_reducer = None
--- a/mindspore/nn/wrap/loss_scale.py
+++ b/mindspore/nn/wrap/loss_scale.py
@@ -210,7 +210,7 @@ class TrainOneStepWithLossScaleCell(Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.hyper_map = C.HyperMap()
        if context.get_context("device_target") == "GPU":
            self.gpu_target = True
--- a/mindspore/ops/composite/base.py
+++ b/mindspore/ops/composite/base.py
@@ -106,12 +106,11 @@ class GradOperation(GradOperation_):
            a 'ones_like(outputs)' sensitivity will be attached automatically. Default: False.
    """
    def __init__(self, name,
                 get_all=False, get_by_list=False, sens_param=False):
    def __init__(self, get_all=False, get_by_list=False, sens_param=False):
        self.get_all = get_all
        self.get_by_list = get_by_list
        self.sens_param = sens_param
        GradOperation_.__init__(self, name, get_all, get_by_list, sens_param)
        GradOperation_.__init__(self, 'grad', get_all, get_by_list, sens_param)
        self.grad_fn = None
        self.fn = None
        self.need_forward = False
@@ -139,7 +138,7 @@ class GradOperation(GradOperation_):
                fn.already_run = False
    def __call__(self, fn, weights=None):
        grad_ = GradOperation('grad', self.get_all, self.get_by_list, self.sens_param)
        grad_ = GradOperation(self.get_all, self.get_by_list, self.sens_param)
        if self.grad_fn is None or self.fn != fn:
            if context.get_context("mode") == context.GRAPH_MODE:
                if self.get_by_list:
--- a/mindspore/ops/operations/debug_ops.py
+++ b/mindspore/ops/operations/debug_ops.py
@@ -216,7 +216,7 @@ class InsertGradientOf(PrimitiveWithInfer):
        >>>     return ret
        >>>
        >>> clip = P.InsertGradientOf(clip_gradient)
        >>> grad_all = C.GradOperation('get_all', get_all=True)
        >>> grad_all = C.GradOperation(get_all=True)
        >>> def InsertGradientOfClipDemo():
        >>>     def clip_test(x, y):
        >>>         x = clip(x)
@@ -268,7 +268,7 @@ class HookBackward(PrimitiveWithInfer):
        >>> def hook_fn(grad_out):
        >>>     print(grad_out)
        >>>
        >>> grad_all = GradOperation('get_all', get_all=True)
        >>> grad_all = GradOperation(get_all=True)
        >>> hook = P.HookBackward(hook_fn)
        >>>
        >>> def hook_test(x, y):
--- a/model_zoo/official/cv/faster_rcnn/src/network_define.py
+++ b/model_zoo/official/cv/faster_rcnn/src/network_define.py
@@ -163,8 +163,7 @@ class TrainOneStepCell(nn.Cell):
        self.backbone = network_backbone
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
        self.reduce_flag = reduce_flag
--- a/model_zoo/official/cv/maskrcnn/src/network_define.py
+++ b/model_zoo/official/cv/maskrcnn/src/network_define.py
@@ -171,8 +171,7 @@ class TrainOneStepCell(nn.Cell):
        self.backbone = network_backbone
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
        self.reduce_flag = reduce_flag
--- a/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
@@ -119,7 +119,7 @@ class DistributedGradReducerThor(Cell):
        >>>         self.network.add_flags(defer_inline=True)
        >>>         self.weights = ParameterTuple(network.trainable_params())
        >>>         self.optimizer = optimizer
        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        >>>         self.sens = sens
        >>>         self.reducer_flag = False
        >>>         self.grad_reducer = None
--- a/model_zoo/official/cv/ssd/src/ssd.py
+++ b/model_zoo/official/cv/ssd/src/ssd.py
@@ -383,7 +383,7 @@ class TrainingWrapper(nn.Cell):
        self.network = network
        self.weights = ms.ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
+++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
@@ -77,7 +77,7 @@ class TrainOneStepCellWithGradClip(Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
@@ -412,7 +412,7 @@ class TrainingWrapper(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
@@ -412,7 +412,7 @@ class TrainingWrapper(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
+++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
@@ -647,7 +647,7 @@ class TrainingWrapper(nn.Cell):
        self.network = network
        self.weights = ms.ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/model_zoo/official/gnn/gat/src/utils.py
+++ b/model_zoo/official/gnn/gat/src/utils.py
@@ -141,7 +141,7 @@ class TrainOneStepCell(nn.Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
    def construct(self):
--- a/model_zoo/official/gnn/gcn/src/metrics.py
+++ b/model_zoo/official/gnn/gcn/src/metrics.py
@@ -150,7 +150,7 @@ class TrainOneStepCell(nn.Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
    def construct(self):
--- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
@@ -57,8 +57,7 @@ class BertFinetuneCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
@@ -160,7 +159,7 @@ class BertSquadCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
--- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
@@ -274,7 +274,7 @@ class BertTrainOneStepCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -353,8 +353,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
--- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
@@ -293,7 +293,7 @@ class BertTrainOneStepCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -373,8 +373,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
--- a/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
@@ -119,7 +119,7 @@ class DistributedGradReducerThor(Cell):
        >>>         self.network.add_flags(defer_inline=True)
        >>>         self.weights = ParameterTuple(network.trainable_params())
        >>>         self.optimizer = optimizer
        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        >>>         self.sens = sens
        >>>         self.reducer_flag = False
        >>>         self.grad_reducer = None
--- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
+++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
@@ -239,7 +239,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.all_reduce = P.AllReduce()
--- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
+++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
@@ -218,8 +218,7 @@ class BertTrainWithLossScaleCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
@@ -310,8 +309,7 @@ class BertTrainCell(nn.Cell):
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.sens = sens
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -474,8 +472,7 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
@@ -562,8 +559,7 @@ class BertEvaluationCell(nn.Cell):
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.sens = sens
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
--- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py
+++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
@@ -158,7 +158,7 @@ class TransformerTrainOneStepCell(nn.Cell):
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -244,8 +244,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
        self.network.add_flags(defer_inline=True)
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
--- a/model_zoo/official/recommend/deepfm/src/deepfm.py
+++ b/model_zoo/official/recommend/deepfm/src/deepfm.py
@@ -286,7 +286,7 @@ class TrainStepWrap(nn.Cell):
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = Adam(self.weights, learning_rate=lr, eps=eps, loss_scale=loss_scale)
        self.hyper_map = C.HyperMap()
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = loss_scale
    def construct(self, batch_ids, batch_wts, label):
--- a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
@@ -337,9 +337,9 @@ class TrainStepWrap(nn.Cell):
            self.optimizer_w = FTRL(learning_rate=5e-2, params=self.weights_w,
                                    l1=1e-8, l2=1e-8, initial_accum=1.0, loss_scale=sens)
        self.hyper_map = C.HyperMap()
        self.grad_w = C.GradOperation('grad_w', get_by_list=True,
        self.grad_w = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.grad_d = C.GradOperation('grad_d', get_by_list=True,
        self.grad_d = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.sens = sens
        self.loss_net_w = IthOutputCell(network, output_index=0)
--- a/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
@@ -537,11 +537,9 @@ class TrainStepWrap(nn.Cell):
        self.hyper_map = C.HyperMap()
        self.grad_w = C.GradOperation('grad_w',
                                      get_by_list=True,
        self.grad_w = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.grad_d = C.GradOperation('grad_d',
                                      get_by_list=True,
        self.grad_d = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.sens = sens
--- a/tests/mindspore_test_framework/components/function/compile_gradient_wrt_inputs.py
+++ b/tests/mindspore_test_framework/components/function/compile_gradient_wrt_inputs.py
@@ -46,5 +46,5 @@ class CompileBackwardBlockWrtInputsBC(IBuilderComponent):
    """
    def __call__(self):
        grad_op = GradOperation('grad', get_all=True, sens_param=True)
        grad_op = GradOperation(get_all=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, compile_block, grad_op)
--- a/tests/mindspore_test_framework/components/function/compile_gradient_wrt_params.py
+++ b/tests/mindspore_test_framework/components/function/compile_gradient_wrt_params.py
@@ -46,5 +46,5 @@ class CompileBackwardBlockWrtParamsBC(IBuilderComponent):
    """
    def __call__(self, verification_set):
        grad_op = GradOperation('grad', get_by_list=True, sens_param=True)
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, compile_block, grad_op)
--- a/tests/mindspore_test_framework/components/function/init_params_with_rand_and_run_gradient_wrt_inputs.py
+++ b/tests/mindspore_test_framework/components/function/init_params_with_rand_and_run_gradient_wrt_inputs.py
@@ -22,5 +22,5 @@ from ...utils.block_util import run_block, gen_grad_net, create_funcs, get_unifo
 class RunBackwardBlockWrtInputsWithRandParamBC(IBuilderComponent):
    def __call__(self):
        grad_op = GradOperation('grad', get_all=True, sens_param=True)
        grad_op = GradOperation(get_all=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, run_block, grad_op, get_uniform_with_shape)
--- a/tests/mindspore_test_framework/components/function/init_params_with_rand_and_run_gradient_wrt_params.py
+++ b/tests/mindspore_test_framework/components/function/init_params_with_rand_and_run_gradient_wrt_params.py
@@ -22,5 +22,5 @@ from ...utils.block_util import run_block, gen_grad_net, create_funcs, get_unifo
 class RunBackwardBlockWrtParamsWithRandParamBC(IBuilderComponent):
    def __call__(self):
        grad_op = GradOperation('grad', get_by_list=True, sens_param=True)
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, run_block, grad_op, get_uniform_with_shape)
--- a/tests/mindspore_test_framework/components/function/run_gradient_wrt_inputs.py
+++ b/tests/mindspore_test_framework/components/function/run_gradient_wrt_inputs.py
@@ -22,5 +22,5 @@ from ...utils.block_util import run_block, gen_grad_net, create_funcs
 class RunBackwardBlockWrtInputsBC(IBuilderComponent):
    def __call__(self):
        grad_op = GradOperation('grad', get_all=True, sens_param=True)
        grad_op = GradOperation(get_all=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, run_block, grad_op)
--- a/tests/mindspore_test_framework/components/function/run_gradient_wrt_params.py
+++ b/tests/mindspore_test_framework/components/function/run_gradient_wrt_params.py
@@ -22,5 +22,5 @@ from ...utils.block_util import run_block, gen_grad_net, create_funcs
 class RunBackwardBlockWrtParamsBC(IBuilderComponent):
    def __call__(self):
        grad_op = GradOperation('grad', get_by_list=True, sens_param=True)
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        return create_funcs(self.verification_set, gen_grad_net, run_block, grad_op)
--- a/tests/mindspore_test_framework/utils/block_util.py
+++ b/tests/mindspore_test_framework/utils/block_util.py
@@ -331,7 +331,7 @@ def create_funcs(verification_set, block_generator, block_runner, grad_op=None,
            # gradient
            if grad_op:
                if num_outputs == 0:
                    grad_op_ = GradOperation('grad', get_all=grad_op.get_all,
                    grad_op_ = GradOperation(get_all=grad_op.get_all,
                                             get_by_list=grad_op.get_by_list, sens_param=False)
                    b = block_generator(block, grad_op_, len(inputs), desc_const=desc_const,
                                        const_first=const_first, add_fake_input=add_fake_input)
--- a/tests/mindspore_test_framework/utils/bprop_util.py
+++ b/tests/mindspore_test_framework/utils/bprop_util.py
@@ -85,7 +85,7 @@ def bprop(func, *inputs, grads_wrt_outputs=None, wrt: list = None, params: list
        if not params:
            params = func.trainable_params()
    grad_op = GradOperation(name='grad', get_all=wrt_inputs, get_by_list=wrt_params, sens_param=with_sens_param)
    grad_op = GradOperation(get_all=wrt_inputs, get_by_list=wrt_params, sens_param=with_sens_param)
    grad = Bprop(func, wrt_params, params, grad_op, grads_wrt_outputs)
    if context.get_context("mode") == context.PYNATIVE_MODE:
--- a/tests/mindspore_test_framework/utils/check_gradient.py
+++ b/tests/mindspore_test_framework/utils/check_gradient.py
@@ -315,7 +315,7 @@ class ScalarGradChecker(_GradChecker):
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation('grad', get_all=True, sens_param=True)
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(ScalarGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                                output_selector, sampling_times, reduce_output)
@@ -358,7 +358,7 @@ class OperationGradChecker(_GradChecker):
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation('grad', get_all=True, sens_param=True)
        grad_op = GradOperation(get_all=True, sens_param=True)
        super(OperationGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                                   output_selector, sampling_times, reduce_output)
@@ -390,7 +390,7 @@ class NNGradChecker(_GradChecker):
                 output_selector=None,
                 sampling_times=-1,
                 reduce_output=False) -> None:
        grad_op = GradOperation('grad', get_by_list=True, sens_param=True)
        grad_op = GradOperation(get_by_list=True, sens_param=True)
        self.params = ParameterTuple(fn.trainable_params())
        super(NNGradChecker, self).__init__(fn, grad_op, args, delta, max_error, input_selector, \
                                            output_selector, sampling_times, reduce_output)
--- a/tests/ops_common.py
+++ b/tests/ops_common.py
@@ -23,7 +23,7 @@ from mindspore import Tensor
 from mindspore.common.api import _executor
 grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 grad_all_with_sens = C.GradOperation(get_all=True, sens_param=True)
 class InputBackward(nn.Cell):
--- a/tests/perf_test/test_lenet.py
+++ b/tests/perf_test/test_lenet.py
@@ -27,7 +27,7 @@ from mindspore.common.api import _executor
 context.set_context(mode=context.GRAPH_MODE)
 grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 grad_all_with_sens = C.GradOperation(get_all=True, sens_param=True)
 batch_size = 1
 channel = 1
--- a/tests/st/control/test_cont_grad.py
+++ b/tests/st/control/test_cont_grad.py
@@ -28,8 +28,8 @@ from mindspore.ops import operations as P
 # context.set_context(save_graphs=True)
 grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
 grad_all = C.GradOperation('get_all', get_all=True)
 grad_by_list = C.GradOperation(get_by_list=True)
 grad_all = C.GradOperation(get_all=True)
 def test_while_forward():
--- a/tests/st/gnn/test_gnn_aggregator.py
+++ b/tests/st/gnn/test_gnn_aggregator.py
@@ -25,7 +25,7 @@ from mindspore.common.api import _executor
 context.set_context(mode=context.GRAPH_MODE)
 grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 grad_all_with_sens = C.GradOperation(get_all=True, sens_param=True)
 class MeanAggregatorGrad(nn.Cell):
--- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
@@ -284,9 +284,9 @@ class TrainStepWrap(nn.Cell):
        self.optimizer_d = Adam(
            self.weights_d, learning_rate=3.5e-4, eps=1e-8, loss_scale=sens)
        self.hyper_map = C.HyperMap()
        self.grad_w = C.GradOperation('grad_w', get_by_list=True,
        self.grad_w = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.grad_d = C.GradOperation('grad_d', get_by_list=True,
        self.grad_d = C.GradOperation(get_by_list=True,
                                      sens_param=True)
        self.sens = sens
        self.loss_net_w = IthOutputCell(network, output_index=0)
--- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py
+++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
@@ -647,7 +647,7 @@ class TrainingWrapper(nn.Cell):
        self.network = network
        self.weights = ms.ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer = None
--- a/tests/st/networks/models/bert/src/bert_for_pre_training.py
+++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py
@@ -271,7 +271,7 @@ class BertTrainOneStepCell(nn.Cell):
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
        self.reducer_flag = False
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
@@ -351,8 +351,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
--- a/tests/st/networks/models/bert/src/utils.py
+++ b/tests/st/networks/models/bert/src/utils.py
@@ -52,8 +52,7 @@ class BertFinetuneCell(nn.Cell):
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.reducer_flag = False
        self.allreduce = P.AllReduce()
--- a/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
@@ -120,7 +120,7 @@ class DistributedGradReducerThor(Cell):
        >>>         self.network.add_flags(defer_inline=True)
        >>>         self.weights = ParameterTuple(network.trainable_params())
        >>>         self.optimizer = optimizer
        >>>         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        >>>         self.sens = sens
        >>>         self.reducer_flag = False
        >>>         self.grad_reducer = None
--- a/tests/st/networks/test_cell_bprop.py
+++ b/tests/st/networks/test_cell_bprop.py
@@ -29,7 +29,7 @@ from mindspore.ops import operations as P
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 grad_all = C.GradOperation('get_all', get_all=True)
 grad_all = C.GradOperation(get_all=True)
 class MulAdd(nn.Cell):
@@ -351,7 +351,7 @@ class MulAddWithParam(nn.Cell):
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
 def test_refkey_bprop():
    grad_by_list = C.GradOperation('get_by_list', get_all=True, get_by_list=True)
    grad_by_list = C.GradOperation(get_all=True, get_by_list=True)
    class GradWrap(nn.Cell):
        def __init__(self, network):
            super(GradWrap, self).__init__()
--- a/tests/st/ops/ascend/test_addn.py
+++ b/tests/st/ops/ascend/test_addn.py
@@ -49,7 +49,7 @@ def test_net():
 def test_grad_addn_with_list():
    grad_op = C.GradOperation('get_all', get_all=True)
    grad_op = C.GradOperation(get_all=True)
    class AddN(nn.Cell):
        def __init__(self):
            super().__init__()
--- a/tests/st/ops/ascend/test_conv_grad.py
+++ b/tests/st/ops/ascend/test_conv_grad.py
@@ -29,7 +29,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_dense_grad.py
+++ b/tests/st/ops/ascend/test_dense_grad.py
@@ -26,7 +26,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_fused_batchnorm_grad.py
+++ b/tests/st/ops/ascend/test_fused_batchnorm_grad.py
@@ -30,7 +30,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_maxpool_grad.py
+++ b/tests/st/ops/ascend/test_maxpool_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_maxpool_with_argmax_grad.py
+++ b/tests/st/ops/ascend/test_maxpool_with_argmax_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_relu_grad.py
+++ b/tests/st/ops/ascend/test_relu_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_simplemean_grad.py
+++ b/tests/st/ops/ascend/test_simplemean_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_batchnorm_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_batchnorm_grad.py
@@ -30,7 +30,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_gelu_grad_sens.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_gelu_grad_sens.py
@@ -27,7 +27,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_, output_grad):
@@ -71,7 +71,7 @@ class MEGeluLargeIn(Cell):
 class GradLargeIn(Cell):
    def __init__(self, network):
        super(GradLargeIn, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, output_grad):
--- a/tests/st/ops/ascend/test_tbe_ops/test_layernorm_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_layernorm_grad.py
@@ -27,7 +27,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_, output_grad,):
--- a/tests/st/ops/ascend/test_tbe_ops/test_maximum_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_maximum_grad.py
@@ -21,7 +21,7 @@ from mindspore.ops import composite as C
 from mindspore.ops import operations as P
 context.set_context(device_target="Ascend")
 grad = C.GradOperation('get_all', get_all=True, sens_param=True)
 grad = C.GradOperation(get_all=True, sens_param=True)
 class MaxNetMe(Cell):
--- a/tests/st/ops/ascend/test_tbe_ops/test_maxpool_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_maxpool_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_minimum_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_minimum_grad.py
@@ -21,7 +21,7 @@ from mindspore.ops import composite as C
 from mindspore.ops.operations import Minimum
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 grad = C.GradOperation('get_all', get_all=True, sens_param=True)
 grad = C.GradOperation(get_all=True, sens_param=True)
 class MinNetMe(Cell):
--- a/tests/st/ops/ascend/test_tbe_ops/test_relu_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_relu_grad.py
@@ -27,7 +27,7 @@ context.set_context(device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py
@@ -27,7 +27,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True)
        self.grad = GradOperation(get_all=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_resize_nearest_neighbor_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_resize_nearest_neighbor_grad.py
@@ -37,7 +37,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_sigmoid_cross_entropy_with_logits_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_sigmoid_cross_entropy_with_logits_grad.py
@@ -37,7 +37,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_sigmoid_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_sigmoid_grad.py
@@ -37,7 +37,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/st/ops/ascend/test_tbe_ops/test_smooth_l1_loss_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_smooth_l1_loss_grad.py
@@ -36,7 +36,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, pred, gt, dout):
--- a/tests/st/ops/ascend/test_tbe_ops/test_stridedslice_grad.py
+++ b/tests/st/ops/ascend/test_tbe_ops/test_stridedslice_grad.py
@@ -26,7 +26,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_, output_grad):
--- a/tests/st/ops/cpu/test_batchnorm_op.py
+++ b/tests/st/ops/cpu/test_batchnorm_op.py
@@ -37,7 +37,7 @@ class Batchnorm_Net(Cell):
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/ops/cpu/test_lstm_op.py
+++ b/tests/st/ops/cpu/test_lstm_op.py
@@ -207,8 +207,7 @@ class Grad(nn.Cell):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
    @ms_function
--- a/tests/st/ops/custom_ops_tbe/test_square.py
+++ b/tests/st/ops/custom_ops_tbe/test_square.py
@@ -23,7 +23,7 @@ from mindspore.ops import composite as C
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 grad_with_sens = C.GradOperation('grad_with_sens', sens_param=True)
 grad_with_sens = C.GradOperation(sens_param=True)
 class Net(nn.Cell):
--- a/tests/st/ops/gpu/test_batchnorm_op.py
+++ b/tests/st/ops/gpu/test_batchnorm_op.py
@@ -37,7 +37,7 @@ class Batchnorm_Net(Cell):
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/ops/gpu/test_binary_cross_entropy_op.py
+++ b/tests/st/ops/gpu/test_binary_cross_entropy_op.py
@@ -54,7 +54,7 @@ def test_binary_cross_entropy_loss():
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, sens, weight):
--- a/tests/st/ops/gpu/test_ctcloss_op.py
+++ b/tests/st/ops/gpu/test_ctcloss_op.py
@@ -40,7 +40,7 @@ class Net(nn.Cell):
 class GradData(nn.Cell):
    def __init__(self, network):
        super(GradData, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=False)
        self.grad = GradOperation(get_all=True, sens_param=False)
        self.network = network
    def construct(self, probs, labels, input_lengths, label_lengths):
--- a/tests/st/ops/gpu/test_dense_op.py
+++ b/tests/st/ops/gpu/test_dense_op.py
@@ -65,7 +65,7 @@ def test_biasadd():
 class GradData(nn.Cell):
    def __init__(self, network):
        super(GradData, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, inputs, output_grad):
@@ -77,8 +77,7 @@ class GradWeight(nn.Cell):
        super(GradWeight, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
    def construct(self, x, output_grad):
@@ -169,7 +168,7 @@ def test_dw():
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_, bias, dy):
--- a/tests/st/ops/gpu/test_gelu_grad_op.py
+++ b/tests/st/ops/gpu/test_gelu_grad_op.py
@@ -37,7 +37,7 @@ class GeluNet(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/ops/gpu/test_kl_div_op.py
+++ b/tests/st/ops/gpu/test_kl_div_op.py
@@ -53,7 +53,7 @@ def test_binary_cross_entropy_loss():
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, sens):
--- a/tests/st/ops/gpu/test_logsoftmax_op.py
+++ b/tests/st/ops/gpu/test_logsoftmax_op.py
@@ -52,7 +52,7 @@ class LogSoftmax(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/ops/gpu/test_lstm_op.py
+++ b/tests/st/ops/gpu/test_lstm_op.py
@@ -581,8 +581,7 @@ class Grad(nn.Cell):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
    @ms_function
--- a/tests/st/ops/gpu/test_maximum_op.py
+++ b/tests/st/ops/gpu/test_maximum_op.py
@@ -35,7 +35,7 @@ class Net(Cell):
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, sens):
--- a/tests/st/ops/gpu/test_minimum_op.py
+++ b/tests/st/ops/gpu/test_minimum_op.py
@@ -36,7 +36,7 @@ class MinimumNet(Cell):
 class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, sens):
--- a/tests/st/ops/gpu/test_mirror_pad.py
+++ b/tests/st/ops/gpu/test_mirror_pad.py
@@ -58,7 +58,7 @@ def test_mirror_pad():
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_, output_grad):
        return self.grad(self.network)(input_, output_grad)
--- a/tests/st/ops/gpu/test_smoothl1loss_op.py
+++ b/tests/st/ops/gpu/test_smoothl1loss_op.py
@@ -59,7 +59,7 @@ def test_smoothl1loss():
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, x1, x2, sens):
--- a/tests/st/ops/gpu/test_softmax_op.py
+++ b/tests/st/ops/gpu/test_softmax_op.py
@@ -79,7 +79,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/ops/gpu/test_stridedslice_grad_op.py
+++ b/tests/st/ops/gpu/test_stridedslice_grad_op.py
@@ -36,7 +36,7 @@ class StridedSliceNet(nn.Cell):
 class GradData(nn.Cell):
    def __init__(self, network):
        super(GradData, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=False)
        self.grad = C.GradOperation(get_all=True, sens_param=False)
        self.network = network
    def construct(self, x):
--- a/tests/st/ops/gpu/test_tanh_op.py
+++ b/tests/st/ops/gpu/test_tanh_op.py
@@ -37,7 +37,7 @@ class TanhNet(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network
    def construct(self, input_data, sens):
--- a/tests/st/pynative/test_pynative_hook.py
+++ b/tests/st/pynative/test_pynative_hook.py
@@ -30,7 +30,7 @@ from mindspore.common.initializer import TruncatedNormal
 context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
 grad_all = C.GradOperation('get_all', get_all=True)
 grad_all = C.GradOperation(get_all=True)
 def weight_variable():
@@ -112,7 +112,7 @@ class GradWrap(nn.Cell):
    def construct(self, x, label):
        weights = self.weights
        return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label)
        return C.GradOperation(get_by_list=True)(self.network, weights)(x, label)
 class test_custom_cell_base():
--- a/tests/st/pynative/test_pynative_lenet.py
+++ b/tests/st/pynative/test_pynative_lenet.py
@@ -29,7 +29,7 @@ from mindspore.ops import operations as P
 np.random.seed(1)
 grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
 grad_by_list = C.GradOperation(get_by_list=True)
 def weight_variable():
--- a/tests/st/pynative/test_pynative_mindarmour.py
+++ b/tests/st/pynative/test_pynative_mindarmour.py
@@ -87,7 +87,7 @@ class LeNet(nn.Cell):
 class GradWithSens(Cell):
    def __init__(self, network):
        super(GradWithSens, self).__init__()
        self.grad = GradOperation(name="grad", get_all=False,
        self.grad = GradOperation(get_all=False,
                                  sens_param=True)
        self.network = network
@@ -99,8 +99,7 @@ class GradWithSens(Cell):
 class GradWrapWithLoss(Cell):
    def __init__(self, network):
        super(GradWrapWithLoss, self).__init__()
        self._grad_all = GradOperation(name="get_all",
                                       get_all=True,
        self._grad_all = GradOperation(get_all=True,
                                       sens_param=False)
        self._network = network
--- a/tests/st/pynative/test_pynative_resnet50.py
+++ b/tests/st/pynative/test_pynative_resnet50.py
@@ -40,7 +40,7 @@ np.random.seed(1)
 ds.config.set_seed(1)
 grad_by_list = CP.GradOperation('get_by_list', get_by_list=True)
 grad_by_list = CP.GradOperation(get_by_list=True)
 def weight_variable(shape):
--- a/tests/st/pynative/test_tensor_index.py
+++ b/tests/st/pynative/test_tensor_index.py
@@ -24,7 +24,7 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.ops import composite as C
 grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)
 grad_by_list_with_sens = C.GradOperation(get_by_list=True, sens_param=True)
 def setup_module():
--- a/tests/train_step_wrap.py
+++ b/tests/train_step_wrap.py
@@ -32,7 +32,7 @@ class TrainStepWrap(nn.Cell):
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = nn.Momentum(self.weights, 0.1, 0.9)
        self.hyper_map = C.HyperMap()
        self.grad = C.GradOperation('grad', get_by_list=True)
        self.grad = C.GradOperation(get_by_list=True)
    def construct(self, x, label):
        weights = self.weights
@@ -71,7 +71,7 @@ class TrainStepWrap2(nn.Cell):
        self.weights = ParameterTuple(network.get_parameters())
        self.optimizer = nn.Momentum(self.weights, 0.1, 0.9)
        self.hyper_map = C.HyperMap()
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens
    def construct(self, x):
@@ -93,7 +93,7 @@ class TrainStepWrapWithoutOpt(nn.Cell):
        super(TrainStepWrapWithoutOpt, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad', get_by_list=True)
        self.grad = C.GradOperation(get_by_list=True)
    def construct(self, x, label):
        grads = self.grad(self.network, self.weights)(x, label)
--- a/tests/ut/python/dtype/test_list.py
+++ b/tests/ut/python/dtype/test_list.py
@@ -31,7 +31,7 @@ from tests.mindspore_test_framework.pipeline.forward.compile_forward \
 context.set_context(mode=context.GRAPH_MODE)
 grad_all = C.GradOperation('get_all', get_all=True)
 grad_all = C.GradOperation(get_all=True)
 def test_list_equal():
--- a/tests/ut/python/exec/test_train_with_lars.py
+++ b/tests/ut/python/exec/test_train_with_lars.py
@@ -52,8 +52,7 @@ class TrainOneStepWithLarsCell(nn.Cell):
        self.slice_index, self.params_len, weights = get_net_trainable_reordered_params(self.network)
        self.weights = ParameterTuple(weights)
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
        self.sens = Parameter(Tensor([sens], mstype.float32), name='sens', requires_grad=False)
        self.weight_decay = 1.0
--- a/tests/ut/python/ir/test_row_tensor.py
+++ b/tests/ut/python/ir/test_row_tensor.py
@@ -248,7 +248,7 @@ def test_row_tensor_attr():
 def test_row_tensor_sparse_gatherv2_grad_all():
    grad_all = C.GradOperation('get_all', get_all=True)
    grad_all = C.GradOperation(get_all=True)
    class GradWrap(nn.Cell):
        def __init__(self, network):
            super(GradWrap, self).__init__()
@@ -269,7 +269,7 @@ def test_row_tensor_sparse_gatherv2_grad_all():
 def test_row_tensor_sparse_gatherv2_grad_with_pram():
    grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
    grad_by_list = C.GradOperation(get_by_list=True)
    class GradWrap(nn.Cell):
        def __init__(self, network):
            super(GradWrap, self).__init__()
--- a/tests/ut/python/ir/test_sparse_tensor.py
+++ b/tests/ut/python/ir/test_sparse_tensor.py
@@ -28,7 +28,7 @@ from mindspore import Tensor, SparseTensor, context
 context.set_context(mode=context.GRAPH_MODE, enable_sparse=True)
 grad_op = C.GradOperation('get_all', get_all=True)
 grad_op = C.GradOperation(get_all=True)
 class MakeSparseTensor(nn.Cell):
    def __init__(self, dense_shape):
--- a/tests/ut/python/keep_order/test_keep_order.py
+++ b/tests/ut/python/keep_order/test_keep_order.py
@@ -50,7 +50,7 @@ class Func(nn.Cell):
        return out
 grad_s = C.GradOperation('grad_with_sens', get_all=True, sens_param=True)
 grad_s = C.GradOperation(get_all=True, sens_param=True)
 class Net(nn.Cell):
--- a/tests/ut/python/model/test_mix_precision.py
+++ b/tests/ut/python/model/test_mix_precision.py
@@ -166,8 +166,7 @@ class GetParamGrad(nn.Cell):
        super(GetParamGrad, self).__init__(auto_prefix=False)
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
        self.grad = C.GradOperation(get_by_list=True,
                                    sens_param=True)
    def construct(self, data, sens):
--- a/tests/ut/python/nn/optim/test_lr_schedule.py
+++ b/tests/ut/python/nn/optim/test_lr_schedule.py
@@ -22,7 +22,7 @@ from mindspore.ops.operations import BiasAdd, MatMul
 import mindspore.ops.composite as C
 grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
 grad_by_list = C.GradOperation(get_by_list=True)
 class Net(Cell):
--- a/tests/ut/python/nn/test_nn_pad.py
+++ b/tests/ut/python/nn/test_nn_pad.py
@@ -34,7 +34,7 @@ class Net(nn.Cell):
 class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = GradOperation(name="get_all", get_all=True, sens_param=True)
        self.grad = GradOperation(get_all=True, sens_param=True)
        self.network = network
    @ms_function
--- a/tests/ut/python/ops/test_bprop_disorder.py
+++ b/tests/ut/python/ops/test_bprop_disorder.py
@@ -28,7 +28,7 @@ from ....mindspore_test_framework.pipeline.forward.compile_forward \
    import pipeline_for_compile_forward_ge_graph_for_case_by_case_config
 grad_by_list_with_sens = C.GradOperation('grad_by_list_with_sens', get_by_list=True, sens_param=True)
 grad_by_list_with_sens = C.GradOperation(get_by_list=True, sens_param=True)
 class DisOrderTest1(nn.Cell):
--- a/tests/ut/python/ops/test_control_ops.py
+++ b/tests/ut/python/ops/test_control_ops.py
@@ -30,9 +30,9 @@ from mindspore.common import ms_function
 context.set_context(mode=context.GRAPH_MODE)
 grad_by_list = C.GradOperation('get_by_list', get_by_list=True)
 grad_all = C.GradOperation('get_all', get_all=True)
 grad_all_with_sens = C.GradOperation('grad_all_with_sens', get_all=True, sens_param=True)
 grad_by_list = C.GradOperation(get_by_list=True)
 grad_all = C.GradOperation(get_all=True)
 grad_all_with_sens = C.GradOperation(get_all=True, sens_param=True)
 def cond_data_test(x_init, y_init):
@@ -564,7 +564,7 @@ def test_switch_layer_env_eliminate():
    class NetGrad(nn.Cell):
        def __init__(self, net):
            super(NetGrad, self).__init__()
            self.grad_op = C.GradOperation('grad', get_by_list=True, sens_param=False)
            self.grad_op = C.GradOperation(get_by_list=True, sens_param=False)
            self.net = net
            self.weights = ParameterTuple(self.net.trainable_params())
@@ -593,7 +593,7 @@ def test_switch_layer_single_layer():
    class NetGrad(nn.Cell):
        def __init__(self, net):
            super(NetGrad, self).__init__()
            self.grad_op = C.GradOperation('grad', get_by_list=True, sens_param=False)
            self.grad_op = C.GradOperation(get_by_list=True, sens_param=False)
            self.net = net
            self.weights = ParameterTuple(self.net.trainable_params())
--- a/tests/ut/python/ops/test_math_ops.py
+++ b/tests/ut/python/ops/test_math_ops.py
@@ -38,7 +38,7 @@ context.set_context(mode=context.GRAPH_MODE)
 # W0613: unused-argument
 # W0231: super-init-not-called
 grad = C.GradOperation('grad')
 grad = C.GradOperation()
 def test_multiply():
    """ test_multiply """