Refactor GraphKernelExpander (1st submission)

Decoupled GraphKernelExpander from the front-end interfaces. 1. Removed the call to "Renormalize". Completed the infer-format in model_builder. Only the device shape and device format are used to infer an abstract shape, without considering padding. 2. Removed the calls to Python's Primitive interfaces. "Renormalize" relies on PrimitivePy, so they can be removed together. After that, the functions "ConstAttrToInput", "DeleteAttrInInput" and related helpers can be removed. 3. Reused the AkgKernelJsonGenerator in GraphKernelExpander. 1) Set the attribute "extract_opinfo_from_anf" to true, so that the generator can handle basic operators with ANF info. 2) Added a function "extract_expand_info" in expander.py to convert the JSON into a friendlier format. The attrs are converted to a dict instead of a list. 4. Scalars only support DefaultFormat. Removed the argument "format" from graph_builder.value. 5. Moved the expander op list from graph_kernel_helper.cc to graph_kernel_expander.cc.
5 years ago · e0e6c39eae
--- a/mindspore/_extends/graph_kernel/expander.py
+++ b/mindspore/_extends/graph_kernel/expander.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,18 +20,31 @@ from mindspore import log as logger
 import mindspore._extends.graph_kernel.expanders as expanders


 def extract_expand_info(kernel_info):
    """
    Reshape the kernel json into the layout consumed by the op expanders.

    The nested groups under 'input_desc' are concatenated into one flat list,
    and the 'attr' list of {"name": ..., "value": ...} entries becomes a plain
    name -> value dict. A missing or empty 'input_desc' / 'attr' yields an empty
    list / dict. 'name', 'output_desc' and 'process' are copied through as-is
    and must be present (a KeyError is raised otherwise).
    """
    flat_inputs = []
    for group in kernel_info.get('input_desc') or ():
        flat_inputs.extend(group)

    # Later entries with a repeated name overwrite earlier ones.
    attr_dict = {item["name"]: item["value"] for item in kernel_info.get('attr') or ()}

    return {
        "name": kernel_info["name"],
        "input_desc": flat_inputs,
        "output_desc": kernel_info["output_desc"],
        "attr": attr_dict,
        "process": kernel_info["process"],
    }


 def get_op_expander(json_str: str):
    """get op expander by json info"""
    try:
        kernel_info = json.loads(json_str)
        expand_info = kernel_info['expand_info']

        if 'name' not in expand_info:
            logger.error("expand info have no op name")
            return None
        if 'process' not in expand_info:
            logger.error("expand info have no processor info")
            return None
        expand_info = extract_expand_info(kernel_info)

        processor = expand_info['process']
        op_name = str(expand_info['name']).lower()
--- a/mindspore/_extends/graph_kernel/expanders/dropout_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/dropout_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,20 +21,15 @@ def expand_dropoutgrad(expand_info):
    # get op info.
    dy_desc = expand_info['input_desc'][0]
    mask_desc = expand_info['input_desc'][1]
    keep_prob = None
    for attr in expand_info['attr']:
        if 'keep_prob' in attr:
            keep_prob = attr['keep_prob']
    if keep_prob is None:
        raise RuntimeError("keep_prob does not exist in attrs.")
    # generate a graph.
    keep_prob = expand_info['attr']['keep_prob']

    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_dy = graph_builder.tensor(dy_desc['shape'], dy_desc['data_type'], dy_desc['format'])
        input_mask = graph_builder.tensor(mask_desc['shape'], mask_desc['data_type'], mask_desc['format'])
        graph_scope.set_input(input_dy, input_mask)
        r_keep_prob = graph_builder.value(input_dy.dtype, 1.0 / keep_prob, "DefaultFormat")
        r_keep_prob = graph_builder.value(input_dy.dtype, 1.0 / keep_prob)
        # create op.
        result = graph_builder.emit('Mul', [input_dy, r_keep_prob])
        result = graph_builder.emit('Mul', [result, input_mask])
--- a/mindspore/_extends/graph_kernel/expanders/gelu.py
+++ b/mindspore/_extends/graph_kernel/expanders/gelu.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -40,17 +40,16 @@ def expand_gelu(expand_info):
        # cal y
        mul_0 = graph_builder.emit('Mul', [input_x, input_x])
        pow_0 = graph_builder.emit('Mul', [mul_0, input_x])
        const_csvalue = graph_builder.value(pow_0.dtype, CSVALUE, input_desc['format'])
        const_csvalue = graph_builder.value(pow_0.dtype, CSVALUE)
        mul_1 = graph_builder.emit('Mul', [pow_0, const_csvalue])
        tanh_res = graph_builder.emit('Add', [input_x, mul_1])
        const_csvalue_sqrt_two_div_pi = graph_builder.value(
            tanh_res.dtype, CSVALUE_SQRT_TWO_DIV_PI, input_desc['format'])
        const_csvalue_sqrt_two_div_pi = graph_builder.value(tanh_res.dtype, CSVALUE_SQRT_TWO_DIV_PI)
        y = graph_builder.emit('Mul', [tanh_res, const_csvalue_sqrt_two_div_pi])

        # cal gelu(x)
        tanh_y = graph_builder.emit('Tanh', [y])
        const_one = graph_builder.value(tanh_y.dtype, ONE, input_desc['format'])
        const_half = graph_builder.value(tanh_y.dtype, HALF, input_desc['format'])
        const_one = graph_builder.value(tanh_y.dtype, ONE)
        const_half = graph_builder.value(tanh_y.dtype, HALF)
        tanh_y_add_one = graph_builder.emit('Add', [tanh_y, const_one])
        mul_x = graph_builder.emit('Mul', [input_x, tanh_y_add_one])
        result = graph_builder.emit('Mul', [const_half, mul_x])
--- a/mindspore/_extends/graph_kernel/expanders/gelu_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/gelu_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -45,12 +45,11 @@ def expand_gelugrad(expand_info):
        graph_scope.set_input(input_dy, input_x, input_y)

        # create some const var
        const_csvalue = graph_builder.value(input_dy.dtype, CSVALUE, input_desc_0['format'])
        const_csvalue_sqrt_two_div_pi = graph_builder.value(
            input_dy.dtype, CSVALUE_SQRT_TWO_DIV_PI, input_desc_0['format'])
        const_csvalue_tri = graph_builder.value(input_dy.dtype, CSVALUE_TRI, input_desc_0['format'])
        const_one = graph_builder.value(input_dy.dtype, ONE, input_desc_0['format'])
        const_half = graph_builder.value(input_dy.dtype, HALF, input_desc_0['format'])
        const_csvalue = graph_builder.value(input_dy.dtype, CSVALUE)
        const_csvalue_sqrt_two_div_pi = graph_builder.value(input_dy.dtype, CSVALUE_SQRT_TWO_DIV_PI)
        const_csvalue_tri = graph_builder.value(input_dy.dtype, CSVALUE_TRI)
        const_one = graph_builder.value(input_dy.dtype, ONE)
        const_half = graph_builder.value(input_dy.dtype, HALF)

        # cal mul_right
        mul_double = graph_builder.emit('Mul', [input_x, input_x])
--- a/mindspore/_extends/graph_kernel/expanders/gkdropout.py
+++ b/mindspore/_extends/graph_kernel/expanders/gkdropout.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,25 +21,20 @@ def expand_gkdropout(expand_info):
    # get op info.
    input_desc = expand_info['input_desc'][0]
    maks_desc = expand_info['input_desc'][1]
    keep_prob = None
    for attr in expand_info['attr']:
        if 'keep_prob' in attr:
            keep_prob = attr['keep_prob']
    if keep_prob is None:
        raise RuntimeError("keep_prob does not exist in attrs.")
    # generate a graph.
    keep_prob = expand_info['attr']['keep_prob']

    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
        input_mask = graph_builder.tensor(maks_desc['shape'], maks_desc['data_type'], maks_desc['format'])
        graph_scope.set_input(input_x, input_mask)
        keep_prob_v = graph_builder.value(input_x.dtype, keep_prob, "DefaultFormat")
        r_keep_prob = graph_builder.value(input_x.dtype, 1.0 / keep_prob, "DefaultFormat")
        keep_prob_v = graph_builder.value(input_x.dtype, keep_prob)
        r_keep_prob = graph_builder.value(input_x.dtype, 1.0 / keep_prob)

        if input_mask.dtype != input_x.dtype:
            input_mask = graph_builder.emit('Cast', [input_mask], attrs={'dst_type': input_x.dtype})
        mask = graph_builder.emit('LessEqual', [input_mask, keep_prob_v]) # output is bool type
        mask = graph_builder.emit('LessEqual', [input_mask, keep_prob_v])  # output is bool type
        mask = graph_builder.emit('Cast', [mask], attrs={'dst_type': input_x.dtype})

        # compute result
--- a/mindspore/_extends/graph_kernel/expanders/layernorm.py
+++ b/mindspore/_extends/graph_kernel/expanders/layernorm.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,16 +23,10 @@ def expand_layernorm(expand_info):
    input_desc_1 = expand_info['input_desc'][1]
    input_desc_2 = expand_info['input_desc'][2]
    attrs = expand_info['attr']
    begin_norm_axis = None
    epsilon = None
    for item in attrs:
        if 'begin_norm_axis' in item:
            begin_norm_axis = item['begin_norm_axis']
        if 'epsilon' in item:
            epsilon = item['epsilon']
    graph_builder = builder.GraphBuilder()
    begin_norm_axis = attrs['begin_norm_axis']
    epsilon = attrs['epsilon']

    # generate a graph.
    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
@@ -52,7 +46,7 @@ def expand_layernorm(expand_info):
        for i in reduce_axis:
            reduce_elts *= shape_x[i]
        mean_cof = 1.0 / reduce_elts
        mean_cof_v = graph_builder.value(input_x.dtype, mean_cof, input_x.data_format)
        mean_cof_v = graph_builder.value(input_x.dtype, mean_cof)

        # Calculate mean
        mean_red = graph_builder.emit('ReduceSum', [input_x], attrs={'reduce_axis': reduce_axis, 'keep_dims': True})
@@ -67,7 +61,7 @@ def expand_layernorm(expand_info):

        # Calculate normalize
        normalize_sub = graph_builder.emit('Sub', [input_x, mean])
        epsilon_v = graph_builder.value(input_x.dtype, epsilon, input_x.data_format)
        epsilon_v = graph_builder.value(input_x.dtype, epsilon)
        normalize_add = graph_builder.emit('Add', [variance, epsilon_v])
        normlize_rsqrt = graph_builder.emit('Rsqrt', [normalize_add])
        normalize_mul = graph_builder.emit('Mul', [normalize_sub, normlize_rsqrt])
--- a/mindspore/_extends/graph_kernel/expanders/layernorm_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/layernorm_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,16 +24,10 @@ def expand_layernormgrad(expand_info):
    var_desc = expand_info['input_desc'][2]
    mean_desc = expand_info['input_desc'][3]
    gamma_desc = expand_info['input_desc'][4]
    begin_norm_axis = None
    begin_params_axis = None
    epsilon = 1e-11
    for item in expand_info['attr']:
        if 'begin_norm_axis' in item:
            begin_norm_axis = item['begin_norm_axis']
        if 'begin_params_axis' in item:
            begin_params_axis = item['begin_params_axis']
        if 'epsilon' in item:
            epsilon = item['epsilon']
    attrs = expand_info['attr']
    begin_norm_axis = attrs['begin_norm_axis']
    begin_params_axis = attrs['begin_params_axis']
    epsilon = attrs['epsilon'] if 'epsilon' in attrs else 1e-11

    shape_x = x_desc['shape']
    if begin_norm_axis < 0:
@@ -57,13 +51,13 @@ def expand_layernormgrad(expand_info):
        graph_scope.set_input(x, dy, variance, mean, gamma)

        # set some constant val.
        eps = graph_builder.value(x.dtype, epsilon, x.data_format)
        const_one = graph_builder.value(x.dtype, 1.0, x.data_format)
        const_neg_half = graph_builder.value(x.dtype, -0.5, x.data_format)
        const_neg_two = graph_builder.value(x.dtype, -2.0, x.data_format)
        const_two = graph_builder.value(x.dtype, 2.0, x.data_format)
        const_neg_one = graph_builder.value(x.dtype, -1.0, x.data_format)
        mean_cof = graph_builder.value(x.dtype, (1.0 / reduce_size), x.data_format)
        eps = graph_builder.value(x.dtype, epsilon)
        const_one = graph_builder.value(x.dtype, 1.0)
        const_neg_half = graph_builder.value(x.dtype, -0.5)
        const_neg_two = graph_builder.value(x.dtype, -2.0)
        const_two = graph_builder.value(x.dtype, 2.0)
        const_neg_one = graph_builder.value(x.dtype, -1.0)
        mean_cof = graph_builder.value(x.dtype, (1.0 / reduce_size))

        # cal dg db
        var_eps = graph_builder.emit('Add', [variance, eps])
--- a/mindspore/_extends/graph_kernel/expanders/logsoftmax.py
+++ b/mindspore/_extends/graph_kernel/expanders/logsoftmax.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,11 +20,7 @@ def expand_logsoftmax(expand_info):
    """LogSoftmax expander"""
    # get op info.
    input_desc = expand_info['input_desc'][0]
    attrs = expand_info['attr']
    axis = None
    for item in attrs:
        if 'axis' in item:
            axis = item['axis']
    axis = expand_info['attr']['axis']
    graph_builder = builder.GraphBuilder()
    if isinstance(axis, int):
        axis = (axis,)
--- a/mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,11 +21,7 @@ def expand_logsoftmaxgrad(expand_info):
    # get op info.
    input_desc_0 = expand_info['input_desc'][0]
    input_desc_1 = expand_info['input_desc'][1]
    attrs = expand_info['attr']
    axis = None
    for item in attrs:
        if 'axis' in item:
            axis = item['axis']
    axis = expand_info['attr']['axis']
    graph_builder = builder.GraphBuilder()

    if isinstance(axis, int):
--- a/mindspore/_extends/graph_kernel/expanders/maximum_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/maximum_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,16 +23,10 @@ def expand_maximumgrad(expand_info):
    input_desc_1 = expand_info['input_desc'][1]
    input_desc_2 = expand_info['input_desc'][2]
    attrs = expand_info['attr']
    grad_x = None
    grad_y = None
    for item in attrs:
        if 'grad_x' in item:
            grad_x = item['grad_x']
        if 'grad_y' in item:
            grad_y = item['grad_y']
    graph_builder = builder.GraphBuilder()
    grad_x = attrs['grad_x'] if 'grad_x' in attrs else True
    grad_y = attrs['grad_y'] if 'grad_y' in attrs else True

    # generate a graph.
    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
--- a/mindspore/_extends/graph_kernel/expanders/minimum_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/minimum_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,15 +23,10 @@ def expand_minimumgrad(expand_info):
    input_desc_1 = expand_info['input_desc'][1]
    input_desc_2 = expand_info['input_desc'][2]
    attrs = expand_info['attr']
    grad_x = None
    grad_y = None
    for item in attrs:
        if 'grad_x' in item:
            grad_x = item['grad_x']
        if 'grad_y' in item:
            grad_y = item['grad_y']
    grad_x = attrs['grad_x'] if 'grad_x' in attrs else True
    grad_y = attrs['grad_y'] if 'grad_y' in attrs else True

    graph_builder = builder.GraphBuilder()
    # generate a graph.
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
--- a/mindspore/_extends/graph_kernel/expanders/reduce_mean.py
+++ b/mindspore/_extends/graph_kernel/expanders/reduce_mean.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,20 +18,13 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

 def expand_reducemean(expand_info):
    """ReduceMean expander"""

    # get op info.
    input_desc = expand_info['input_desc'][0]
    attrs = expand_info['attr']
    axis = None
    keep_dims = None
    for item in attrs:
        if 'axis' in item:
            axis = item['axis']
        if 'keep_dims' in item:
            keep_dims = item['keep_dims']
    graph_builder = builder.GraphBuilder()
    axis = attrs['axis']
    keep_dims = attrs['keep_dims']

    # generate a graph.
    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
@@ -49,7 +42,7 @@ def expand_reducemean(expand_info):
            for idx in axis:
                all_shape *= x_shape[idx]

        all_shape_value = graph_builder.value(input_x.dtype, all_shape, input_x.data_format)
        all_shape_value = graph_builder.value(input_x.dtype, all_shape)

        if not axis:
            sum_x = graph_builder.emit('ReduceSum', [input_x], attrs={'reduce_axis': real_axis, 'keep_dims': keep_dims})
--- a/mindspore/_extends/graph_kernel/expanders/softmax.py
+++ b/mindspore/_extends/graph_kernel/expanders/softmax.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,16 +18,10 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

 def expand_softmax(expand_info):
    """Softmax expander"""
    # get op info.
    input_desc = expand_info['input_desc'][0]
    attrs = expand_info['attr']
    axis = None
    for item in attrs:
        if 'axis' in item:
            axis = item['axis']
    graph_builder = builder.GraphBuilder()
    axis = expand_info['attr']['axis']

    # generate a graph.
    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
--- a/mindspore/_extends/graph_kernel/expanders/sqrt_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/sqrt_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ def expand_sqrtgrad(expand_info):
        graph_scope.set_input(input_x, input_dout)

        # cal result
        const_two = graph_builder.value(input_x.dtype, 2, input_x.data_format)
        const_two = graph_builder.value(input_x.dtype, 2)
        dividend = graph_builder.emit('Mul', [input_x, const_two])
        result = graph_builder.emit('RealDiv', [input_dout, dividend])

--- a/mindspore/_extends/graph_kernel/expanders/tanh_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/tanh_grad.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ def expand_tanhgrad(expand_info):
        # create tensor input.
        input_y = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
        input_dy = graph_builder.tensor(input_desc_1['shape'], input_desc_1['data_type'], input_desc_1['format'])
        const_one = graph_builder.value(input_y.dtype, ONE, input_y.data_format)
        const_one = graph_builder.value(input_y.dtype, ONE)
        graph_scope.set_input(input_y, input_dy)

        # cal result
--- a/mindspore/_extends/graph_kernel/expanders/tile.py
+++ b/mindspore/_extends/graph_kernel/expanders/tile.py
@@ -1,4 +1,4 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 # Copyright 2020-2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,18 +18,11 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

 def expand_tile(expand_info):
    """Tile expander"""

    # get op info.
    input_desc = expand_info['input_desc'][0]
    attrs = expand_info['attr']
    multiples = None
    for item in attrs:
        if 'multiples' in item:
            multiples = item['multiples']
    multiples = expand_info['attr']['multiples']
    output_shape, _, _, shape_compatible = builder.get_tile_output_shape(input_desc['shape'], multiples)
    graph_builder = builder.GraphBuilder()

    # generate a graph.
    graph_builder = builder.GraphBuilder()
    with graph_builder.graph_scope('main') as graph_scope:
        # create tensor input.
        input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
--- a/mindspore/_extends/graph_kernel/model/model_builder.py
+++ b/mindspore/_extends/graph_kernel/model/model_builder.py
@@ -15,7 +15,7 @@
 """GraphKernel model builder"""

 import copy
 from .model import PrimLib, Tensor, Value, Operator, Graph, AlignShape, AddControlBuddy
 from .model import PrimLib, Tensor, Value, Operator, Graph, AlignShape, AddControlBuddy, DataFormat


 def get_tile_output_shape(shape, multiples):
@@ -70,7 +70,7 @@ class OpInfer:

        real_shape = []
        for i, _ in enumerate(shape):
            if i not in attrs['reduce_axis']:
            if i not in attrs['reduce_axis'] and i - len(shape) not in attrs['reduce_axis']:
                real_shape.append(shape[i])
        return real_shape

@@ -106,7 +106,15 @@ class OpInfer:
    @staticmethod
    def default_infer_format_func(inputs, attrs):
        """Infer format"""
        return inputs[0].data_format
        result = inputs[0].data_format
        # default_format and other_format results in other_format
        for input_tensor in inputs[1:]:
            data_format = input_tensor.data_format
            if data_format != DataFormat.DEFAULT:
                if result not in [DataFormat.DEFAULT, data_format]:
                    raise RuntimeError("Incompatible data format %s and %s" % (data_format, result))
                result = data_format
        return result

    infer_shape_func = {
        # add special infer func here
@@ -114,13 +122,20 @@ class OpInfer:
        'Reshape': lambda inputs, attrs: attrs["shape"],
        'BroadcastTo': lambda inputs, attrs: attrs["shape"],
        'Tile': lambda inputs, attrs: get_tile_output_shape(inputs[0].shape, attrs["multiples"])[0],
        'ExpandDims': lambda inputs, attrs: list(inputs[0].shape).insert(attrs["axis"], 1),
    }
    infer_dtype_func = {
        # add special infer func here
        'Cast': lambda inputs, attrs: attrs['dst_type'],
        'Less': lambda inputs, attrs: "bool",
        'LessEqual': lambda inputs, attrs: "bool",
        'Equal': lambda inputs, attrs: "bool",
        'Greater': lambda inputs, attrs: "bool",
        'GreaterEqual': lambda inputs, attrs: "bool",
    }
    infer_format_func = {
        # add special infer func here
        'Reshape': lambda inputs, attrs: "DefaultFormat",
    }

    @classmethod
@@ -188,18 +203,12 @@ class GraphBuilder:
            shape = [1]
        return Tensor(name, shape, dtype, data_format, para_type=para_type)

    def value(self, dtype, value, data_format, name=None):
    def value(self, dtype, value, name=None):
        """Create a new Value"""
        if name in (None, ''):
            name = self._alloc_tensor_name()

        if dtype == "float16":
            # For float16 value, it will be changed to float32 wrongly. And there is no good solution for now.
            # So instead just declare float32 value and then cast it to float16.
            v_fp32 = Value(name, "float32", value, data_format)
            v = self.emit("Cast", [v_fp32], attrs={"dst_type": "float16"})
        else:
            v = Value(name, dtype, value, data_format)
        v = Value(name, dtype, value)
        return v

    def op(self, prim, output, inputs, attrs=None):
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc
@@ -19,8 +19,7 @@
 #include <memory>
 #include <sstream>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <map>
 #include <vector>
 #include "backend/kernel_compiler/akg/akg_kernel_json_generator.h"
 #include "backend/kernel_compiler/common_utils.h"
@@ -46,6 +45,62 @@ namespace {
 constexpr auto kIsFeatureMapOutput = "IsFeatureMapOutput";
 constexpr auto kIsFeatureMapInputList = "IsFeatureMapInputList";

 /**
  * Helper that derives a stand-in abstract shape from a device shape and its format.
  *
  * All members are static; the class only groups the per-format conversion routines.
  */
 class AbstractShapeCreator {
  public:
   using AbstractShapeTransferFunc = std::function<ShapeVector(const ShapeVector &)>;
   /**
    * Get an abstract shape.
    * For a given device_shape and format, the available abstract_shape is not unique,
    * this interface only returns a legal abstract_shape without considering padding
    * so that the AnfAlgo's get device shape interface can get the right device_shape.
    *
    * @param device_shape Shape as laid out on the device.
    * @param format Format name of device_shape.
    * @return device_shape unchanged for kOpFormat_ND / kOpFormat_DEFAULT and for formats
    *         that have no registered conversion (a warning is logged in the latter case);
    *         otherwise the result of the conversion registered for the format.
    */
   static ShapeVector GetFakeAbstractShape(const ShapeVector &device_shape, const std::string &format) {
     if (format == kOpFormat_ND || format == kOpFormat_DEFAULT) {
       return device_shape;
     }
     // The table never changes, so build it once instead of on every call.
     static const std::map<std::string, AbstractShapeTransferFunc> fmap{
       {kOpFormat_NCHW, NchwAbstractShape},
       {kOpFormat_NHWC, NhwcAbstractShape},
       {kOpFormat_FRAC_NZ, FractalNzAbstractShape},
     };
     const auto iter = fmap.find(format);
     if (iter == fmap.end()) {
       MS_LOG(WARNING) << "Unexpected format[" << format << "]";
       return device_shape;
     }
     return iter->second(device_shape);
   }

  private:
   // NCHW: the device shape is returned as it is.
   static ShapeVector NchwAbstractShape(const ShapeVector &device_shape) { return device_shape; }

   // NHWC: requires exactly 4 dims; returns them reordered as {d0, d3, d1, d2}.
   static ShapeVector NhwcAbstractShape(const ShapeVector &device_shape) {
     if (device_shape.size() != 4) {
       MS_LOG(EXCEPTION) << "Shape size of NHWC should be 4, but got " << device_shape.size();
     }
     return {device_shape[0], device_shape[3], device_shape[1], device_shape[2]};
   }

   // FRACTAL_NZ: a 1-D shape whose only dim is 1 or a multiple of kCubeSize is returned as it is.
   // Otherwise at least 4 dims are required: the leading (dims - 4) dims are copied, and the
   // last four dims are folded into two: m = d[dims-3] * d[dims-2], n = d[dims-4] * d[dims-1].
   static ShapeVector FractalNzAbstractShape(const ShapeVector &device_shape) {
     if (device_shape.size() == 1 && (device_shape[0] == 1 || device_shape[0] % kCubeSize == 0)) {
       return device_shape;
     }
     if (device_shape.size() < 4) {
       MS_LOG(EXCEPTION) << "Shape size of FRACTAL_NZ should >= 4, but got " << device_shape.size();
     }
     ShapeVector shape;
     size_t dims = device_shape.size();
     size_t batch = dims - 4;
     for (size_t i = 0; i < batch; ++i) {
       shape.push_back(device_shape[i]);
     }
     int64_t m = device_shape[dims - 3] * device_shape[dims - 2];
     int64_t n = device_shape[dims - 4] * device_shape[dims - 1];
     shape.push_back(m);
     shape.push_back(n);

     return shape;
   }
 };

 class CNodeDecoder {
 public:
  explicit CNodeDecoder(std::map<std::string, AnfNodePtr> *nodes_map) : nodes_map_(*nodes_map) {}
@@ -66,6 +121,7 @@ class CNodeDecoder {
      return nullptr;
    }
    CreateKernelInfo(processor);
    CreateAbstract();
    return cnode_;
  }

@@ -117,12 +173,8 @@ class CNodeDecoder {

  bool DecodeInputDesc(const nlohmann::json &cnode_json, const FuncGraphPtr &func_graph) {
    std::string op_name = cnode_json[kJsonKeyName];
    // new primitive.
    auto primitive = GetPrimitive(op_name);
    if (primitive == nullptr) {
      MS_LOG(ERROR) << "Create primitive failed.";
      return false;
    }
    auto primitive = CreatePrimitiveWithAttrs(op_name);
    MS_EXCEPTION_IF_NULL(primitive);

    // collect inputs.
    auto primitive_v = NewValueNode(primitive);
@@ -142,6 +194,7 @@ class CNodeDecoder {
      }
      input_formats_.push_back(input_desc[kJsonKeyFormat]);
      input_types_.push_back(DtypeToTypeId(input_desc[kJsonKeyDataType]));
      input_shapes_.push_back(input_desc[kJsonKeyShape]);
    }
    // new cnode.
    cnode_ = func_graph->NewCNode(inputs);
@@ -160,6 +213,7 @@ class CNodeDecoder {
      nlohmann::json output_desc = output_descs[0];
      output_formats_.push_back(output_desc[kJsonKeyFormat]);
      output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
      output_shapes_.push_back(output_desc[kJsonKeyShape]);
      nodes_map_[output_desc[kJsonKeyTensorName]] = cnode_;
    } else {
      // multi outputs.
@@ -167,6 +221,7 @@ class CNodeDecoder {
        nlohmann::json output_desc = output_descs[j];
        output_formats_.push_back(output_desc[kJsonKeyFormat]);
        output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
        output_shapes_.push_back(output_desc[kJsonKeyShape]);
        auto get_item =
          func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToLong(j))});
        func_graph->AddNode(get_item);
@@ -219,72 +274,29 @@ class CNodeDecoder {
    AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), cnode_.get());
  }

  ValuePtr CreatOpInstance(const std::string &op_name, const std::vector<ValuePtr> &attrs) {
    // python utils.
    constexpr auto kGetPythonOpFunc = "_get_python_op";
    constexpr auto kParallelUtilsModule = "mindspore.parallel._utils";
    // almost all ops are defined in this path.
    constexpr auto kOperationsModule = "mindspore.ops.operations";
    py::module mod = py::module::import(kOperationsModule);
    if (!py::hasattr(mod, op_name.c_str())) {
      MS_LOG(ERROR) << kOperationsModule << " don't have attr: " << op_name;
      return nullptr;
    }
    std::vector<py::object> arg_list;
    (void)std::transform(attrs.begin(), attrs.end(), std::back_inserter(arg_list),
                         [](const ValuePtr &attr) { return ValuePtrToPyData(attr); });
    py::object obj = parse::python_adapter::CallPyFn(kParallelUtilsModule, kGetPythonOpFunc, op_name, kOperationsModule,
                                                     op_name, arg_list);
    ValuePtr op_instance = nullptr;
    bool succ = parse::ConvertData(obj, &op_instance);
    if (!succ) {
      MS_LOG(ERROR) << "Get python op " << op_name << " from " << kOperationsModule << " failed.";
      return nullptr;
    }
    return op_instance;
  void CreateAbstract() {
    auto shape = AbstractShapeCreator::GetFakeAbstractShape(output_shapes_[0], output_formats_[0]);
    auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(output_types_[0]), shape);
    cnode_->set_abstract(abstract);
  }

  const std::map<std::string, std::vector<std::string>> op_attrs_map_ = {
    {kReduceSumOpName, std::vector<std::string>{kAttrKeepDims}},
    {kReduceMaxOpName, std::vector<std::string>{kAttrKeepDims}},
    {kReduceMinOpName, std::vector<std::string>{kAttrKeepDims}},
    {kBroadcastToOpName, std::vector<std::string>{kAttrShape}},
  };

  PrimitivePtr GetPrimitive(const std::string &op_name) {
    PrimitivePtr primitive{nullptr};
    if (op_attrs_map_.count(op_name) == 0) {
      // no attrs for op instance.
      primitive = CreatOpInstance(op_name, std::vector<ValuePtr>{})->cast<PrimitivePtr>();
    } else {
      // make attrs for op instance.
      std::vector<ValuePtr> op_attrs;
      const auto &attr_names = op_attrs_map_.at(op_name);
      for (const auto &attr_name : attr_names) {
        if (cnode_attrs_.count(attr_name) == 0) {
          MS_LOG(ERROR) << "Attr: " << attr_name << " for: " << op_name << " not found.";
          return nullptr;
        }
        op_attrs.push_back(cnode_attrs_.at(attr_name));
      }
      primitive = CreatOpInstance(op_name, op_attrs)->cast<PrimitivePtr>();
    }
    if (primitive != nullptr) {
      for (const auto &attr : cnode_attrs_) {
        primitive->AddAttr(attr.first, attr.second);
      }
  PrimitivePtr CreatePrimitiveWithAttrs(const std::string &op_name) {
    auto primitive = std::make_shared<Primitive>(op_name);
    for (const auto &attr : cnode_attrs_) {
      primitive->AddAttr(attr.first, attr.second);
    }
    return primitive;
  }

  ScalarPtr DecodeScalar(const nlohmann::json &scalar_json) {
  tensor::TensorPtr DecodeScalar(const nlohmann::json &scalar_json) {
    auto type_id = DtypeToTypeId(scalar_json[kJsonKeyDataType]);
    switch (type_id) {
      case kNumberTypeFloat16:
        return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat16);
      case kNumberTypeFloat32:
        return std::make_shared<FP32Imm>(scalar_json[kJsonKeyValue]);
        return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat32);
      case kNumberTypeInt32:
        return std::make_shared<Int32Imm>(scalar_json[kJsonKeyValue]);
        return std::make_shared<tensor::Tensor>(static_cast<int64_t>(scalar_json[kJsonKeyValue]), kInt32);
      default:
        MS_LOG(ERROR) << "Unknown type: " << scalar_json[kJsonKeyDataType];
        break;
@@ -294,9 +306,8 @@ class CNodeDecoder {

  ValueNodePtr DecodeValueNode(const nlohmann::json &value_json, const FuncGraphPtr &func_graph) {
    MS_LOG(DEBUG) << "start decode value node, " << value_json;
    auto scalar = DecodeScalar(value_json);
    auto tensor = ScalarToTensor(scalar);

    auto tensor = DecodeScalar(value_json);
    MS_EXCEPTION_IF_NULL(tensor);
    auto value_node = std::make_shared<ValueNode>(tensor);
    value_node->set_abstract(tensor->ToAbstract());
    // create kernel_info for new value node.
@@ -319,6 +330,8 @@ class CNodeDecoder {
  std::vector<std::string> output_formats_;
  std::vector<TypeId> input_types_;
  std::vector<TypeId> output_types_;
  std::vector<ShapeVector> input_shapes_;
  std::vector<ShapeVector> output_shapes_;
  CNodePtr cnode_{nullptr};
 };
 }  // namespace
@@ -329,11 +342,16 @@ ParameterPtr AkgKernelJsonDecoder::DecodeParameter(const nlohmann::json &paramet
  ParameterPtr new_parameter = func_graph->add_parameter();
  std::string name = parameter_json[kJsonKeyTensorName];
  new_parameter->set_name(name);
  std::string format = parameter_json[kJsonKeyFormat];
  TypeId dtype = DtypeToTypeId(parameter_json[kJsonKeyDataType]);
  ShapeVector shape = AbstractShapeCreator::GetFakeAbstractShape(parameter_json[kJsonKeyShape], format);
  auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(dtype), shape);
  new_parameter->set_abstract(abstract);
  auto kernel_info = std::make_shared<device::KernelInfo>();
  new_parameter->set_kernel_info(kernel_info);
  auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
  builder->SetOutputsFormat(std::vector<std::string>{parameter_json[kJsonKeyFormat]});
  builder->SetOutputsDeviceType(std::vector<TypeId>{DtypeToTypeId(parameter_json[kJsonKeyDataType])});
  builder->SetOutputsFormat(std::vector<std::string>{format});
  builder->SetOutputsDeviceType(std::vector<TypeId>{dtype});
  AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), new_parameter.get());
  nodes_map_[name] = new_parameter;
  return new_parameter;
@@ -349,6 +367,7 @@ CNodePtr AkgKernelJsonDecoder::DecodeCNode(const nlohmann::json &cnode_json, con
 AnfNodePtr AkgKernelJsonDecoder::DecodeOutput(const std::vector<nlohmann::json> &output_descs,
                                              const FuncGraphPtr &func_graph) {
  std::vector<AnfNodePtr> outputs{NewValueNode(prim::kPrimMakeTuple)};
  AbstractBasePtrList output_abstract_list;
  for (const auto &output_desc : output_descs) {
    std::string name = output_desc[kJsonKeyTensorName];
    if (nodes_map_.count(name) == 0) {
@@ -356,11 +375,13 @@ AnfNodePtr AkgKernelJsonDecoder::DecodeOutput(const std::vector<nlohmann::json>
      return nullptr;
    }
    outputs.push_back(nodes_map_[name]);
    output_abstract_list.push_back(outputs.back()->abstract());
  }
  if (outputs.size() == 2) {
    func_graph->set_output(outputs[1]);
  } else {
    auto output = func_graph->NewCNode(outputs);
    output->set_abstract(std::make_shared<abstract::AbstractTuple>(output_abstract_list));
    func_graph->AddNode(output);
    func_graph->set_output(output);
  }
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
@@ -80,10 +80,12 @@ class OpInfoExtractor {
  }

  void ExtractOutputs(const OpInfoPtr &op_info) {
    // only support single output in op desc.
    auto io_info = std::make_shared<OpIOInfo>();
    io_info->set_name("output");
    op_info->add_outputs_ptr(io_info);
    size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(cnode_);
    for (size_t i = 0; i < output_tensor_num; i++) {
      auto io_info = std::make_shared<OpIOInfo>();
      io_info->set_name("output_" + std::to_string(i));
      op_info->add_outputs_ptr(io_info);
    }
  }

  bool ExcludeAttr(const std::string &name) {
@@ -204,8 +206,7 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con
      input_desc_json[kJsonKeyName] = input_ptr->name();
      input_desc_json[kJsonKeyTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index));
      auto input_shape = this->GetInputShape(anf_node, real_input_index);
      if (dump_option_.extract_opinfo_from_anfnode &&
          GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
      if (!is_basic_op_ && GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
        MS_LOG(DEBUG) << "Take input[" << real_input_index << "] of [" << anf_node->DebugString(2)
                      << "] as const tensor, shape: [" << Vector2Str(input_shape)
                      << "], value: " << input_desc_json[kJsonKeyValue];
@@ -529,9 +530,9 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_json);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  MS_LOG(INFO) << "Akg start generate kernel json desc, full scope name is : " << anf_node->fullname_with_scope();
  MS_LOG(DEBUG) << "Akg start generate kernel json desc, full scope name is : " << anf_node->fullname_with_scope();
  SetAkgKernelAttrs(anf_node);
  dump_option_.extract_opinfo_from_anfnode = false;
  is_basic_op_ = true;
  if (!GenerateSingleKernelJson(anf_node, kernel_json)) {
    MS_LOG(ERROR) << "Op[" << anf_node->fullname_with_scope() << "] create single kernel json failed.";
    return false;
@@ -551,8 +552,8 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
    return false;
  }

  MS_LOG(INFO) << "Akg create kernel json desc success, full scope name is : " << anf_node->fullname_with_scope()
               << ", json info name is : " << kernel_name_;
  MS_LOG(DEBUG) << "Akg create kernel json desc success, full scope name is : " << anf_node->fullname_with_scope()
                << ", json info name is : " << kernel_name_;
  return true;
 }

@@ -613,10 +614,11 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
                  << "].";
    return false;
  }
  MS_LOG(INFO) << "Fusion nodes: [" << output_list.size() << "], input_list: [" << anf_nodes.size()
               << "], output_list: [" << input_list.size() << "].";
  MS_LOG(DEBUG) << "Fusion nodes: [" << output_list.size() << "], input_list: [" << anf_nodes.size()
                << "], output_list: [" << input_list.size() << "].";
  std::map<AnfNodePtr, nlohmann::json> node_json_map;
  dump_option_.extract_opinfo_from_anfnode = true;
  is_basic_op_ = false;
  dump_option_.extract_opinfo_from_anfnode = true;  // always extract from anfnode for composite ops.
  if (!GenSingleJsons(anf_nodes, &node_json_map)) return false;

  UpdateTensorName(anf_nodes, &node_json_map);
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.h
@@ -144,6 +144,7 @@ class AkgKernelJsonGenerator {
  std::map<std::string, AnfNodePtr> address_node_map_;
  std::map<size_t, std::vector<std::string>> sub_graphs_;
  std::map<size_t, size_t> dim_infos_;
  bool is_basic_op_{false};
 };
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -393,6 +393,14 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
                                        broadcast_input_node};
  auto broadcast_to_node_inner = CreateCNode(
    atomic_clean_inputs, new_sub_graph, {.format = format, .shape = dst_shape_vec, .type = GetType(atomic_add_node_)});

  auto device_shape = AnfAlgo::GetOutputDeviceShape(atomic_add_node_, 0);
  dst_shape_vec.clear();
  if (device_shape.empty()) {
    dst_shape_vec.push_back(1);
  } else {
    std::transform(device_shape.begin(), device_shape.end(), std::back_inserter(dst_shape_vec), SizeToLong);
  }
  SetNodeAttrSafely("shape", MakeValue(dst_shape_vec), broadcast_to_node_inner);

  // Makeup sub-graph.
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -36,88 +36,40 @@
 namespace mindspore {
 namespace opt {
 namespace {
 constexpr auto kJsonKeyExpandInfo = "expand_info";

 #define GET_VALUE_FOR_JSON(JSON, VALUE, VALUE_ELEM, TYPE_NAME, TYPE) \
  if (VALUE_ELEM->isa<TYPE_NAME>()) {                                \
    JSON = GetValue<TYPE>(VALUE);                                    \
  }

 nlohmann::json ExpandAttrJsonInfo(const CNodePtr &cnode) {
  nlohmann::json attrs_json;
  if (auto prim = GetCNodePrimitive(cnode); prim != nullptr) {
    auto attrs = prim->attrs();
    for (const auto &[k, v] : attrs) {
      nlohmann::json attr_json;
      MS_LOG(DEBUG) << "attr key is : " << k << " and value type is : " << v->type_name();
      GET_VALUE_FOR_JSON(attr_json[k], v, v, Int32Imm, int);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, Int64Imm, int64_t);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, UInt32Imm, uint32_t);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, UInt64Imm, uint64_t);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, FP32Imm, float);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, FP64Imm, double);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, BoolImm, bool);
      GET_VALUE_FOR_JSON(attr_json[k], v, v, StringImm, std::string);

      if (v->isa<ValueList>() || v->isa<ValueTuple>()) {
        auto vec = v->isa<ValueList>() ? v->cast<ValueListPtr>()->value() : v->cast<ValueTuplePtr>()->value();
        if (!vec.empty()) {
          MS_LOG(DEBUG) << "value type is : " << vec[0]->type_name();
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], Int32Imm, std::vector<int>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], Int64Imm, std::vector<int64_t>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], UInt32Imm, std::vector<uint32_t>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], UInt64Imm, std::vector<uint64_t>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], FP32Imm, std::vector<float>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], FP64Imm, std::vector<double>);
          GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], StringImm, std::vector<std::string>);
        }
      }
      if (!attr_json.empty()) {
        attrs_json.push_back(attr_json);
      }
    }
  }
  return attrs_json;
 std::unordered_set<PrimitivePtr> GetExpandOps() {
  std::unordered_set<PrimitivePtr> expand_ops = {
    prim::kPrimSquare,
    prim::kPrimGeLUGrad,
 #if ENABLE_D
    prim::kPrimTile,
    prim::kPrimSqrtGrad,
    prim::kPrimClipByNormNoDivSum,
 #elif ENABLE_GPU
    prim::kPrimBiasAdd,
    prim::kPrimBiasAddGrad,
    prim::kPrimGeLU,
    prim::kPrimFusedAdam,
    prim::kPrimFusedAdamWeightDecay,
    prim::kPrimReduceMean,
    prim::kPrimMaximumGrad,
    prim::kPrimMinimumGrad,
    prim::kPrimGkDropout,
    prim::kPrimDropoutGrad,
    prim::kPrimSoftmax,
    prim::kPrimLayerNorm,
    prim::kPrimLayerNormGrad,
 #endif
  };
  return expand_ops;
 }
 }  // namespace

 bool ExpandJsonInfo(const CNodePtr &cnode, nlohmann::json *kernel_json) {
  MS_EXCEPTION_IF_NULL(kernel_json);
  if (kernel_json->find(kJsonKeyExpandInfo) != kernel_json->end()) {
    return false;
  }

  nlohmann::json expand_info;
  expand_info[kernel::kJsonKeyAttr] = ExpandAttrJsonInfo(cnode);
  expand_info[kernel::kJsonKeyName] = AnfAlgo::GetCNodeName(cnode);
  expand_info[kernel::kJsonKeyProcess] = kernel::GetProcessorStr(cnode);
  std::vector<nlohmann::json> inputs_info;
  for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); ++i) {
    nlohmann::json input_info;
    input_info[kernel::kJsonKeyFormat] = AnfAlgo::GetInputFormat(cnode, i);
    input_info[kernel::kJsonKeyInferShape] = AnfAlgo::GetPrevNodeOutputInferShape(cnode, i);
    input_info[kernel::kJsonKeyShape] = AnfAlgo::GetInputDeviceShape(cnode, i);
    input_info[kernel::kJsonKeyInferDataType] =
      kernel::TypeId2String(AnfAlgo::GetPrevNodeOutputInferDataType(cnode, i));
    input_info[kernel::kJsonKeyDataType] = kernel::TypeId2String(AnfAlgo::GetInputDeviceDataType(cnode, i));
    inputs_info.push_back(input_info);
  }
  expand_info[kernel::kJsonKeyInputDesc] = inputs_info;

  std::vector<nlohmann::json> outputs_info;
  for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(cnode); ++i) {
    nlohmann::json output_info;
    output_info[kernel::kJsonKeyFormat] = AnfAlgo::GetOutputFormat(cnode, i);
    output_info[kernel::kJsonKeyInferShape] = AnfAlgo::GetOutputInferShape(cnode, i);
    output_info[kernel::kJsonKeyShape] = AnfAlgo::GetOutputDeviceShape(cnode, i);
    output_info[kernel::kJsonKeyInferDataType] = kernel::TypeId2String(AnfAlgo::GetOutputInferDataType(cnode, i));
    output_info[kernel::kJsonKeyDataType] = kernel::TypeId2String(AnfAlgo::GetOutputDeviceDataType(cnode, i));
    outputs_info.push_back(output_info);
  }
  expand_info[kernel::kJsonKeyOutputDesc] = outputs_info;
  (*kernel_json)[kJsonKeyExpandInfo] = expand_info;
  return true;
 bool GraphKernelExpander::ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json) {
  DumpOption dump_option;
  dump_option.extract_opinfo_from_anfnode = true;
  kernel::AkgKernelJsonGenerator json_generator(dump_option);
  return json_generator.CollectJson(node, kernel_json);
 }
 }  // namespace

 FuncGraphPtr GraphKernelExpander::CreateExpandFuncGraph(const CNodePtr &node) {
  nlohmann::json kernel_json;
@@ -213,33 +165,11 @@ bool GraphKernelExpander::DoExpand(const FuncGraphPtr &func_graph) {

    // replace origin node.
    (void)mng->Replace(node, graph_kernel_node);

    ToPrimitive(AnfAlgo::GetCNodeFuncGraphPtr(graph_kernel_node));
    changed = true;
  }
  return changed;
 }

 // Replaces each PrimitivePy held as input 0 of a CNode in `func_graph` with a plain
 // Primitive copy. Nodes are visited in the reverse of the order returned by TopoSort.
 // Raises (via MS_EXCEPTION_IF_NULL) when the graph has no manager or a CNode has no primitive.
 void GraphKernelExpander::ToPrimitive(const FuncGraphPtr &func_graph) const {
  const auto sorted_nodes = TopoSort(func_graph->get_return());
  auto manager = func_graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  for (auto iter = sorted_nodes.rbegin(); iter != sorted_nodes.rend(); ++iter) {
    auto cnode = (*iter)->cast<CNodePtr>();
    if (cnode != nullptr) {
      auto prim = AnfAlgo::GetCNodePrimitive(cnode);
      MS_EXCEPTION_IF_NULL(prim);
      // Only python-backed primitives need to be converted.
      if (prim->isa<PrimitivePy>()) {
        auto plain_prim = std::make_shared<Primitive>(*prim);
        cnode->set_input(0, std::make_shared<ValueNode>(plain_prim));
      }
    }
  }
 }

 bool GraphKernelExpander::Run(const FuncGraphPtr &func_graph) {
  expand_ops_ = GetExpandOps();
  MS_EXCEPTION_IF_NULL(func_graph);
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_GRAPH_KERNEL_EXPANDER_H_
 #include <memory>
 #include <unordered_set>
 #include <nlohmann/json.hpp>
 #include "backend/optimizer/common/pass.h"
 #include "ir/func_graph.h"

@@ -31,7 +32,6 @@ class GraphKernelExpander : public Pass {
 private:
  FuncGraphPtr CreateExpandFuncGraph(const CNodePtr &node);
  bool DoExpand(const FuncGraphPtr &func_graph);
  void ToPrimitive(const FuncGraphPtr &func_graph) const;
  void EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList *inputs);
  AnfNodePtr CreateExpandGraphKernel(const FuncGraphPtr &func_graph, const FuncGraphPtr &new_func_graph,
                                     const CNodePtr &node);
@@ -39,6 +39,7 @@ class GraphKernelExpander : public Pass {
    return std::any_of(expand_ops_.begin(), expand_ops_.end(),
                       [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); });
  }
  bool ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json);

 private:
  std::unordered_set<PrimitivePtr> expand_ops_;
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
@@ -39,27 +39,6 @@
 namespace mindspore {
 namespace opt {
 namespace {
 // Appends a human-readable listing of `graph` to `buf`: its parameters, its value nodes
 // and the nodes returned by graph->nodes(), each section preceded by its element count.
 void DebugDump(const FuncGraphPtr &graph, std::stringstream *buf) {
  auto &out = *buf;

  out << "Parameters: \n";
  const auto &params = graph->parameters();
  out << "size: " << params.size() << "\n";
  for (const auto &param : params) {
    out << "\t" << param->DebugString(2) << "\n";
  }

  out << "ValueNodes: \n";
  const auto &values = graph->value_nodes();
  out << "size: " << values.size() << "\n";
  for (const auto &entry : values) {
    // Entries are pairs; the node itself is the first member.
    out << "\t" << entry.first->DebugString(2) << "\n";
  }

  out << "CNodes: \n";
  const auto &nodes = graph->nodes();
  out << "size: " << nodes.size() << "\n";
  for (const auto &node : nodes) {
    out << "\t" << node->DebugString(2) << "\n";
  }
 }

 bool IsMakeTupleOut(const AnfNodePtr &out, AnfNodePtrList *real_outs) {
  MS_EXCEPTION_IF_NULL(real_outs);
  if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) {
@@ -91,132 +70,6 @@ AbstractBasePtr GetOutputAbstract(const AnfNodePtr &node, size_t output_idx) {
  return out_spec;
 }

 // Reads the string attribute `attr_name` of `cnode`, converts it from a dtype string to a
 // TypeId and then to a type object, and wraps that type in a new value node.
 ValueNodePtr ProcessAttrsForCast(const CNodePtr &cnode, const std::string &attr_name) {
  const auto type_id = kernel::DtypeToTypeId(AnfAlgo::GetNodeAttr<std::string>(cnode, attr_name));
  auto dst_type = TypeIdToType(type_id);
  return NewValueNode(dst_type);
 }

 // Per-operator overrides for turning an attribute into a value node, keyed by op name.
 // Each handler receives the CNode and the attribute name and returns the value node to use.
 const std::map<std::string, std::function<ValueNodePtr(const CNodePtr &cnode, const std::string &attr_name)>>
  attrs_process_map = {
    {kCastOpName, ProcessAttrsForCast},
 };

 // Converts attribute `attr_name` of `cnode` into a value node. Operators registered in
 // attrs_process_map use their dedicated handler; every other operator gets the raw
 // attribute value wrapped in a value node.
 ValueNodePtr ProcessAttrValue(const CNodePtr &cnode, const std::string &attr_name) {
  const auto handler = attrs_process_map.find(AnfAlgo::GetCNodeName(cnode));
  if (handler != attrs_process_map.end()) {
    return handler->second(cnode, attr_name);
  }

  auto raw_value = AnfAlgo::GetNodeAttr<ValuePtr>(cnode, attr_name);
  return NewValueNode(raw_value);
 }

 // Builds a copy of `cnode` in which attributes listed in its kAttrInputNames attribute are
 // appended as value-node inputs.
 //
 // func_graph:  graph used to create the new CNode.
 // cnode:       node to process; must carry the kAttrInputNames attribute.
 // input_attrs: indices (into the input-name list) of the names that are attributes to convert.
 //
 // Returns nullptr when `input_attrs` is empty or when no attribute was converted; otherwise
 // the new CNode, which shares the abstract and kernel info of `cnode` and carries the rebuilt
 // kAttrInputNames. Raises via MS_LOG(EXCEPTION) when a listed attribute is missing on `cnode`.
 AnfNodePtr ConstAttrToInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
                            const std::unordered_set<size_t> &input_attrs) {
  MS_EXCEPTION_IF_NULL(func_graph);
  MS_EXCEPTION_IF_NULL(cnode);
  MS_LOG(DEBUG) << "process node: " << cnode->DebugString(2);
  if (input_attrs.empty()) {
    return nullptr;
  }

  auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
  MS_LOG(DEBUG) << "ori_input_names: " << kernel::Vector2Str(input_names);
  std::vector<AnfNodePtr> new_inputs;
  std::vector<std::string> new_input_names;
  const auto &inputs = cnode->inputs();
  // Keep the first inputs.size() - 1 names, i.e. one name per existing input except one
  // (presumably inputs[0] is the primitive -- confirm).
  // NOTE(review): input_names[i] is not bounds-checked; this relies on input_names having at
  // least inputs.size() - 1 entries and on inputs being non-empty.
  for (size_t i = 0; i < inputs.size() - 1; ++i) {
    new_input_names.push_back(input_names[i]);
  }

  // All existing inputs are carried over unchanged; converted attributes are appended after them.
  (void)new_inputs.insert(new_inputs.end(), inputs.begin(), inputs.end());
  bool need_update = false;
  // The remaining names have no matching input yet.
  for (size_t i = inputs.size() - 1; i < input_names.size(); ++i) {
    auto attr_name = input_names[i];
    if (input_attrs.find(i) == input_attrs.end()) {
      // Not registered as an attribute index: keep the name but add no input for it.
      MS_LOG(WARNING) << "Other type input between tensors and attrs, name: " << attr_name
                      << ", node: " << cnode->DebugString(2);
      new_input_names.push_back(attr_name);
      continue;
    }
    if (!AnfAlgo::HasNodeAttr(attr_name, cnode)) {
      MS_LOG(EXCEPTION) << "Attr: " << attr_name << " not found in node: " << cnode->DebugString(2);
    }

    // Hardcoded conversion. The attr value should be converted according to the format,
    // like for op ReduceSum.
    auto attr_val_node = ProcessAttrValue(cnode, attr_name);
    new_inputs.push_back(attr_val_node);
    new_input_names.push_back(attr_name);
    need_update = true;
    MS_LOG(DEBUG) << "convert attr: " << attr_name << " to input, value: " << attr_val_node;
  }
  MS_LOG(DEBUG) << "new_input_names: " << kernel::Vector2Str(new_input_names);

  if (!need_update) {
    return nullptr;
  }

  auto new_cnode = func_graph->NewCNode(new_inputs);
  // The abstract and kernel info are reused from the original node, not recomputed.
  new_cnode->set_abstract(cnode->abstract());
  new_cnode->set_kernel_info(cnode->kernel_info_ptr());
  AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(new_input_names), new_cnode);
  return new_cnode;
 }

 // Builds a copy of `cnode` without the value-node inputs whose position is listed in
 // `input_attrs`, and without the matching entries of kAttrInputNames.
 //
 // func_graph:  graph used to create the new CNode.
 // cnode:       node to process; must carry the kAttrInputNames attribute.
 // input_attrs: zero-based input positions to drop, counted after inputs[0].
 //
 // Returns nullptr when `input_attrs` is empty or when nothing was removed; otherwise the new
 // CNode, which shares the abstract and kernel info of `cnode`. Raises via MS_LOG(EXCEPTION)
 // when a removed position has no entry in the input-name list.
 AnfNodePtr DeleteAttrInInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
                             const std::unordered_set<size_t> &input_attrs) {
  MS_EXCEPTION_IF_NULL(func_graph);
  MS_EXCEPTION_IF_NULL(cnode);
  MS_LOG(DEBUG) << "process node: " << cnode->DebugString(2);
  if (input_attrs.empty()) {
    return nullptr;
  }

  auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
  MS_LOG(DEBUG) << "ori_input_names: " << kernel::Vector2Str(input_names);
  std::vector<AnfNodePtr> new_inputs;
  std::vector<std::string> new_input_names;

  const auto &inputs = cnode->inputs();
  // inputs[0] is always kept; the loop below walks inputs[1..] using a zero-based index i.
  new_inputs.push_back(inputs[0]);
  bool need_update = false;
  for (size_t i = 0; i < inputs.size() - 1; ++i) {
    auto input_node = inputs[i + 1];
    MS_EXCEPTION_IF_NULL(input_node);
    // The attrs count from 0. Only inputs that are value nodes are dropped.
    if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
      // value_node is only null-checked here; it is not used afterwards.
      auto value_node = input_node->cast<ValueNodePtr>();
      MS_EXCEPTION_IF_NULL(value_node);
      MS_LOG(DEBUG) << "delete attr input: " << i << " of node: " << cnode->DebugString(2);
      if (i >= input_names.size()) {
        MS_LOG(EXCEPTION) << "Index " << i << " is larger than input names size: " << input_names.size();
      }
      need_update = true;
    } else {
      new_inputs.push_back(input_node);
      // Kept inputs without a recorded name simply contribute no name.
      if (i < input_names.size()) {
        new_input_names.push_back(input_names[i]);
      }
    }
  }
  MS_LOG(DEBUG) << "new_input_names: " << kernel::Vector2Str(new_input_names);

  if (!need_update) {
    return nullptr;
  }

  auto new_cnode = func_graph->NewCNode(new_inputs);
  // The abstract and kernel info are reused from the original node, not recomputed.
  new_cnode->set_abstract(cnode->abstract());
  new_cnode->set_kernel_info(cnode->kernel_info_ptr());
  AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(new_input_names), new_cnode);
  return new_cnode;
 }

 AnfNodePtrList EliminateMakeTuple(const FuncGraphPtr &fg, const FuncGraphManagerPtr &mng) {
  AnfNodePtrList outs;
  auto out_node = fg->output();
@@ -396,59 +249,6 @@ void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const
  AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, new_node.get());
 }

 void ConstAttrToInput(const FuncGraphPtr &func_graph) {
  MS_EXCEPTION_IF_NULL(func_graph);
  auto mng = func_graph->manager();
  MS_EXCEPTION_IF_NULL(mng);
  std::vector<AnfNodePtr> todos;
  kernel::GetValidKernelNodes(func_graph, &todos);
  for (const auto &node : todos) {
    ConstInputToAttrInfoRegister reg;
    if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(node), &reg)) {
      continue;
    }
    auto new_node = ConstAttrToInput(func_graph, node->cast<CNodePtr>(), reg.GetConstInputAttrInfo());
    if (new_node != nullptr && new_node != node) {
      mng->Replace(node, new_node);
    }
  }
 }

 void DeleteAttrInInput(const FuncGraphPtr &func_graph) {
  MS_EXCEPTION_IF_NULL(func_graph);
  auto mng = func_graph->manager();
  MS_EXCEPTION_IF_NULL(mng);
  std::vector<AnfNodePtr> todos;
  kernel::GetValidKernelNodes(func_graph, &todos);
  for (const auto &node : todos) {
    ConstInputToAttrInfoRegister reg;
    if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(node), &reg)) {
      continue;
    }
    auto new_node = DeleteAttrInInput(func_graph, node->cast<CNodePtr>(), reg.GetConstInputAttrInfo());
    if (new_node != nullptr && new_node != node) {
      mng->Replace(node, new_node);
    }
  }
 }

 // Flattens a list of outputs: every element that IsMakeTupleOut recognises as a MakeTuple
 // is replaced in place by the real outputs it reports; every other element is kept as is.
 // A list with at most one element is returned unchanged (a single MakeTuple is not expanded).
 AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs) {
  if (outs.size() <= 1) {
    return outs;
  }

  // Built only after the early return, so the trivial case does not construct it.
  AnfNodePtrList res;
  // Iterate by const reference: copying each element would copy a shared pointer per iteration.
  for (const auto &out : outs) {
    AnfNodePtrList real_outs;
    if (IsMakeTupleOut(out, &real_outs)) {
      res.insert(res.end(), real_outs.begin(), real_outs.end());
    } else {
      res.push_back(out);
    }
  }
  return res;
 }

 AnfNodePtr CreateNewFuseCNode(const FuncGraphPtr &func_graph, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                              const AnfNodePtrList &outputs) {
  auto func_node = NewValueNode(fg);
@@ -661,68 +461,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNo
    MS_LOG(ERROR) << "Akg decode json to graph failed.";
    return nullptr;
  }

  pipeline::ResourcePtr resource = std::make_shared<pipeline::Resource>();
  auto mng = resource->manager();
  MS_EXCEPTION_IF_NULL(mng);
  mng->AddFuncGraph(fg);
  ConstAttrToInput(fg);
  std::stringstream buf;
  buf << "===================== graph after ConstAttrToInput " << fg->ToString() << " =====================\n";
  DebugDump(fg, &buf);
  MS_LOG(DEBUG) << buf.str();

  // Do infer and specialize.
  AbstractBasePtrList args_spec_list;
  std::for_each(inputs.begin(), inputs.end(),
                [&args_spec_list](const AnfNodePtr &node) { args_spec_list.push_back(node->abstract()); });
  auto infer_fg = pipeline::Renormalize(resource, fg, args_spec_list);
  if (infer_fg == nullptr) {
    MS_LOG(ERROR) << "Infer decoded graph failed.";
    return nullptr;
  }
  buf.str("");
  buf << "===================== graph after Renormalize " << infer_fg->ToString() << " =====================\n";
  DebugDump(infer_fg, &buf);
  MS_LOG(DEBUG) << buf.str();

  // delete no use inputs(attrs), like op ReduceSum(axis).
  DeleteAttrInInput(infer_fg);
  buf.str("");
  buf << "===================== graph after DeleteAttrInInput " << infer_fg->ToString() << " =====================\n";
  DebugDump(infer_fg, &buf);
  MS_LOG(DEBUG) << buf.str();

  // clone a new graph.
  auto new_fg = TransformableClone(infer_fg, std::make_shared<TraceTransform>("akg_decode"));
  return new_fg;
 }

 std::unordered_set<PrimitivePtr> GetExpandOps() {
  std::unordered_set<PrimitivePtr> expand_ops = {
    prim::kPrimSquare,
    prim::kPrimGeLUGrad,
 #if ENABLE_D
    prim::kPrimTile,
    prim::kPrimSqrtGrad,
    prim::kPrimClipByNormNoDivSum,
 #elif ENABLE_GPU
    prim::kPrimBiasAdd,
    prim::kPrimBiasAddGrad,
    prim::kPrimGeLU,
    prim::kPrimFusedAdam,
    prim::kPrimFusedAdamWeightDecay,
    prim::kPrimReduceMean,
    prim::kPrimMaximumGrad,
    prim::kPrimMinimumGrad,
    prim::kPrimGkDropout,
    prim::kPrimDropoutGrad,
    prim::kPrimSoftmax,
    prim::kPrimLayerNorm,
    prim::kPrimLayerNormGrad,
 #endif
  };
  return expand_ops;
  return fg;
 }

 std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &prefix, const string &postfix) {
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
@@ -61,7 +61,6 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
                                                                                AnfNodePtrList *src_outputs = nullptr);
 void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                      const AnfNodePtrList &outputs, kernel::Processor processor);
 AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
 AnfNodePtr CreateNewFuseCNode(const FuncGraphPtr &kernel_graph, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                              const AnfNodePtrList &outputs);
 void ReplaceNewFuseCNode(const FuncGraphPtr &kernel_graph, const AnfNodePtr &new_fuse_cnode,
@@ -74,7 +73,6 @@ bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, n
                   std::map<std::string, AnfNodePtr> *address_node_map);
 bool AnfToJsonDesc(const std::vector<AnfNodePtrList> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc);
 FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> &inputs);
 std::unordered_set<PrimitivePtr> GetExpandOps();
 std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &prefix = "", const string &postfix = "");
 std::vector<PrimitivePtr> GetFusibleOpList();
 bool IsBasicFuseOp(const AnfNodePtr &node);