Browse Source

Refactor GraphKernelExpander (1st submission)

Decoupled from the front-end interfaces.
1. Removed the call to "Renormalize".
   Completed the infer-format in model_builder.
   Only used the device shape and device format to
   infer an abstract shape without considering padding.
2. Removed the call to python's Primitive interfaces.
   The "Renormalize" relies on the PrimitivePy, so they can be
   removed together. After that, the functions "ConstAttrToInput",
   "DeleteAttrInInput" and related can be removed.
3. Reuse the AkgKernelJsonGenerator in GraphKernelExpander.
   1) set the attribute "extract_opinfo_from_anf" to true, so that
   the generator can handle the basic operator with anf info.
   2) added a function "extract_expand_info" in expander.py
   to convert the json into a more friendly format. The attrs
   was converted to a dict instead of a list.
4. Scalars only support DefaultFormat.
   Removed the argument "format" from graph_builder.value
5. Moved the expander op list from graph_kernel_helper.cc to graph_kernel_expander.cc
tags/v1.2.0-rc1
dayschan 5 years ago
parent
commit
e0e6c39eae
25 changed files with 258 additions and 599 deletions
  1. +22
    -9
      mindspore/_extends/graph_kernel/expander.py
  2. +4
    -9
      mindspore/_extends/graph_kernel/expanders/dropout_grad.py
  3. +5
    -6
      mindspore/_extends/graph_kernel/expanders/gelu.py
  4. +6
    -7
      mindspore/_extends/graph_kernel/expanders/gelu_grad.py
  5. +6
    -11
      mindspore/_extends/graph_kernel/expanders/gkdropout.py
  6. +6
    -12
      mindspore/_extends/graph_kernel/expanders/layernorm.py
  7. +12
    -18
      mindspore/_extends/graph_kernel/expanders/layernorm_grad.py
  8. +2
    -6
      mindspore/_extends/graph_kernel/expanders/logsoftmax.py
  9. +2
    -6
      mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py
  10. +4
    -10
      mindspore/_extends/graph_kernel/expanders/maximum_grad.py
  11. +4
    -9
      mindspore/_extends/graph_kernel/expanders/minimum_grad.py
  12. +5
    -12
      mindspore/_extends/graph_kernel/expanders/reduce_mean.py
  13. +3
    -9
      mindspore/_extends/graph_kernel/expanders/softmax.py
  14. +2
    -2
      mindspore/_extends/graph_kernel/expanders/sqrt_grad.py
  15. +2
    -2
      mindspore/_extends/graph_kernel/expanders/tanh_grad.py
  16. +3
    -10
      mindspore/_extends/graph_kernel/expanders/tile.py
  17. +20
    -11
      mindspore/_extends/graph_kernel/model/model_builder.py
  18. +89
    -68
      mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc
  19. +15
    -13
      mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
  20. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.h
  21. +9
    -1
      mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
  22. +32
    -102
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc
  23. +3
    -2
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h
  24. +1
    -262
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
  25. +0
    -2
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h

+ 22
- 9
mindspore/_extends/graph_kernel/expander.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,18 +20,31 @@ from mindspore import log as logger
import mindspore._extends.graph_kernel.expanders as expanders


def extract_expand_info(kernel_info):
"""Convert the json into a more friendly format"""
input_desc = []
if 'input_desc' in kernel_info and kernel_info['input_desc']:
for desc in kernel_info['input_desc']:
input_desc += desc
attrs = {}
if 'attr' in kernel_info and kernel_info['attr']:
for attr in kernel_info["attr"]:
attrs[attr["name"]] = attr["value"]
expand_info = {
"name": kernel_info["name"],
"input_desc": input_desc,
"output_desc": kernel_info["output_desc"],
"attr": attrs,
"process": kernel_info["process"],
}
return expand_info


def get_op_expander(json_str: str):
"""get op expander by json info"""
try:
kernel_info = json.loads(json_str)
expand_info = kernel_info['expand_info']

if 'name' not in expand_info:
logger.error("expand info have no op name")
return None
if 'process' not in expand_info:
logger.error("expand info have no processor info")
return None
expand_info = extract_expand_info(kernel_info)

processor = expand_info['process']
op_name = str(expand_info['name']).lower()


+ 4
- 9
mindspore/_extends/graph_kernel/expanders/dropout_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,20 +21,15 @@ def expand_dropoutgrad(expand_info):
# get op info.
dy_desc = expand_info['input_desc'][0]
mask_desc = expand_info['input_desc'][1]
keep_prob = None
for attr in expand_info['attr']:
if 'keep_prob' in attr:
keep_prob = attr['keep_prob']
if keep_prob is None:
raise RuntimeError("keep_prob does not exist in attrs.")
# generate a graph.
keep_prob = expand_info['attr']['keep_prob']

graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_dy = graph_builder.tensor(dy_desc['shape'], dy_desc['data_type'], dy_desc['format'])
input_mask = graph_builder.tensor(mask_desc['shape'], mask_desc['data_type'], mask_desc['format'])
graph_scope.set_input(input_dy, input_mask)
r_keep_prob = graph_builder.value(input_dy.dtype, 1.0 / keep_prob, "DefaultFormat")
r_keep_prob = graph_builder.value(input_dy.dtype, 1.0 / keep_prob)
# create op.
result = graph_builder.emit('Mul', [input_dy, r_keep_prob])
result = graph_builder.emit('Mul', [result, input_mask])


+ 5
- 6
mindspore/_extends/graph_kernel/expanders/gelu.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -40,17 +40,16 @@ def expand_gelu(expand_info):
# cal y
mul_0 = graph_builder.emit('Mul', [input_x, input_x])
pow_0 = graph_builder.emit('Mul', [mul_0, input_x])
const_csvalue = graph_builder.value(pow_0.dtype, CSVALUE, input_desc['format'])
const_csvalue = graph_builder.value(pow_0.dtype, CSVALUE)
mul_1 = graph_builder.emit('Mul', [pow_0, const_csvalue])
tanh_res = graph_builder.emit('Add', [input_x, mul_1])
const_csvalue_sqrt_two_div_pi = graph_builder.value(
tanh_res.dtype, CSVALUE_SQRT_TWO_DIV_PI, input_desc['format'])
const_csvalue_sqrt_two_div_pi = graph_builder.value(tanh_res.dtype, CSVALUE_SQRT_TWO_DIV_PI)
y = graph_builder.emit('Mul', [tanh_res, const_csvalue_sqrt_two_div_pi])

# cal gelu(x)
tanh_y = graph_builder.emit('Tanh', [y])
const_one = graph_builder.value(tanh_y.dtype, ONE, input_desc['format'])
const_half = graph_builder.value(tanh_y.dtype, HALF, input_desc['format'])
const_one = graph_builder.value(tanh_y.dtype, ONE)
const_half = graph_builder.value(tanh_y.dtype, HALF)
tanh_y_add_one = graph_builder.emit('Add', [tanh_y, const_one])
mul_x = graph_builder.emit('Mul', [input_x, tanh_y_add_one])
result = graph_builder.emit('Mul', [const_half, mul_x])


+ 6
- 7
mindspore/_extends/graph_kernel/expanders/gelu_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -45,12 +45,11 @@ def expand_gelugrad(expand_info):
graph_scope.set_input(input_dy, input_x, input_y)

# create some const var
const_csvalue = graph_builder.value(input_dy.dtype, CSVALUE, input_desc_0['format'])
const_csvalue_sqrt_two_div_pi = graph_builder.value(
input_dy.dtype, CSVALUE_SQRT_TWO_DIV_PI, input_desc_0['format'])
const_csvalue_tri = graph_builder.value(input_dy.dtype, CSVALUE_TRI, input_desc_0['format'])
const_one = graph_builder.value(input_dy.dtype, ONE, input_desc_0['format'])
const_half = graph_builder.value(input_dy.dtype, HALF, input_desc_0['format'])
const_csvalue = graph_builder.value(input_dy.dtype, CSVALUE)
const_csvalue_sqrt_two_div_pi = graph_builder.value(input_dy.dtype, CSVALUE_SQRT_TWO_DIV_PI)
const_csvalue_tri = graph_builder.value(input_dy.dtype, CSVALUE_TRI)
const_one = graph_builder.value(input_dy.dtype, ONE)
const_half = graph_builder.value(input_dy.dtype, HALF)

# cal mul_right
mul_double = graph_builder.emit('Mul', [input_x, input_x])


+ 6
- 11
mindspore/_extends/graph_kernel/expanders/gkdropout.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,25 +21,20 @@ def expand_gkdropout(expand_info):
# get op info.
input_desc = expand_info['input_desc'][0]
maks_desc = expand_info['input_desc'][1]
keep_prob = None
for attr in expand_info['attr']:
if 'keep_prob' in attr:
keep_prob = attr['keep_prob']
if keep_prob is None:
raise RuntimeError("keep_prob does not exist in attrs.")
# generate a graph.
keep_prob = expand_info['attr']['keep_prob']

graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
input_mask = graph_builder.tensor(maks_desc['shape'], maks_desc['data_type'], maks_desc['format'])
graph_scope.set_input(input_x, input_mask)
keep_prob_v = graph_builder.value(input_x.dtype, keep_prob, "DefaultFormat")
r_keep_prob = graph_builder.value(input_x.dtype, 1.0 / keep_prob, "DefaultFormat")
keep_prob_v = graph_builder.value(input_x.dtype, keep_prob)
r_keep_prob = graph_builder.value(input_x.dtype, 1.0 / keep_prob)

if input_mask.dtype != input_x.dtype:
input_mask = graph_builder.emit('Cast', [input_mask], attrs={'dst_type': input_x.dtype})
mask = graph_builder.emit('LessEqual', [input_mask, keep_prob_v]) # output is bool type
mask = graph_builder.emit('LessEqual', [input_mask, keep_prob_v]) # output is bool type
mask = graph_builder.emit('Cast', [mask], attrs={'dst_type': input_x.dtype})

# compute result


+ 6
- 12
mindspore/_extends/graph_kernel/expanders/layernorm.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,16 +23,10 @@ def expand_layernorm(expand_info):
input_desc_1 = expand_info['input_desc'][1]
input_desc_2 = expand_info['input_desc'][2]
attrs = expand_info['attr']
begin_norm_axis = None
epsilon = None
for item in attrs:
if 'begin_norm_axis' in item:
begin_norm_axis = item['begin_norm_axis']
if 'epsilon' in item:
epsilon = item['epsilon']
graph_builder = builder.GraphBuilder()
begin_norm_axis = attrs['begin_norm_axis']
epsilon = attrs['epsilon']

# generate a graph.
graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
@@ -52,7 +46,7 @@ def expand_layernorm(expand_info):
for i in reduce_axis:
reduce_elts *= shape_x[i]
mean_cof = 1.0 / reduce_elts
mean_cof_v = graph_builder.value(input_x.dtype, mean_cof, input_x.data_format)
mean_cof_v = graph_builder.value(input_x.dtype, mean_cof)

# Calculate mean
mean_red = graph_builder.emit('ReduceSum', [input_x], attrs={'reduce_axis': reduce_axis, 'keep_dims': True})
@@ -67,7 +61,7 @@ def expand_layernorm(expand_info):

# Calculate normalize
normalize_sub = graph_builder.emit('Sub', [input_x, mean])
epsilon_v = graph_builder.value(input_x.dtype, epsilon, input_x.data_format)
epsilon_v = graph_builder.value(input_x.dtype, epsilon)
normalize_add = graph_builder.emit('Add', [variance, epsilon_v])
normlize_rsqrt = graph_builder.emit('Rsqrt', [normalize_add])
normalize_mul = graph_builder.emit('Mul', [normalize_sub, normlize_rsqrt])


+ 12
- 18
mindspore/_extends/graph_kernel/expanders/layernorm_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -24,16 +24,10 @@ def expand_layernormgrad(expand_info):
var_desc = expand_info['input_desc'][2]
mean_desc = expand_info['input_desc'][3]
gamma_desc = expand_info['input_desc'][4]
begin_norm_axis = None
begin_params_axis = None
epsilon = 1e-11
for item in expand_info['attr']:
if 'begin_norm_axis' in item:
begin_norm_axis = item['begin_norm_axis']
if 'begin_params_axis' in item:
begin_params_axis = item['begin_params_axis']
if 'epsilon' in item:
epsilon = item['epsilon']
attrs = expand_info['attr']
begin_norm_axis = attrs['begin_norm_axis']
begin_params_axis = attrs['begin_params_axis']
epsilon = attrs['epsilon'] if 'epsilon' in attrs else 1e-11

shape_x = x_desc['shape']
if begin_norm_axis < 0:
@@ -57,13 +51,13 @@ def expand_layernormgrad(expand_info):
graph_scope.set_input(x, dy, variance, mean, gamma)

# set some constant val.
eps = graph_builder.value(x.dtype, epsilon, x.data_format)
const_one = graph_builder.value(x.dtype, 1.0, x.data_format)
const_neg_half = graph_builder.value(x.dtype, -0.5, x.data_format)
const_neg_two = graph_builder.value(x.dtype, -2.0, x.data_format)
const_two = graph_builder.value(x.dtype, 2.0, x.data_format)
const_neg_one = graph_builder.value(x.dtype, -1.0, x.data_format)
mean_cof = graph_builder.value(x.dtype, (1.0 / reduce_size), x.data_format)
eps = graph_builder.value(x.dtype, epsilon)
const_one = graph_builder.value(x.dtype, 1.0)
const_neg_half = graph_builder.value(x.dtype, -0.5)
const_neg_two = graph_builder.value(x.dtype, -2.0)
const_two = graph_builder.value(x.dtype, 2.0)
const_neg_one = graph_builder.value(x.dtype, -1.0)
mean_cof = graph_builder.value(x.dtype, (1.0 / reduce_size))

# cal dg db
var_eps = graph_builder.emit('Add', [variance, eps])


+ 2
- 6
mindspore/_extends/graph_kernel/expanders/logsoftmax.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,11 +20,7 @@ def expand_logsoftmax(expand_info):
"""LogSoftmax expander"""
# get op info.
input_desc = expand_info['input_desc'][0]
attrs = expand_info['attr']
axis = None
for item in attrs:
if 'axis' in item:
axis = item['axis']
axis = expand_info['attr']['axis']
graph_builder = builder.GraphBuilder()
if isinstance(axis, int):
axis = (axis,)


+ 2
- 6
mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,11 +21,7 @@ def expand_logsoftmaxgrad(expand_info):
# get op info.
input_desc_0 = expand_info['input_desc'][0]
input_desc_1 = expand_info['input_desc'][1]
attrs = expand_info['attr']
axis = None
for item in attrs:
if 'axis' in item:
axis = item['axis']
axis = expand_info['attr']['axis']
graph_builder = builder.GraphBuilder()

if isinstance(axis, int):


+ 4
- 10
mindspore/_extends/graph_kernel/expanders/maximum_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,16 +23,10 @@ def expand_maximumgrad(expand_info):
input_desc_1 = expand_info['input_desc'][1]
input_desc_2 = expand_info['input_desc'][2]
attrs = expand_info['attr']
grad_x = None
grad_y = None
for item in attrs:
if 'grad_x' in item:
grad_x = item['grad_x']
if 'grad_y' in item:
grad_y = item['grad_y']
graph_builder = builder.GraphBuilder()
grad_x = attrs['grad_x'] if 'grad_x' in attrs else True
grad_y = attrs['grad_y'] if 'grad_y' in attrs else True

# generate a graph.
graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])


+ 4
- 9
mindspore/_extends/graph_kernel/expanders/minimum_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,15 +23,10 @@ def expand_minimumgrad(expand_info):
input_desc_1 = expand_info['input_desc'][1]
input_desc_2 = expand_info['input_desc'][2]
attrs = expand_info['attr']
grad_x = None
grad_y = None
for item in attrs:
if 'grad_x' in item:
grad_x = item['grad_x']
if 'grad_y' in item:
grad_y = item['grad_y']
grad_x = attrs['grad_x'] if 'grad_x' in attrs else True
grad_y = attrs['grad_y'] if 'grad_y' in attrs else True

graph_builder = builder.GraphBuilder()
# generate a graph.
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])


+ 5
- 12
mindspore/_extends/graph_kernel/expanders/reduce_mean.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,20 +18,13 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

def expand_reducemean(expand_info):
"""ReduceMean expander"""

# get op info.
input_desc = expand_info['input_desc'][0]
attrs = expand_info['attr']
axis = None
keep_dims = None
for item in attrs:
if 'axis' in item:
axis = item['axis']
if 'keep_dims' in item:
keep_dims = item['keep_dims']
graph_builder = builder.GraphBuilder()
axis = attrs['axis']
keep_dims = attrs['keep_dims']

# generate a graph.
graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])
@@ -49,7 +42,7 @@ def expand_reducemean(expand_info):
for idx in axis:
all_shape *= x_shape[idx]

all_shape_value = graph_builder.value(input_x.dtype, all_shape, input_x.data_format)
all_shape_value = graph_builder.value(input_x.dtype, all_shape)

if not axis:
sum_x = graph_builder.emit('ReduceSum', [input_x], attrs={'reduce_axis': real_axis, 'keep_dims': keep_dims})


+ 3
- 9
mindspore/_extends/graph_kernel/expanders/softmax.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,16 +18,10 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

def expand_softmax(expand_info):
"""Softmax expander"""
# get op info.
input_desc = expand_info['input_desc'][0]
attrs = expand_info['attr']
axis = None
for item in attrs:
if 'axis' in item:
axis = item['axis']
graph_builder = builder.GraphBuilder()
axis = expand_info['attr']['axis']

# generate a graph.
graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])


+ 2
- 2
mindspore/_extends/graph_kernel/expanders/sqrt_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ def expand_sqrtgrad(expand_info):
graph_scope.set_input(input_x, input_dout)

# cal result
const_two = graph_builder.value(input_x.dtype, 2, input_x.data_format)
const_two = graph_builder.value(input_x.dtype, 2)
dividend = graph_builder.emit('Mul', [input_x, const_two])
result = graph_builder.emit('RealDiv', [input_dout, dividend])



+ 2
- 2
mindspore/_extends/graph_kernel/expanders/tanh_grad.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ def expand_tanhgrad(expand_info):
# create tensor input.
input_y = graph_builder.tensor(input_desc_0['shape'], input_desc_0['data_type'], input_desc_0['format'])
input_dy = graph_builder.tensor(input_desc_1['shape'], input_desc_1['data_type'], input_desc_1['format'])
const_one = graph_builder.value(input_y.dtype, ONE, input_y.data_format)
const_one = graph_builder.value(input_y.dtype, ONE)
graph_scope.set_input(input_y, input_dy)

# cal result


+ 3
- 10
mindspore/_extends/graph_kernel/expanders/tile.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,18 +18,11 @@ from mindspore._extends.graph_kernel.model import model_builder as builder

def expand_tile(expand_info):
"""Tile expander"""

# get op info.
input_desc = expand_info['input_desc'][0]
attrs = expand_info['attr']
multiples = None
for item in attrs:
if 'multiples' in item:
multiples = item['multiples']
multiples = expand_info['attr']['multiples']
output_shape, _, _, shape_compatible = builder.get_tile_output_shape(input_desc['shape'], multiples)
graph_builder = builder.GraphBuilder()

# generate a graph.
graph_builder = builder.GraphBuilder()
with graph_builder.graph_scope('main') as graph_scope:
# create tensor input.
input_x = graph_builder.tensor(input_desc['shape'], input_desc['data_type'], input_desc['format'])


+ 20
- 11
mindspore/_extends/graph_kernel/model/model_builder.py View File

@@ -15,7 +15,7 @@
"""GraphKernel model builder"""

import copy
from .model import PrimLib, Tensor, Value, Operator, Graph, AlignShape, AddControlBuddy
from .model import PrimLib, Tensor, Value, Operator, Graph, AlignShape, AddControlBuddy, DataFormat


def get_tile_output_shape(shape, multiples):
@@ -70,7 +70,7 @@ class OpInfer:

real_shape = []
for i, _ in enumerate(shape):
if i not in attrs['reduce_axis']:
if i not in attrs['reduce_axis'] and i - len(shape) not in attrs['reduce_axis']:
real_shape.append(shape[i])
return real_shape

@@ -106,7 +106,15 @@ class OpInfer:
@staticmethod
def default_infer_format_func(inputs, attrs):
"""Infer format"""
return inputs[0].data_format
result = inputs[0].data_format
# default_format and other_format results in other_format
for input_tensor in inputs[1:]:
data_format = input_tensor.data_format
if data_format != DataFormat.DEFAULT:
if result not in [DataFormat.DEFAULT, data_format]:
raise RuntimeError("Incompatible data format %s and %s" % (data_format, result))
result = data_format
return result

infer_shape_func = {
# add special infer func here
@@ -114,13 +122,20 @@ class OpInfer:
'Reshape': lambda inputs, attrs: attrs["shape"],
'BroadcastTo': lambda inputs, attrs: attrs["shape"],
'Tile': lambda inputs, attrs: get_tile_output_shape(inputs[0].shape, attrs["multiples"])[0],
'ExpandDims': lambda inputs, attrs: list(inputs[0].shape).insert(attrs["axis"], 1),
}
infer_dtype_func = {
# add special infer func here
'Cast': lambda inputs, attrs: attrs['dst_type'],
'Less': lambda inputs, attrs: "bool",
'LessEqual': lambda inputs, attrs: "bool",
'Equal': lambda inputs, attrs: "bool",
'Greater': lambda inputs, attrs: "bool",
'GreaterEqual': lambda inputs, attrs: "bool",
}
infer_format_func = {
# add special infer func here
'Reshape': lambda inputs, attrs: "DefaultFormat",
}

@classmethod
@@ -188,18 +203,12 @@ class GraphBuilder:
shape = [1]
return Tensor(name, shape, dtype, data_format, para_type=para_type)

def value(self, dtype, value, data_format, name=None):
def value(self, dtype, value, name=None):
"""Create a new Value"""
if name in (None, ''):
name = self._alloc_tensor_name()

if dtype == "float16":
# For float16 value, it will be changed to float32 wrongly. And there is no good solution for now.
# So instead just declare float32 value and then cast it to float16.
v_fp32 = Value(name, "float32", value, data_format)
v = self.emit("Cast", [v_fp32], attrs={"dst_type": "float16"})
else:
v = Value(name, dtype, value, data_format)
v = Value(name, dtype, value)
return v

def op(self, prim, output, inputs, attrs=None):


+ 89
- 68
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc View File

@@ -19,8 +19,7 @@
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <map>
#include <vector>
#include "backend/kernel_compiler/akg/akg_kernel_json_generator.h"
#include "backend/kernel_compiler/common_utils.h"
@@ -46,6 +45,62 @@ namespace {
constexpr auto kIsFeatureMapOutput = "IsFeatureMapOutput";
constexpr auto kIsFeatureMapInputList = "IsFeatureMapInputList";

class AbstractShapeCreator {
public:
using AbstractShapeTransferFunc = std::function<ShapeVector(const ShapeVector &)>;
/**
* Get an abstract shape.
* For a given device_shape and format, the available abstract_shape is not unique,
* this interface only returns a legal abstract_shape without considering padding
* so that the AnfAlgo's get device shape interface can get the right device_shape.
*/
static ShapeVector GetFakeAbstractShape(const ShapeVector &device_shape, const std::string &format) {
const std::map<std::string, AbstractShapeTransferFunc> fmap{
{kOpFormat_NCHW, NchwAbstractShape},
{kOpFormat_NHWC, NhwcAbstractShape},
{kOpFormat_FRAC_NZ, FractalNzAbstractShape},
};
if (format == kOpFormat_ND || format == kOpFormat_DEFAULT) {
return device_shape;
}
auto iter = fmap.find(format);
if (iter == fmap.end()) {
MS_LOG(WARNING) << "Unexpected format[" << format << "]";
return device_shape;
}
return iter->second(device_shape);
}

private:
static ShapeVector NchwAbstractShape(const ShapeVector &device_shape) { return device_shape; }
static ShapeVector NhwcAbstractShape(const ShapeVector &device_shape) {
if (device_shape.size() != 4) {
MS_LOG(EXCEPTION) << "Shape size of NHWC should be 4, but got " << device_shape.size();
}
return {device_shape[0], device_shape[3], device_shape[1], device_shape[2]};
}
static ShapeVector FractalNzAbstractShape(const ShapeVector &device_shape) {
if (device_shape.size() == 1 && (device_shape[0] == 1 || device_shape[0] % kCubeSize == 0)) {
return device_shape;
}
if (device_shape.size() < 4) {
MS_LOG(EXCEPTION) << "Shape size of FRACTAL_NZ should >= 4, but got " << device_shape.size();
}
ShapeVector shape;
size_t dims = device_shape.size();
size_t batch = dims - 4;
for (size_t i = 0; i < batch; ++i) {
shape.push_back(device_shape[i]);
}
int64_t m = device_shape[dims - 3] * device_shape[dims - 2];
int64_t n = device_shape[dims - 4] * device_shape[dims - 1];
shape.push_back(m);
shape.push_back(n);

return shape;
}
};

class CNodeDecoder {
public:
explicit CNodeDecoder(std::map<std::string, AnfNodePtr> *nodes_map) : nodes_map_(*nodes_map) {}
@@ -66,6 +121,7 @@ class CNodeDecoder {
return nullptr;
}
CreateKernelInfo(processor);
CreateAbstract();
return cnode_;
}

@@ -117,12 +173,8 @@ class CNodeDecoder {

bool DecodeInputDesc(const nlohmann::json &cnode_json, const FuncGraphPtr &func_graph) {
std::string op_name = cnode_json[kJsonKeyName];
// new primitive.
auto primitive = GetPrimitive(op_name);
if (primitive == nullptr) {
MS_LOG(ERROR) << "Create primitive failed.";
return false;
}
auto primitive = CreatePrimitiveWithAttrs(op_name);
MS_EXCEPTION_IF_NULL(primitive);

// collect inputs.
auto primitive_v = NewValueNode(primitive);
@@ -142,6 +194,7 @@ class CNodeDecoder {
}
input_formats_.push_back(input_desc[kJsonKeyFormat]);
input_types_.push_back(DtypeToTypeId(input_desc[kJsonKeyDataType]));
input_shapes_.push_back(input_desc[kJsonKeyShape]);
}
// new cnode.
cnode_ = func_graph->NewCNode(inputs);
@@ -160,6 +213,7 @@ class CNodeDecoder {
nlohmann::json output_desc = output_descs[0];
output_formats_.push_back(output_desc[kJsonKeyFormat]);
output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
output_shapes_.push_back(output_desc[kJsonKeyShape]);
nodes_map_[output_desc[kJsonKeyTensorName]] = cnode_;
} else {
// multi outputs.
@@ -167,6 +221,7 @@ class CNodeDecoder {
nlohmann::json output_desc = output_descs[j];
output_formats_.push_back(output_desc[kJsonKeyFormat]);
output_types_.push_back(DtypeToTypeId(output_desc[kJsonKeyDataType]));
output_shapes_.push_back(output_desc[kJsonKeyShape]);
auto get_item =
func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode_, NewValueNode(SizeToLong(j))});
func_graph->AddNode(get_item);
@@ -219,72 +274,29 @@ class CNodeDecoder {
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), cnode_.get());
}

ValuePtr CreatOpInstance(const std::string &op_name, const std::vector<ValuePtr> &attrs) {
// python utils.
constexpr auto kGetPythonOpFunc = "_get_python_op";
constexpr auto kParallelUtilsModule = "mindspore.parallel._utils";
// almost all ops are defined in this path.
constexpr auto kOperationsModule = "mindspore.ops.operations";
py::module mod = py::module::import(kOperationsModule);
if (!py::hasattr(mod, op_name.c_str())) {
MS_LOG(ERROR) << kOperationsModule << " don't have attr: " << op_name;
return nullptr;
}
std::vector<py::object> arg_list;
(void)std::transform(attrs.begin(), attrs.end(), std::back_inserter(arg_list),
[](const ValuePtr &attr) { return ValuePtrToPyData(attr); });
py::object obj = parse::python_adapter::CallPyFn(kParallelUtilsModule, kGetPythonOpFunc, op_name, kOperationsModule,
op_name, arg_list);
ValuePtr op_instance = nullptr;
bool succ = parse::ConvertData(obj, &op_instance);
if (!succ) {
MS_LOG(ERROR) << "Get python op " << op_name << " from " << kOperationsModule << " failed.";
return nullptr;
}
return op_instance;
void CreateAbstract() {
auto shape = AbstractShapeCreator::GetFakeAbstractShape(output_shapes_[0], output_formats_[0]);
auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(output_types_[0]), shape);
cnode_->set_abstract(abstract);
}

const std::map<std::string, std::vector<std::string>> op_attrs_map_ = {
{kReduceSumOpName, std::vector<std::string>{kAttrKeepDims}},
{kReduceMaxOpName, std::vector<std::string>{kAttrKeepDims}},
{kReduceMinOpName, std::vector<std::string>{kAttrKeepDims}},
{kBroadcastToOpName, std::vector<std::string>{kAttrShape}},
};

PrimitivePtr GetPrimitive(const std::string &op_name) {
PrimitivePtr primitive{nullptr};
if (op_attrs_map_.count(op_name) == 0) {
// no attrs for op instance.
primitive = CreatOpInstance(op_name, std::vector<ValuePtr>{})->cast<PrimitivePtr>();
} else {
// make attrs for op instance.
std::vector<ValuePtr> op_attrs;
const auto &attr_names = op_attrs_map_.at(op_name);
for (const auto &attr_name : attr_names) {
if (cnode_attrs_.count(attr_name) == 0) {
MS_LOG(ERROR) << "Attr: " << attr_name << " for: " << op_name << " not found.";
return nullptr;
}
op_attrs.push_back(cnode_attrs_.at(attr_name));
}
primitive = CreatOpInstance(op_name, op_attrs)->cast<PrimitivePtr>();
}
if (primitive != nullptr) {
for (const auto &attr : cnode_attrs_) {
primitive->AddAttr(attr.first, attr.second);
}
PrimitivePtr CreatePrimitiveWithAttrs(const std::string &op_name) {
auto primitive = std::make_shared<Primitive>(op_name);
for (const auto &attr : cnode_attrs_) {
primitive->AddAttr(attr.first, attr.second);
}
return primitive;
}

ScalarPtr DecodeScalar(const nlohmann::json &scalar_json) {
tensor::TensorPtr DecodeScalar(const nlohmann::json &scalar_json) {
auto type_id = DtypeToTypeId(scalar_json[kJsonKeyDataType]);
switch (type_id) {
case kNumberTypeFloat16:
return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat16);
case kNumberTypeFloat32:
return std::make_shared<FP32Imm>(scalar_json[kJsonKeyValue]);
return std::make_shared<tensor::Tensor>(static_cast<float>(scalar_json[kJsonKeyValue]), kFloat32);
case kNumberTypeInt32:
return std::make_shared<Int32Imm>(scalar_json[kJsonKeyValue]);
return std::make_shared<tensor::Tensor>(static_cast<int64_t>(scalar_json[kJsonKeyValue]), kInt32);
default:
MS_LOG(ERROR) << "Unknown type: " << scalar_json[kJsonKeyDataType];
break;
@@ -294,9 +306,8 @@ class CNodeDecoder {

ValueNodePtr DecodeValueNode(const nlohmann::json &value_json, const FuncGraphPtr &func_graph) {
MS_LOG(DEBUG) << "start decode value node, " << value_json;
auto scalar = DecodeScalar(value_json);
auto tensor = ScalarToTensor(scalar);

auto tensor = DecodeScalar(value_json);
MS_EXCEPTION_IF_NULL(tensor);
auto value_node = std::make_shared<ValueNode>(tensor);
value_node->set_abstract(tensor->ToAbstract());
// create kernel_info fo new value node.
@@ -319,6 +330,8 @@ class CNodeDecoder {
std::vector<std::string> output_formats_;
std::vector<TypeId> input_types_;
std::vector<TypeId> output_types_;
std::vector<ShapeVector> input_shapes_;
std::vector<ShapeVector> output_shapes_;
CNodePtr cnode_{nullptr};
};
} // namespace
@@ -329,11 +342,16 @@ ParameterPtr AkgKernelJsonDecoder::DecodeParameter(const nlohmann::json &paramet
ParameterPtr new_parameter = func_graph->add_parameter();
std::string name = parameter_json[kJsonKeyTensorName];
new_parameter->set_name(name);
std::string format = parameter_json[kJsonKeyFormat];
TypeId dtype = DtypeToTypeId(parameter_json[kJsonKeyDataType]);
ShapeVector shape = AbstractShapeCreator::GetFakeAbstractShape(parameter_json[kJsonKeyShape], format);
auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(dtype), shape);
new_parameter->set_abstract(abstract);
auto kernel_info = std::make_shared<device::KernelInfo>();
new_parameter->set_kernel_info(kernel_info);
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
builder->SetOutputsFormat(std::vector<std::string>{parameter_json[kJsonKeyFormat]});
builder->SetOutputsDeviceType(std::vector<TypeId>{DtypeToTypeId(parameter_json[kJsonKeyDataType])});
builder->SetOutputsFormat(std::vector<std::string>{format});
builder->SetOutputsDeviceType(std::vector<TypeId>{dtype});
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), new_parameter.get());
nodes_map_[name] = new_parameter;
return new_parameter;
@@ -349,6 +367,7 @@ CNodePtr AkgKernelJsonDecoder::DecodeCNode(const nlohmann::json &cnode_json, con
AnfNodePtr AkgKernelJsonDecoder::DecodeOutput(const std::vector<nlohmann::json> &output_descs,
const FuncGraphPtr &func_graph) {
std::vector<AnfNodePtr> outputs{NewValueNode(prim::kPrimMakeTuple)};
AbstractBasePtrList output_abstract_list;
for (const auto &output_desc : output_descs) {
std::string name = output_desc[kJsonKeyTensorName];
if (nodes_map_.count(name) == 0) {
@@ -356,11 +375,13 @@ AnfNodePtr AkgKernelJsonDecoder::DecodeOutput(const std::vector<nlohmann::json>
return nullptr;
}
outputs.push_back(nodes_map_[name]);
output_abstract_list.push_back(outputs.back()->abstract());
}
if (outputs.size() == 2) {
func_graph->set_output(outputs[1]);
} else {
auto output = func_graph->NewCNode(outputs);
output->set_abstract(std::make_shared<abstract::AbstractTuple>(output_abstract_list));
func_graph->AddNode(output);
func_graph->set_output(output);
}


+ 15
- 13
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc View File

@@ -80,10 +80,12 @@ class OpInfoExtractor {
}

void ExtractOutputs(const OpInfoPtr &op_info) {
// only support single output in op desc.
auto io_info = std::make_shared<OpIOInfo>();
io_info->set_name("output");
op_info->add_outputs_ptr(io_info);
size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(cnode_);
for (size_t i = 0; i < output_tensor_num; i++) {
auto io_info = std::make_shared<OpIOInfo>();
io_info->set_name("output_" + std::to_string(i));
op_info->add_outputs_ptr(io_info);
}
}

bool ExcludeAttr(const std::string &name) {
@@ -204,8 +206,7 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con
input_desc_json[kJsonKeyName] = input_ptr->name();
input_desc_json[kJsonKeyTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index));
auto input_shape = this->GetInputShape(anf_node, real_input_index);
if (dump_option_.extract_opinfo_from_anfnode &&
GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
if (!is_basic_op_ && GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
MS_LOG(DEBUG) << "Take input[" << real_input_index << "] of [" << anf_node->DebugString(2)
<< "] as const tensor, shape: [" << Vector2Str(input_shape)
<< "], value: " << input_desc_json[kJsonKeyValue];
@@ -529,9 +530,9 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_json);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "Akg start generate kernel json desc, full scope name is : " << anf_node->fullname_with_scope();
MS_LOG(DEBUG) << "Akg start generate kernel json desc, full scope name is : " << anf_node->fullname_with_scope();
SetAkgKernelAttrs(anf_node);
dump_option_.extract_opinfo_from_anfnode = false;
is_basic_op_ = true;
if (!GenerateSingleKernelJson(anf_node, kernel_json)) {
MS_LOG(ERROR) << "Op[" << anf_node->fullname_with_scope() << "] create single kernel json failed.";
return false;
@@ -551,8 +552,8 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
return false;
}

MS_LOG(INFO) << "Akg create kernel json desc success, full scope name is : " << anf_node->fullname_with_scope()
<< ", json info name is : " << kernel_name_;
MS_LOG(DEBUG) << "Akg create kernel json desc success, full scope name is : " << anf_node->fullname_with_scope()
<< ", json info name is : " << kernel_name_;
return true;
}

@@ -613,10 +614,11 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
<< "].";
return false;
}
MS_LOG(INFO) << "Fusion nodes: [" << output_list.size() << "], input_list: [" << anf_nodes.size()
<< "], output_list: [" << input_list.size() << "].";
MS_LOG(DEBUG) << "Fusion nodes: [" << output_list.size() << "], input_list: [" << anf_nodes.size()
<< "], output_list: [" << input_list.size() << "].";
std::map<AnfNodePtr, nlohmann::json> node_json_map;
dump_option_.extract_opinfo_from_anfnode = true;
is_basic_op_ = false;
dump_option_.extract_opinfo_from_anfnode = true; // always extract from anfnode for composite ops.
if (!GenSingleJsons(anf_nodes, &node_json_map)) return false;

UpdateTensorName(anf_nodes, &node_json_map);


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.h View File

@@ -144,6 +144,7 @@ class AkgKernelJsonGenerator {
std::map<std::string, AnfNodePtr> address_node_map_;
std::map<size_t, std::vector<std::string>> sub_graphs_;
std::map<size_t, size_t> dim_infos_;
bool is_basic_op_{false};
};
} // namespace kernel
} // namespace mindspore


+ 9
- 1
mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -393,6 +393,14 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
broadcast_input_node};
auto broadcast_to_node_inner = CreateCNode(
atomic_clean_inputs, new_sub_graph, {.format = format, .shape = dst_shape_vec, .type = GetType(atomic_add_node_)});

auto device_shape = AnfAlgo::GetOutputDeviceShape(atomic_add_node_, 0);
dst_shape_vec.clear();
if (device_shape.empty()) {
dst_shape_vec.push_back(1);
} else {
std::transform(device_shape.begin(), device_shape.end(), std::back_inserter(dst_shape_vec), SizeToLong);
}
SetNodeAttrSafely("shape", MakeValue(dst_shape_vec), broadcast_to_node_inner);

// Makeup sub-graph.


+ 32
- 102
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,88 +36,40 @@
namespace mindspore {
namespace opt {
namespace {
constexpr auto kJsonKeyExpandInfo = "expand_info";

#define GET_VALUE_FOR_JSON(JSON, VALUE, VALUE_ELEM, TYPE_NAME, TYPE) \
if (VALUE_ELEM->isa<TYPE_NAME>()) { \
JSON = GetValue<TYPE>(VALUE); \
}

nlohmann::json ExpandAttrJsonInfo(const CNodePtr &cnode) {
nlohmann::json attrs_json;
if (auto prim = GetCNodePrimitive(cnode); prim != nullptr) {
auto attrs = prim->attrs();
for (const auto &[k, v] : attrs) {
nlohmann::json attr_json;
MS_LOG(DEBUG) << "attr key is : " << k << " and value type is : " << v->type_name();
GET_VALUE_FOR_JSON(attr_json[k], v, v, Int32Imm, int);
GET_VALUE_FOR_JSON(attr_json[k], v, v, Int64Imm, int64_t);
GET_VALUE_FOR_JSON(attr_json[k], v, v, UInt32Imm, uint32_t);
GET_VALUE_FOR_JSON(attr_json[k], v, v, UInt64Imm, uint64_t);
GET_VALUE_FOR_JSON(attr_json[k], v, v, FP32Imm, float);
GET_VALUE_FOR_JSON(attr_json[k], v, v, FP64Imm, double);
GET_VALUE_FOR_JSON(attr_json[k], v, v, BoolImm, bool);
GET_VALUE_FOR_JSON(attr_json[k], v, v, StringImm, std::string);

if (v->isa<ValueList>() || v->isa<ValueTuple>()) {
auto vec = v->isa<ValueList>() ? v->cast<ValueListPtr>()->value() : v->cast<ValueTuplePtr>()->value();
if (!vec.empty()) {
MS_LOG(DEBUG) << "value type is : " << vec[0]->type_name();
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], Int32Imm, std::vector<int>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], Int64Imm, std::vector<int64_t>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], UInt32Imm, std::vector<uint32_t>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], UInt64Imm, std::vector<uint64_t>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], FP32Imm, std::vector<float>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], FP64Imm, std::vector<double>);
GET_VALUE_FOR_JSON(attr_json[k], v, vec[0], StringImm, std::vector<std::string>);
}
}
if (!attr_json.empty()) {
attrs_json.push_back(attr_json);
}
}
}
return attrs_json;
std::unordered_set<PrimitivePtr> GetExpandOps() {
std::unordered_set<PrimitivePtr> expand_ops = {
prim::kPrimSquare,
prim::kPrimGeLUGrad,
#if ENABLE_D
prim::kPrimTile,
prim::kPrimSqrtGrad,
prim::kPrimClipByNormNoDivSum,
#elif ENABLE_GPU
prim::kPrimBiasAdd,
prim::kPrimBiasAddGrad,
prim::kPrimGeLU,
prim::kPrimFusedAdam,
prim::kPrimFusedAdamWeightDecay,
prim::kPrimReduceMean,
prim::kPrimMaximumGrad,
prim::kPrimMinimumGrad,
prim::kPrimGkDropout,
prim::kPrimDropoutGrad,
prim::kPrimSoftmax,
prim::kPrimLayerNorm,
prim::kPrimLayerNormGrad,
#endif
};
return expand_ops;
}
} // namespace

bool ExpandJsonInfo(const CNodePtr &cnode, nlohmann::json *kernel_json) {
MS_EXCEPTION_IF_NULL(kernel_json);
if (kernel_json->find(kJsonKeyExpandInfo) != kernel_json->end()) {
return false;
}

nlohmann::json expand_info;
expand_info[kernel::kJsonKeyAttr] = ExpandAttrJsonInfo(cnode);
expand_info[kernel::kJsonKeyName] = AnfAlgo::GetCNodeName(cnode);
expand_info[kernel::kJsonKeyProcess] = kernel::GetProcessorStr(cnode);
std::vector<nlohmann::json> inputs_info;
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); ++i) {
nlohmann::json input_info;
input_info[kernel::kJsonKeyFormat] = AnfAlgo::GetInputFormat(cnode, i);
input_info[kernel::kJsonKeyInferShape] = AnfAlgo::GetPrevNodeOutputInferShape(cnode, i);
input_info[kernel::kJsonKeyShape] = AnfAlgo::GetInputDeviceShape(cnode, i);
input_info[kernel::kJsonKeyInferDataType] =
kernel::TypeId2String(AnfAlgo::GetPrevNodeOutputInferDataType(cnode, i));
input_info[kernel::kJsonKeyDataType] = kernel::TypeId2String(AnfAlgo::GetInputDeviceDataType(cnode, i));
inputs_info.push_back(input_info);
}
expand_info[kernel::kJsonKeyInputDesc] = inputs_info;

std::vector<nlohmann::json> outputs_info;
for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(cnode); ++i) {
nlohmann::json output_info;
output_info[kernel::kJsonKeyFormat] = AnfAlgo::GetOutputFormat(cnode, i);
output_info[kernel::kJsonKeyInferShape] = AnfAlgo::GetOutputInferShape(cnode, i);
output_info[kernel::kJsonKeyShape] = AnfAlgo::GetOutputDeviceShape(cnode, i);
output_info[kernel::kJsonKeyInferDataType] = kernel::TypeId2String(AnfAlgo::GetOutputInferDataType(cnode, i));
output_info[kernel::kJsonKeyDataType] = kernel::TypeId2String(AnfAlgo::GetOutputDeviceDataType(cnode, i));
outputs_info.push_back(output_info);
}
expand_info[kernel::kJsonKeyOutputDesc] = outputs_info;
(*kernel_json)[kJsonKeyExpandInfo] = expand_info;
return true;
bool GraphKernelExpander::ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json) {
DumpOption dump_option;
dump_option.extract_opinfo_from_anfnode = true;
kernel::AkgKernelJsonGenerator json_generator(dump_option);
return json_generator.CollectJson(node, kernel_json);
}
} // namespace

FuncGraphPtr GraphKernelExpander::CreateExpandFuncGraph(const CNodePtr &node) {
nlohmann::json kernel_json;
@@ -213,33 +165,11 @@ bool GraphKernelExpander::DoExpand(const FuncGraphPtr &func_graph) {

// replace origin node.
(void)mng->Replace(node, graph_kernel_node);

ToPrimitive(AnfAlgo::GetCNodeFuncGraphPtr(graph_kernel_node));
changed = true;
}
return changed;
}

void GraphKernelExpander::ToPrimitive(const FuncGraphPtr &func_graph) const {
auto todos = TopoSort(func_graph->get_return());
std::reverse(todos.begin(), todos.end());
auto mng = func_graph->manager();
MS_EXCEPTION_IF_NULL(mng);
for (const auto &n : todos) {
auto cnode = n->cast<CNodePtr>();
if (cnode == nullptr) {
continue;
}

auto origin_prim = AnfAlgo::GetCNodePrimitive(cnode);
MS_EXCEPTION_IF_NULL(origin_prim);
if (!origin_prim->isa<PrimitivePy>()) {
continue;
}
cnode->set_input(0, std::make_shared<ValueNode>(std::make_shared<Primitive>(*origin_prim)));
}
}

bool GraphKernelExpander::Run(const FuncGraphPtr &func_graph) {
expand_ops_ = GetExpandOps();
MS_EXCEPTION_IF_NULL(func_graph);


+ 3
- 2
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_GRAPH_KERNEL_EXPANDER_H_
#include <memory>
#include <unordered_set>
#include <nlohmann/json.hpp>
#include "backend/optimizer/common/pass.h"
#include "ir/func_graph.h"

@@ -31,7 +32,6 @@ class GraphKernelExpander : public Pass {
private:
FuncGraphPtr CreateExpandFuncGraph(const CNodePtr &node);
bool DoExpand(const FuncGraphPtr &func_graph);
void ToPrimitive(const FuncGraphPtr &func_graph) const;
void EliminateRedundantParameters(const FuncGraphPtr &func_graph, AnfNodePtrList *inputs);
AnfNodePtr CreateExpandGraphKernel(const FuncGraphPtr &func_graph, const FuncGraphPtr &new_func_graph,
const CNodePtr &node);
@@ -39,6 +39,7 @@ class GraphKernelExpander : public Pass {
return std::any_of(expand_ops_.begin(), expand_ops_.end(),
[&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); });
}
bool ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json);

private:
std::unordered_set<PrimitivePtr> expand_ops_;


+ 1
- 262
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc View File

@@ -39,27 +39,6 @@
namespace mindspore {
namespace opt {
namespace {
void DebugDump(const FuncGraphPtr &graph, std::stringstream *buf) {
(*buf) << "Parameters: \n";
const auto &parameters = graph->parameters();
(*buf) << "size: " << parameters.size() << "\n";
for (const auto &p : parameters) {
(*buf) << "\t" << p->DebugString(2) << "\n";
}
(*buf) << "ValueNodes: \n";
const auto &value_nodes = graph->value_nodes();
(*buf) << "size: " << value_nodes.size() << "\n";
for (const auto &v : value_nodes) {
(*buf) << "\t" << v.first->DebugString(2) << "\n";
}
(*buf) << "CNodes: \n";
const auto &all_nodes = graph->nodes();
(*buf) << "size: " << all_nodes.size() << "\n";
for (const auto &n : all_nodes) {
(*buf) << "\t" << n->DebugString(2) << "\n";
}
}

bool IsMakeTupleOut(const AnfNodePtr &out, AnfNodePtrList *real_outs) {
MS_EXCEPTION_IF_NULL(real_outs);
if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) {
@@ -91,132 +70,6 @@ AbstractBasePtr GetOutputAbstract(const AnfNodePtr &node, size_t output_idx) {
return out_spec;
}

ValueNodePtr ProcessAttrsForCast(const CNodePtr &cnode, const std::string &attr_name) {
auto dst_type = AnfAlgo::GetNodeAttr<std::string>(cnode, attr_name);
auto type = TypeIdToType(kernel::DtypeToTypeId(dst_type));
auto type_val_node = NewValueNode(type);
return type_val_node;
}

const std::map<std::string, std::function<ValueNodePtr(const CNodePtr &cnode, const std::string &attr_name)>>
attrs_process_map = {
{kCastOpName, ProcessAttrsForCast},
};

ValueNodePtr ProcessAttrValue(const CNodePtr &cnode, const std::string &attr_name) {
auto op_name = AnfAlgo::GetCNodeName(cnode);
if (attrs_process_map.count(op_name) != 0) {
return attrs_process_map.at(op_name)(cnode, attr_name);
}

auto attr_val = AnfAlgo::GetNodeAttr<ValuePtr>(cnode, attr_name);
auto attr_val_node = NewValueNode(attr_val);
return attr_val_node;
}

AnfNodePtr ConstAttrToInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
const std::unordered_set<size_t> &input_attrs) {
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(DEBUG) << "process node: " << cnode->DebugString(2);
if (input_attrs.empty()) {
return nullptr;
}

auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
MS_LOG(DEBUG) << "ori_input_names: " << kernel::Vector2Str(input_names);
std::vector<AnfNodePtr> new_inputs;
std::vector<std::string> new_input_names;
const auto &inputs = cnode->inputs();
for (size_t i = 0; i < inputs.size() - 1; ++i) {
new_input_names.push_back(input_names[i]);
}

(void)new_inputs.insert(new_inputs.end(), inputs.begin(), inputs.end());
bool need_update = false;
for (size_t i = inputs.size() - 1; i < input_names.size(); ++i) {
auto attr_name = input_names[i];
if (input_attrs.find(i) == input_attrs.end()) {
MS_LOG(WARNING) << "Other type input between tensors and attrs, name: " << attr_name
<< ", node: " << cnode->DebugString(2);
new_input_names.push_back(attr_name);
continue;
}
if (!AnfAlgo::HasNodeAttr(attr_name, cnode)) {
MS_LOG(EXCEPTION) << "Attr: " << attr_name << " not found in node: " << cnode->DebugString(2);
}

// Hardcode. It should convert attrs value according to format, like op ReduceSum.
auto attr_val_node = ProcessAttrValue(cnode, attr_name);
new_inputs.push_back(attr_val_node);
new_input_names.push_back(attr_name);
need_update = true;
MS_LOG(DEBUG) << "convert attr: " << attr_name << " to input, value: " << attr_val_node;
}
MS_LOG(DEBUG) << "new_input_names: " << kernel::Vector2Str(new_input_names);

if (!need_update) {
return nullptr;
}

auto new_cnode = func_graph->NewCNode(new_inputs);
// we do not modify abstract and kernel info.
new_cnode->set_abstract(cnode->abstract());
new_cnode->set_kernel_info(cnode->kernel_info_ptr());
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(new_input_names), new_cnode);
return new_cnode;
}

AnfNodePtr DeleteAttrInInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
const std::unordered_set<size_t> &input_attrs) {
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(DEBUG) << "process node: " << cnode->DebugString(2);
if (input_attrs.empty()) {
return nullptr;
}

auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode, kAttrInputNames);
MS_LOG(DEBUG) << "ori_input_names: " << kernel::Vector2Str(input_names);
std::vector<AnfNodePtr> new_inputs;
std::vector<std::string> new_input_names;

const auto &inputs = cnode->inputs();
new_inputs.push_back(inputs[0]);
bool need_update = false;
for (size_t i = 0; i < inputs.size() - 1; ++i) {
auto input_node = inputs[i + 1];
MS_EXCEPTION_IF_NULL(input_node);
// The attrs counts from 0
if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
auto value_node = input_node->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(value_node);
MS_LOG(DEBUG) << "delete attr input: " << i << " of node: " << cnode->DebugString(2);
if (i >= input_names.size()) {
MS_LOG(EXCEPTION) << "Index " << i << " is larger than input names size: " << input_names.size();
}
need_update = true;
} else {
new_inputs.push_back(input_node);
if (i < input_names.size()) {
new_input_names.push_back(input_names[i]);
}
}
}
MS_LOG(DEBUG) << "new_input_names: " << kernel::Vector2Str(new_input_names);

if (!need_update) {
return nullptr;
}

auto new_cnode = func_graph->NewCNode(new_inputs);
// we do not modify abstract and kernel info.
new_cnode->set_abstract(cnode->abstract());
new_cnode->set_kernel_info(cnode->kernel_info_ptr());
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(new_input_names), new_cnode);
return new_cnode;
}

AnfNodePtrList EliminateMakeTuple(const FuncGraphPtr &fg, const FuncGraphManagerPtr &mng) {
AnfNodePtrList outs;
auto out_node = fg->output();
@@ -396,59 +249,6 @@ void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const
AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, new_node.get());
}

void ConstAttrToInput(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(func_graph);
auto mng = func_graph->manager();
MS_EXCEPTION_IF_NULL(mng);
std::vector<AnfNodePtr> todos;
kernel::GetValidKernelNodes(func_graph, &todos);
for (const auto &node : todos) {
ConstInputToAttrInfoRegister reg;
if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(node), &reg)) {
continue;
}
auto new_node = ConstAttrToInput(func_graph, node->cast<CNodePtr>(), reg.GetConstInputAttrInfo());
if (new_node != nullptr && new_node != node) {
mng->Replace(node, new_node);
}
}
}

void DeleteAttrInInput(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(func_graph);
auto mng = func_graph->manager();
MS_EXCEPTION_IF_NULL(mng);
std::vector<AnfNodePtr> todos;
kernel::GetValidKernelNodes(func_graph, &todos);
for (const auto &node : todos) {
ConstInputToAttrInfoRegister reg;
if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(node), &reg)) {
continue;
}
auto new_node = DeleteAttrInInput(func_graph, node->cast<CNodePtr>(), reg.GetConstInputAttrInfo());
if (new_node != nullptr && new_node != node) {
mng->Replace(node, new_node);
}
}
}

AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs) {
AnfNodePtrList res;
if (outs.size() <= 1) {
return outs;
}

for (auto out : outs) {
AnfNodePtrList real_outs;
if (IsMakeTupleOut(out, &real_outs)) {
res.insert(res.end(), real_outs.begin(), real_outs.end());
continue;
}
res.push_back(out);
}
return res;
}

AnfNodePtr CreateNewFuseCNode(const FuncGraphPtr &func_graph, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
const AnfNodePtrList &outputs) {
auto func_node = NewValueNode(fg);
@@ -661,68 +461,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNo
MS_LOG(ERROR) << "Akg decode json to graph failed.";
return nullptr;
}

pipeline::ResourcePtr resource = std::make_shared<pipeline::Resource>();
auto mng = resource->manager();
MS_EXCEPTION_IF_NULL(mng);
mng->AddFuncGraph(fg);
ConstAttrToInput(fg);
std::stringstream buf;
buf << "===================== graph after ConstAttrToInput " << fg->ToString() << " =====================\n";
DebugDump(fg, &buf);
MS_LOG(DEBUG) << buf.str();

// Do infer and specialize.
AbstractBasePtrList args_spec_list;
std::for_each(inputs.begin(), inputs.end(),
[&args_spec_list](const AnfNodePtr &node) { args_spec_list.push_back(node->abstract()); });
auto infer_fg = pipeline::Renormalize(resource, fg, args_spec_list);
if (infer_fg == nullptr) {
MS_LOG(ERROR) << "Infer decoded graph failed.";
return nullptr;
}
buf.str("");
buf << "===================== graph after Renormalize " << infer_fg->ToString() << " =====================\n";
DebugDump(infer_fg, &buf);
MS_LOG(DEBUG) << buf.str();

// delete no use inputs(attrs), like op ReduceSum(axis).
DeleteAttrInInput(infer_fg);
buf.str("");
buf << "===================== graph after DeleteAttrInInput " << infer_fg->ToString() << " =====================\n";
DebugDump(infer_fg, &buf);
MS_LOG(DEBUG) << buf.str();

// clone a new graph.
auto new_fg = TransformableClone(infer_fg, std::make_shared<TraceTransform>("akg_decode"));
return new_fg;
}

std::unordered_set<PrimitivePtr> GetExpandOps() {
std::unordered_set<PrimitivePtr> expand_ops = {
prim::kPrimSquare,
prim::kPrimGeLUGrad,
#if ENABLE_D
prim::kPrimTile,
prim::kPrimSqrtGrad,
prim::kPrimClipByNormNoDivSum,
#elif ENABLE_GPU
prim::kPrimBiasAdd,
prim::kPrimBiasAddGrad,
prim::kPrimGeLU,
prim::kPrimFusedAdam,
prim::kPrimFusedAdamWeightDecay,
prim::kPrimReduceMean,
prim::kPrimMaximumGrad,
prim::kPrimMinimumGrad,
prim::kPrimGkDropout,
prim::kPrimDropoutGrad,
prim::kPrimSoftmax,
prim::kPrimLayerNorm,
prim::kPrimLayerNormGrad,
#endif
};
return expand_ops;
return fg;
}

std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &prefix, const string &postfix) {


+ 0
- 2
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h View File

@@ -61,7 +61,6 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
AnfNodePtrList *src_outputs = nullptr);
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
const AnfNodePtrList &outputs, kernel::Processor processor);
AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
AnfNodePtr CreateNewFuseCNode(const FuncGraphPtr &kernel_graph, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
const AnfNodePtrList &outputs);
void ReplaceNewFuseCNode(const FuncGraphPtr &kernel_graph, const AnfNodePtr &new_fuse_cnode,
@@ -74,7 +73,6 @@ bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, n
std::map<std::string, AnfNodePtr> *address_node_map);
bool AnfToJsonDesc(const std::vector<AnfNodePtrList> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc);
FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> &inputs);
std::unordered_set<PrimitivePtr> GetExpandOps();
std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &prefix = "", const string &postfix = "");
std::vector<PrimitivePtr> GetFusibleOpList();
bool IsBasicFuseOp(const AnfNodePtr &node);


Loading…
Cancel
Save