You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

message.py 6.7 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. # Copyright 2019-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """message"""
  15. import importlib.util
  16. import json
  17. import json.decoder as jd
  18. import logging
  19. import traceback
  20. import os
  21. from pathlib import Path
  22. import akg.tvm
  23. from akg.utils import kernel_exec as utils
  24. from akg.utils import validation_check as vc_util
  25. from akg import composite
  26. from akg.tvm import _api_internal
  27. from . import cce
  28. from . import gpu
  29. from . import op_build
  30. def should_use_poly(kernel_info):
  31. if os.getenv('MS_AKG_USE_POLY') != "off":
  32. return True
  33. for desc in kernel_info['op_desc']:
  34. if desc['name'].startswith('Reduce'):
  35. return True
  36. return False
  37. def _pragma_rmselfdep(kernel_info):
  38. for op in kernel_info["op_desc"]:
  39. if op['name'] == "MatMul":
  40. return False
  41. return True
@vc_util.check_input_type(str)
def compilewithjson_to_func(json_str):
    """Compile a kernel described by a JSON string.

    Depending on the kernel kind, this either builds a composite kernel
    immediately (returning a bool), or assembles the op's compute/schedule
    and returns whatever ``op_build(...)`` produces so the caller can finish
    the build (see ``compilewithjson``).

    Args:
        json_str (str): JSON kernel description. Observed keys used below:
            'process', 'composite', 'name', 'impl_path', 'input_desc',
            'attr', 'op'. (Full schema is defined by the caller — not
            visible here.)

    Returns:
        bool or build result: False on any error; True when the kernel was
        fully built here (cuda composite, or gpu built-in op); otherwise the
        object returned by ``composite._build_to_func`` or ``op_build``.
    """
    try:
        kernel_info = json.loads(json_str)
    except jd.JSONDecodeError:
        logging.error(traceback.format_exc())
        return False
    # Target backend: defaults to cuda when 'process' is absent.
    supported_processors = ['cuda', 'aicore']
    processor = 'cuda'
    if 'process' in kernel_info:
        processor = kernel_info['process']
    if processor not in supported_processors:
        logging.error("supported processors: {}, current processor: {}".format(supported_processors, processor))
        return False
    # Composite (fused) kernels take a separate build path and return early.
    if 'composite' in kernel_info and kernel_info['composite'] is True:
        try:
            if processor == 'cuda':
                use_poly = should_use_poly(kernel_info)
                enable_atomic_add = composite.should_enable_atomic_add(kernel_info)
                # Build completes here; the module itself is discarded and
                # only success/failure is reported to the caller.
                _ = composite._build(json_str, kernel_info, attrs={
                    "target": "cuda", "enable_akg_reduce_lib": True, "enable_atomic_add": enable_atomic_add}, poly=use_poly)
                return True
            else:
                pragma_rmselfdep = _pragma_rmselfdep(kernel_info)
                mod = composite._build_to_func(json_str, kernel_info, attr = {"pragma_rmselfdep": pragma_rmselfdep})
                return mod
        except Exception:
            # Broad catch is deliberate: any build failure is logged and
            # reported as False rather than propagated.
            logging.error(traceback.format_exc())
            return False
    op_name = kernel_info['name']
    op_func = None
    # get custom ops implementation first.
    if 'impl_path' in kernel_info and kernel_info['impl_path'] is not None:
        impl_path = os.path.realpath(kernel_info['impl_path'])
        if os.path.isfile(impl_path):
            # Dynamically load the user-supplied module and look up a
            # function named after the op in it.
            custom_mod_name = Path(impl_path).resolve().stem
            mod_spec = importlib.util.spec_from_file_location(
                custom_mod_name, impl_path)
            custom_mod = importlib.util.module_from_spec(mod_spec)
            mod_spec.loader.exec_module(custom_mod)
            op_func = getattr(custom_mod, op_name, None)
    # get built-in ops.
    if op_func is None:
        if processor == 'cuda':
            op_func = getattr(gpu, op_name, None)
            if op_func is not None:
                # gpu built-ins are built immediately via utils.op_build;
                # only success is reported (the module is not returned).
                input_shapes = []
                input_types = []
                for input_desc in kernel_info['input_desc']:
                    input_shapes.append(input_desc[0]['shape'])
                    input_types.append(input_desc[0]['data_type'])
                op_attrs = []
                if kernel_info['attr']:
                    for ext_arg in kernel_info['attr']:
                        op_attrs.append(ext_arg['value'])
                # IR/code dumping is opt-in via environment variables.
                dump_ir = os.getenv('MS_AKG_DUMP_IR') == "on"
                dump_code = os.getenv('MS_AKG_DUMP_CODE') == "on"
                mod = utils.op_build(op_func, input_shapes, input_types, op_attrs, kernel_info['op'], dump_ir=dump_ir,
                                     dump_code=dump_code)
                return True
        else:
            op_func = getattr(cce, op_name, None)
    if op_func is None:
        logging.error(
            "this op not support by akg, please check op name %s", str(op_name))
        return False
    # Build placeholders for every input; args maps parameter name -> tensor
    # (or list of tensors), tsr is the flat ordered tensor list for op_build.
    args = {}
    tsr = []
    for input_desc in kernel_info['input_desc']:
        if len(input_desc) == 1:
            tensor_shape = input_desc[0]['shape']
            # Scalars arrive with an empty shape; promote to rank-1.
            tensor_shape = (1,) if not tensor_shape else tensor_shape
            vc_util.shape_dtype_max_size_check(
                tensor_shape, input_desc[0]['data_type'])
            args[input_desc[0]['name']] = akg.tvm.placeholder(
                shape=tensor_shape, name=input_desc[0]['tensor_name'], dtype=input_desc[0]['data_type'])
            tsr.append(args[input_desc[0]['name']])
        else:
            # A multi-entry input_desc is a list-valued parameter: all its
            # tensors are passed to the op as one list argument.
            tmp_input = []
            for tmp_desc in input_desc:
                tensor_shape = tmp_desc['shape']
                tensor_shape = (1,) if not tensor_shape else tensor_shape
                vc_util.shape_dtype_max_size_check(
                    tensor_shape, tmp_desc['data_type'])
                tmp_input.append(akg.tvm.placeholder(
                    shape=tensor_shape, name=tmp_desc['tensor_name'], dtype=tmp_desc['data_type']))
            args[input_desc[0]['name']] = tmp_input
            tsr = tsr + tmp_input
    # Extra attributes become keyword arguments of the op function.
    if kernel_info['attr']:
        for ext_arg in kernel_info['attr']:
            args[ext_arg['name']] = ext_arg['value']
    output = op_func(**args)
    schedule_func = None
    attrs = {}
    if isinstance(output, (list, tuple)):
        # An op may return, besides its output tensors, a schedule function
        # and/or dicts of build attrs — sort them out by type.
        from inspect import isfunction
        tmp_outputs = []
        for elem in output:
            if isfunction(elem):
                schedule_func = elem
            elif isinstance(elem, dict):
                for key, value in elem.items():
                    # Earlier non-empty attr values win over later ones.
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            else:
                tmp_outputs.append(elem)
        output = tmp_outputs
    else:
        output = [output]
    # Append real output tensors to the argument list handed to op_build.
    tsr = tsr + [i for i in output if utils.TensorUtils.is_output_value(i)]
    return op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs)
  154. def compilewithjson(json_str):
  155. tmp_rst = compilewithjson_to_func(json_str)
  156. if isinstance(tmp_rst, bool):
  157. return tmp_rst
  158. return _api_internal._BuildToModule(tmp_rst)

AKG(Auto Kernel Generator)对深度神经网络中的算子进行优化,并提供特定模式下的算子自动融合功能。AKG与MindSpore的图算融合功能协同工作,可提升在不同硬件后端上运行网络的性能。