You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

result_analysis.py 9.8 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3. # Copyright 2019 Huawei Technologies Co., Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """result compare function"""
  17. import logging
  18. import numpy as np
  19. import akg.tvm as tvm
  20. def result_compare(actual, bench_mark, r_tol=5e-3):
  21. """function for compare result."""
  22. error = 0
  23. count = 0
  24. last_err = -2
  25. continue_err = 0
  26. max_continue = -1
  27. max_end = 0
  28. logging.debug(actual.shape)
  29. logging.debug(bench_mark.shape)
  30. actual = actual.reshape((actual.size,))
  31. len_a = actual.size
  32. bench_mark = bench_mark.reshape((bench_mark.size,))
  33. len_b = bench_mark.size
  34. if len_a != len_b:
  35. return False
  36. for i in range(len_a):
  37. a = actual[i]
  38. b = bench_mark[i]
  39. if abs(a - b) > abs(b) * r_tol:
  40. error += 1
  41. if last_err + 1 == count:
  42. continue_err += 1
  43. else:
  44. if continue_err > max_continue:
  45. max_continue = continue_err
  46. max_end = last_err
  47. continue_err = 1
  48. last_err = count
  49. elif np.isnan(a):
  50. error += 1
  51. if last_err + 1 == count:
  52. continue_err += 1
  53. else:
  54. if continue_err > max_continue:
  55. max_continue = continue_err
  56. max_end = last_err
  57. continue_err = 1
  58. last_err = count
  59. count += 1
  60. if continue_err > max_continue:
  61. max_continue = continue_err
  62. max_end = last_err
  63. logging.debug("error num: %d/%d (%.2f%%)", error, count, 100.0 * error / count)
  64. logging.debug("longest error range: [%d, %d]", max_end - max_continue + 1, max_end)
  65. if max_continue >= 16:
  66. return False
  67. logging.debug("\n\n******************** test ok *****************\n\n")
  68. return True
  69. def akg_fp16_mean(inputs, axis=None, keepdims=True):
  70. size = 1
  71. for dim in axis:
  72. size = size * inputs.shape[dim]
  73. expect = np_bisect_sum(inputs, axis=axis, keepdims=keepdims) * np.float16(1 / size)
  74. return expect
  75. def np_bisect_sum(inputs, axis=None, keepdims=True):
  76. """numpy bisection summation."""
  77. shape = inputs.shape
  78. size = 1
  79. for dim in axis:
  80. size = size * shape[dim]
  81. if size <= 2:
  82. expect = np_bisect_sum_fp16(inputs, axis=tuple(axis), keepdims=keepdims)
  83. else:
  84. expect = np.sum(inputs.astype("float32"), axis=tuple(axis), keepdims=keepdims).astype("float16")
  85. return expect
  86. def np_bisect_sum_fp16(inputs, axis=None, keepdims=True):
  87. """
  88. Function for expected result of bisect sum operation.
  89. Note:
  90. For fp16 data, np.sum doesn't have enough accuracy, so use bisect sum instead.
  91. """
  92. if axis is None:
  93. axis = []
  94. if isinstance(axis, int):
  95. expect = bisect_sum(inputs, axis, keepdims)
  96. elif isinstance(axis, (list, tuple)):
  97. axis = sorted(axis)
  98. expect = inputs
  99. i = 0
  100. for x in axis:
  101. expect = bisect_sum(expect, x if keepdims else x - i, keepdims)
  102. i = i + 1
  103. return expect
  104. def bisect_sum(a, axis=0, keepdims=True):
  105. """Axis transformations for bisect sum operation."""
  106. import math
  107. shape = a.shape
  108. if not len(shape) <= 8:
  109. raise AssertionError("the dimension of input cannot be larger than 6!")
  110. if axis < 0:
  111. axis = len(shape) + axis
  112. dimlen = shape[axis]
  113. reduce_num = int(math.pow(2, int(math.log(dimlen, 2))))
  114. tail_num = dimlen - reduce_num
  115. s1 = np.array(a)
  116. s = s1
  117. if axis == len(shape) - 1:
  118. s[..., 0:tail_num] = s1[..., 0:tail_num] + s1[..., reduce_num:reduce_num + tail_num]
  119. while reduce_num != 1:
  120. s = s[..., 0:reduce_num // 2] + s[..., reduce_num // 2:reduce_num]
  121. reduce_num = reduce_num // 2
  122. elif axis == 0:
  123. s[0:tail_num, :] = s1[0:tail_num, :] + s1[reduce_num:reduce_num + tail_num, :]
  124. while reduce_num != 1:
  125. s = s[0:reduce_num // 2, :] + s[reduce_num // 2:reduce_num, :]
  126. reduce_num = reduce_num // 2
  127. elif axis == 1:
  128. s[:, 0:tail_num, :] = s1[:, 0:tail_num, :] + s1[:, reduce_num:reduce_num + tail_num, :]
  129. while reduce_num != 1:
  130. s = s[:, 0:reduce_num // 2, :] + s[:, reduce_num // 2:reduce_num, :]
  131. reduce_num = reduce_num // 2
  132. elif axis == 2:
  133. s[:, :, 0:tail_num, :] = s1[:, :, 0:tail_num, :] + s1[:, :, reduce_num:reduce_num + tail_num, :]
  134. while reduce_num != 1:
  135. s = s[:, :, 0:reduce_num // 2, :] + s[:, :, reduce_num // 2:reduce_num, :]
  136. reduce_num = reduce_num // 2
  137. elif axis == 3:
  138. s[:, :, :, 0:tail_num, :] = s1[:, :, :, 0:tail_num, :] + s1[:, :, :, reduce_num:reduce_num + tail_num, :]
  139. while reduce_num != 1:
  140. s = s[:, :, :, 0:reduce_num // 2, :] + s[:, :, :, reduce_num // 2:reduce_num, :]
  141. reduce_num = reduce_num // 2
  142. elif axis == 4:
  143. s[:, :, :, :, 0:tail_num, :] = s1[:, :, :, :, 0:tail_num, :] + \
  144. s1[:, :, :, :, reduce_num:reduce_num + tail_num, :]
  145. while reduce_num != 1:
  146. s = s[:, :, :, :, 0:reduce_num // 2, :] + s[:, :, :, :, reduce_num // 2:reduce_num, :]
  147. reduce_num = reduce_num // 2
  148. elif axis == 5:
  149. s[:, :, :, :, :, 0:tail_num, :] = s1[:, :, :, :, :, 0:tail_num, :] +\
  150. s1[:, :, :, :, :, reduce_num:reduce_num + tail_num, :]
  151. while reduce_num != 1:
  152. s = s[:, :, :, :, :, 0:reduce_num // 2, :] + s[:, :, :, :, :, reduce_num // 2:reduce_num, :]
  153. reduce_num = reduce_num // 2
  154. elif axis == 6:
  155. s[:, :, :, :, :, :, 0:tail_num, :] = s1[:, :, :, :, :, :, 0:tail_num, :] + \
  156. s1[:, :, :, :, :, :, reduce_num:reduce_num + tail_num, :]
  157. while reduce_num != 1:
  158. s = s[:, :, :, :, :, :, 0:reduce_num // 2, :] + s[:, :, :, :, :, :, reduce_num // 2:reduce_num, :]
  159. reduce_num = reduce_num // 2
  160. elif axis == 7:
  161. s[:, :, :, :, :, :, :, 0:tail_num, :] = s1[:, :, :, :, :, :, :, 0:tail_num, :] + \
  162. s1[:, :, :, :, :, :, :, reduce_num:reduce_num + tail_num, :]
  163. while reduce_num != 1:
  164. s = s[:, :, :, :, :, :, :, 0:reduce_num // 2, :] + s[:, :, :, :, :, :, :, reduce_num // 2:reduce_num, :]
  165. reduce_num = reduce_num // 2
  166. if not keepdims:
  167. s = np.squeeze(s, axis)
  168. return s
  169. def get_ticks(stat_info):
  170. """get ticks from statistic info."""
  171. aic_out_path = "aic_out"
  172. calog_path = aic_out_path + "/calog"
  173. ticks_log_file = calog_path + '/core0_instr_popped_log.dump'
  174. with open(ticks_log_file, "r") as file:
  175. line = file.readlines()[-2]
  176. ticks = int(line.split(",")[1].split('tick:')[1])
  177. stat_info['run_time'] = ticks
  178. def flattened_index_to_real_index(idx, shape):
  179. index = []
  180. index_per_dim = idx
  181. for i in reversed(range(len(shape))):
  182. dim_index = index_per_dim % shape[i]
  183. index_per_dim //= shape[i]
  184. index.append(dim_index)
  185. index.reverse()
  186. return index
  187. def count_unequal_element(data_expected, data_actual, rtol, atol):
  188. """Function for asserting unequal elements in data_actual and data_expected."""
  189. if not data_expected.shape == data_actual.shape:
  190. raise AssertionError("'data_expected' and 'data_actual' should have the same shape")
  191. list_a = data_expected.flatten()
  192. list_b = data_actual.flatten()
  193. count = 0
  194. eps = 1e-10
  195. all_printed = True
  196. for i, aa in enumerate(list_a):
  197. a = list_b[i]
  198. b = aa
  199. is_bool = isinstance(a, np.bool_) or isinstance(b, np.bool_)
  200. is_nan = np.isnan(a) or np.isnan(b)
  201. is_numeric = not (is_bool or is_nan)
  202. if (is_bool and a != b) or (is_numeric and abs(a - b) > (atol + rtol * abs(b))) or is_nan:
  203. if count < 100:
  204. index = flattened_index_to_real_index(i, data_expected.shape)
  205. if is_numeric:
  206. b_1 = b + eps if b == 0.0 else b
  207. logging.error("%s: Actual[%s] Expected[%s] Ratio[%s]",
  208. str(index), str(a), str(b), str(abs(a - b) / abs(b_1)))
  209. else:
  210. logging.error("%s: Actual[%s] Expected[%s]", str(index), str(a), str(b))
  211. else:
  212. all_printed = False
  213. count += 1
  214. if count != 0:
  215. if not all_printed:
  216. logging.error("...")
  217. logging.error("Total %s mismatch detected!!!, Only print 100...", str(count))
  218. else:
  219. logging.error("Total %s mismatch detected!!!", str(count))
  220. if not count <= int(len(list_a)):
  221. raise AssertionError
  222. def allclose_nparray(data_expected, data_actual, rtol, atol=1e-08):
  223. """Compare whether arrays are element-wise equal within tolerances."""
  224. if not np.allclose(data_expected, data_actual, rtol, atol):
  225. count_unequal_element(data_expected, data_actual, rtol, atol)
  226. def gpu_profiling(mod, *args, repeat_time=1, device_id=0):
  227. """Do profiling on gpu for cuda op"""
  228. ctx = tvm.context("cuda", device_id)
  229. ftimer = mod.time_evaluator(mod.entry_name, ctx, number=repeat_time)
  230. tcost = ftimer(*args).mean
  231. print("{}: exec={} sec/op".format(ctx, tcost))