You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

tensor.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Tensor utils."""
  16. import numpy as np
  17. from mindinsight.datavisual.utils.tools import to_int
  18. from mindinsight.utils.exceptions import ParamValueError
  19. from mindinsight.utils.exceptions import ParamTypeError
  20. from mindinsight.utils.log import utils_logger as logger
  21. F32_MIN, F32_MAX = np.finfo(np.float32).min, np.finfo(np.float32).max
  22. class Statistics:
  23. """Statistics data class.
  24. Args:
  25. max_value (float): max value of tensor data.
  26. min_value (float): min value of tensor data.
  27. avg_value (float): avg value of tensor data.
  28. count (int): total count of tensor data.
  29. nan_count (int): count of NAN.
  30. neg_inf_count (int): count of negative INF.
  31. pos_inf_count (int): count of positive INF.
  32. """
  33. def __init__(self, max_value=0, min_value=0, avg_value=0,
  34. count=0, nan_count=0, neg_inf_count=0, pos_inf_count=0):
  35. self._max = max_value
  36. self._min = min_value
  37. self._avg = avg_value
  38. self._count = count
  39. self._nan_count = nan_count
  40. self._neg_inf_count = neg_inf_count
  41. self._pos_inf_count = pos_inf_count
  42. @property
  43. def max(self):
  44. """Get max value of tensor."""
  45. return self._max
  46. @property
  47. def min(self):
  48. """Get min value of tensor."""
  49. return self._min
  50. @property
  51. def avg(self):
  52. """Get avg value of tensor."""
  53. return self._avg
  54. @property
  55. def count(self):
  56. """Get total count of tensor."""
  57. return self._count
  58. @property
  59. def nan_count(self):
  60. """Get count of NAN."""
  61. return self._nan_count
  62. @property
  63. def neg_inf_count(self):
  64. """Get count of negative INF."""
  65. return self._neg_inf_count
  66. @property
  67. def pos_inf_count(self):
  68. """Get count of positive INF."""
  69. return self._pos_inf_count
  70. class TensorUtils:
  71. """Tensor Utils class."""
  72. @staticmethod
  73. def validate_dims_format(dims):
  74. """
  75. Validate correct of format of dimension parameter.
  76. Args:
  77. dims (str): Dims of tensor. Its format is something like this "[0, 0, :, :]".
  78. Raises:
  79. ParamValueError: If format of dims is not correct.
  80. """
  81. if dims is not None:
  82. if not isinstance(dims, str):
  83. raise ParamTypeError(dims, str)
  84. dims = dims.strip()
  85. if not (dims.startswith('[') and dims.endswith(']')):
  86. raise ParamValueError('The value: {} of dims must be '
  87. 'start with `[` and end with `]`.'.format(dims))
  88. for dim in dims[1:-1].split(','):
  89. dim = dim.strip()
  90. if dim == ":":
  91. continue
  92. if dim.startswith('-'):
  93. dim = dim[1:]
  94. if not dim.isdigit():
  95. raise ParamValueError('The value: {} of dims in the square brackets '
  96. 'must be int or `:`.'.format(dims))
  97. @staticmethod
  98. def convert_array_from_str_dims(dims, limit=0):
  99. """
  100. Convert string of dims data to array.
  101. Args:
  102. dims (str): Specify dims of tensor.
  103. limit (int): The max flexible dimension count, default value is 0 which means that there is no limitation.
  104. Returns:
  105. list, a string like this: "[0, 0, :, :]" will convert to this value: [0, 0, None, None].
  106. Raises:
  107. ParamValueError, If flexible dimensions exceed limit value.
  108. """
  109. dims = dims.strip().lstrip('[').rstrip(']')
  110. dims_list = []
  111. count = 0
  112. for dim in dims.split(','):
  113. dim = dim.strip()
  114. if dim == ':':
  115. dims_list.append(None)
  116. count += 1
  117. else:
  118. dims_list.append(to_int(dim, "dim"))
  119. if limit and count > limit:
  120. raise ParamValueError("Flexible dimensions cannot exceed limit value: {}, size: {}"
  121. .format(limit, count))
  122. return dims_list
  123. @staticmethod
  124. def get_specific_dims_data(ndarray, dims, tensor_dims):
  125. """
  126. Get specific dims data.
  127. Args:
  128. ndarray (numpy.ndarray): An ndarray of numpy.
  129. dims (list): A list of specific dims.
  130. tensor_dims (list): A list of tensor dims.
  131. Returns:
  132. numpy.ndarray, an ndarray of specific dims tensor data.
  133. Raises:
  134. ParamValueError, If the length of param dims is not equal to the length of tensor dims or
  135. the index of param dims out of range.
  136. """
  137. if len(dims) != len(tensor_dims):
  138. raise ParamValueError("The length of param dims: {}, is not equal to the "
  139. "length of tensor dims: {}.".format(len(dims), len(tensor_dims)))
  140. indices = []
  141. for k, d in enumerate(dims):
  142. if d is not None:
  143. if d >= tensor_dims[k]:
  144. raise ParamValueError("The index: {} of param dims out of range: {}.".format(d, tensor_dims[k]))
  145. indices.append(d)
  146. else:
  147. indices.append(slice(0, tensor_dims[k]))
  148. result = ndarray[tuple(indices)]
  149. # Make sure the return type is numpy.ndarray.
  150. if not isinstance(result, np.ndarray):
  151. result = np.array(result)
  152. return result
  153. @staticmethod
  154. def get_statistics_from_tensor(tensors):
  155. """
  156. Calculates statistics data of tensor.
  157. Args:
  158. tensors (numpy.ndarray): An numpy.ndarray of tensor data.
  159. Returns:
  160. an instance of Statistics.
  161. """
  162. ma_value = np.ma.masked_invalid(tensors)
  163. total, valid = tensors.size, ma_value.count()
  164. invalids = []
  165. for isfn in np.isnan, np.isposinf, np.isneginf:
  166. if total - valid > sum(invalids):
  167. count = np.count_nonzero(isfn(tensors))
  168. invalids.append(count)
  169. else:
  170. invalids.append(0)
  171. nan_count, pos_inf_count, neg_inf_count = invalids
  172. if not valid:
  173. logger.warning('There are no valid values in the tensors(size=%d, shape=%s)', total, tensors.shape)
  174. statistics = Statistics(max_value=0,
  175. min_value=0,
  176. avg_value=0,
  177. count=total,
  178. nan_count=nan_count,
  179. neg_inf_count=neg_inf_count,
  180. pos_inf_count=pos_inf_count)
  181. return statistics
  182. # BUG: max of a masked array with dtype np.float16 returns inf
  183. # See numpy issue#15077
  184. if issubclass(tensors.dtype.type, np.floating):
  185. tensor_min = ma_value.min(fill_value=np.PINF)
  186. tensor_max = ma_value.max(fill_value=np.NINF)
  187. if tensor_min < F32_MIN or tensor_max > F32_MAX:
  188. logger.warning('Values(%f, %f) are too large, you may encounter some undefined '
  189. 'behaviours hereafter.', tensor_min, tensor_max)
  190. else:
  191. tensor_min = ma_value.min()
  192. tensor_max = ma_value.max()
  193. tensor_sum = ma_value.sum(dtype=np.float64)
  194. statistics = Statistics(max_value=tensor_max,
  195. min_value=tensor_min,
  196. avg_value=tensor_sum / valid,
  197. count=total,
  198. nan_count=nan_count,
  199. neg_inf_count=neg_inf_count,
  200. pos_inf_count=pos_inf_count)
  201. return statistics
  202. @staticmethod
  203. def get_statistics_dict(stats):
  204. """
  205. Get statistics dict according to statistics value.
  206. Args:
  207. stats (Statistics): An instance of Statistics.
  208. Returns:
  209. dict, a dict including 'max', 'min', 'avg', 'count', 'nan_count', 'neg_inf_count', 'pos_inf_count'.
  210. """
  211. statistics = {
  212. "max": float(stats.max),
  213. "min": float(stats.min),
  214. "avg": float(stats.avg),
  215. "count": stats.count,
  216. "nan_count": stats.nan_count,
  217. "neg_inf_count": stats.neg_inf_count,
  218. "pos_inf_count": stats.pos_inf_count
  219. }
  220. return statistics
  221. @staticmethod
  222. def calc_diff_between_two_tensor(first_tensor, second_tensor, tolerance):
  223. """
  224. Calculate the difference between the first tensor and the second tensor.
  225. Args:
  226. first_tensor (numpy.ndarray): Specify the first tensor.
  227. second_tensor (numpy.ndarray): Specify the second tensor.
  228. tolerance (float): The tolerance of difference between the first tensor and the second tensor.
  229. Its is a percentage. The boundary value is equal to max(abs(min),abs(max)) * tolerance.
  230. The function of min and max is being used to calculate the min value and max value of
  231. the result of the first tensor subtract the second tensor. If the absolute value of
  232. result is less than or equal to boundary value, the result will set to be zero.
  233. Returns:
  234. tuple[numpy.ndarray, OverallDiffMetric], numpy.ndarray indicates the value of the first tensor
  235. subtract the second tensor and set the value to be zero when its less than or equal to tolerance.
  236. Raises:
  237. ParamTypeError: If the type of these two tensors is not the numpy.ndarray.
  238. ParamValueError: If the shape or dtype is not the same of these two tensors.
  239. """
  240. if not isinstance(first_tensor, np.ndarray):
  241. raise ParamTypeError('first_tensor', np.ndarray)
  242. if not isinstance(second_tensor, np.ndarray):
  243. raise ParamTypeError('second_tensor', np.ndarray)
  244. if first_tensor.shape != second_tensor.shape:
  245. raise ParamValueError("the shape: {} of first tensor is not equal to shape: {} of second tensor."
  246. .format(first_tensor.shape, second_tensor.shape))
  247. if first_tensor.dtype != second_tensor.dtype:
  248. raise ParamValueError("the dtype: {} of first tensor is not equal to dtype: {} of second tensor."
  249. .format(first_tensor.dtype, second_tensor.dtype))
  250. diff_tensor = np.subtract(first_tensor, second_tensor)
  251. stats = TensorUtils.get_statistics_from_tensor(diff_tensor)
  252. boundary_value = max(abs(stats.max), abs(stats.min)) * tolerance
  253. is_close = np.isclose(first_tensor, second_tensor, atol=boundary_value, rtol=0)
  254. result = np.multiply(diff_tensor, ~is_close)
  255. return result