You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

initializer.py 14 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Initializer for cell parameters."""
  16. import numbers
  17. import math
  18. from functools import reduce
  19. import numpy as np
  20. from scipy.stats import truncnorm
  21. from .seed import get_seed, _get_graph_seed
  22. from . import dtype as mstype
  23. from .tensor import Tensor, MetaTensor
  24. from .._c_expression import random_normal
  25. _INITIALIZER_ALIAS = dict()
  26. class Initializer:
  27. """
  28. The base class of the initializer.
  29. Initialization of tensor basic attributes and model weight values.
  30. Args:
  31. kwargs (dict): Keyword arguments for Initializer.
  32. Returns:
  33. Array, an array after being initialized.
  34. """
  35. def __init__(self, **kwargs):
  36. self._kwargs = kwargs
  37. self._seed = None
  38. @property
  39. def seed(self):
  40. seed_ = self._seed if self._seed is not None else get_seed()
  41. if seed_ is None:
  42. seed_ = 1
  43. _, seed = _get_graph_seed(seed_, "init")
  44. return seed
  45. @seed.setter
  46. def seed(self, value):
  47. if not isinstance(value, int):
  48. raise TypeError("'value' must be int type.")
  49. self._seed = value
  50. def _initialize(self, *kwargs):
  51. raise NotImplementedError('Must be overridden!')
  52. def __call__(self, arr):
  53. return self._initialize(arr)
  54. def _register(*aliases):
  55. """Return the alias register."""
  56. def alias_reg(cls):
  57. name = cls.__name__
  58. name = name.lower()
  59. if name not in _INITIALIZER_ALIAS:
  60. _INITIALIZER_ALIAS[name] = cls
  61. for alias in aliases:
  62. if alias not in _INITIALIZER_ALIAS:
  63. _INITIALIZER_ALIAS[alias] = cls
  64. return cls
  65. return alias_reg
  66. def _assignment(arr, num):
  67. """Assign the value of `num` to `arr`."""
  68. if arr.shape == ():
  69. arr = arr.reshape((1))
  70. arr[:] = num
  71. arr = arr.reshape(())
  72. else:
  73. if isinstance(num, np.ndarray):
  74. arr[:] = num[:]
  75. else:
  76. arr[:] = num
  77. return arr
  78. @_register('zeros')
  79. class Zero(Initializer):
  80. """
  81. Initialize the array to zero.
  82. Args:
  83. arr (Array): The array to be assigned.
  84. Returns:
  85. Array, an array after being assigned.
  86. """
  87. def _initialize(self, arr):
  88. _assignment(arr, 0)
  89. @_register('ones')
  90. class One(Initializer):
  91. """
  92. Initialize the array to one.
  93. Args:
  94. arr (Array): The array to be assigned.
  95. Returns:
  96. Array, assigned array.
  97. """
  98. def _initialize(self, arr):
  99. _assignment(arr, 1)
  100. def _calculate_fan_in_and_fan_out(shape):
  101. """
  102. calculate fan_in and fan_out
  103. Args:
  104. shape (tuple): input shape.
  105. Returns:
  106. Tuple, a tuple with two elements, the first element is `n_in` and the second element is `n_out`.
  107. """
  108. dimensions = len(shape)
  109. if dimensions < 2:
  110. raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
  111. if dimensions == 2: # Linear
  112. fan_in = shape[1]
  113. fan_out = shape[0]
  114. else:
  115. num_input_fmaps = shape[1]
  116. num_output_fmaps = shape[0]
  117. receptive_field_size = 1
  118. if dimensions > 2:
  119. receptive_field_size = shape[2] * shape[3]
  120. fan_in = num_input_fmaps * receptive_field_size
  121. fan_out = num_output_fmaps * receptive_field_size
  122. return fan_in, fan_out
  123. def _calculate_correct_fan(shape, mode):
  124. """
  125. Calculate fan.
  126. Args:
  127. shape (tuple): input shape.
  128. mode (str): only support fan_in and fan_out.
  129. Returns:
  130. fan_in or fan_out.
  131. """
  132. mode = mode.lower()
  133. valid_modes = ['fan_in', 'fan_out']
  134. if mode not in valid_modes:
  135. raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
  136. fan_in, fan_out = _calculate_fan_in_and_fan_out(shape)
  137. return fan_in if mode == 'fan_in' else fan_out
  138. def _calculate_gain(nonlinearity, param=None):
  139. """
  140. Calculate gain.
  141. Args:
  142. nonlinearity (str): nonlinearity function.
  143. param (str): used to calculate negative_slope.
  144. Returns:
  145. number.
  146. """
  147. linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
  148. if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
  149. res = 1
  150. elif nonlinearity == 'tanh':
  151. res = 5.0 / 3
  152. elif nonlinearity == 'relu':
  153. res = math.sqrt(2.0)
  154. elif nonlinearity == 'leaky_relu':
  155. if param is None:
  156. negative_slope = 0.01
  157. elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
  158. # True/False are instances of int, hence check above
  159. negative_slope = param
  160. else:
  161. raise ValueError("negative_slope {} not a valid number".format(param))
  162. res = math.sqrt(2.0 / (1 + negative_slope ** 2))
  163. else:
  164. raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
  165. return res
  166. def _calculate_in_and_out(arr):
  167. """
  168. Calculate n_in and n_out.
  169. Args:
  170. arr (Array): Input array.
  171. Returns:
  172. Tuple, a tuple with two elements, the first element is `n_in` and the second element is `n_out`.
  173. """
  174. dim = len(arr.shape)
  175. if dim < 2:
  176. raise ValueError("If initialize data with xavier uniform, the dimension of data must be greater than 1.")
  177. n_in = arr.shape[1]
  178. n_out = arr.shape[0]
  179. if dim > 2:
  180. counter = reduce(lambda x, y: x * y, arr.shape[2:])
  181. n_in *= counter
  182. n_out *= counter
  183. return n_in, n_out
  184. @_register('xavier_uniform')
  185. class XavierUniform(Initializer):
  186. r"""
  187. Initialize the array with xavier uniform algorithm, and from a uniform distribution collect samples within
  188. U[-boundary, boundary] The boundary is defined as :
  189. where :math:`boundary = gain * \sqrt{\frac{6}{n_{in} + n_{out}}}`.
  190. where :math:`n_{in}` is the number of input units in the weight tensor.
  191. where :math:`n_{out}` is the number of output units in the weight tensor.
  192. Args:
  193. gain (Array): The array to be assigned. Default: 1.
  194. Returns:
  195. Array, assigned array.
  196. """
  197. def __init__(self, gain=1):
  198. super(XavierUniform, self).__init__(gain=gain)
  199. self.gain = gain
  200. def _initialize(self, arr):
  201. n_in, n_out = _calculate_in_and_out(arr)
  202. boundary = self.gain * math.sqrt(6.0 / (n_in + n_out))
  203. data = np.random.uniform(-boundary, boundary, arr.shape)
  204. _assignment(arr, data)
  205. @_register('he_uniform')
  206. class HeUniform(Initializer):
  207. r"""
  208. Initialize the array with He kaiming uniform algorithm, and from a uniform distribution collect samples within
  209. U[-boundary, boundary] The boundary is defined as :
  210. where :math:`boundary = \sqrt{\frac{6}{n_{in}}}`.
  211. where :math:`n_{in}` is the number of input units in the weight tensor.
  212. Args:
  213. arr (Array): The array to be assigned.
  214. Returns:
  215. Array, assigned array.
  216. """
  217. def _initialize(self, arr):
  218. n_in, _ = _calculate_in_and_out(arr)
  219. boundary = math.sqrt(6.0 / n_in)
  220. data = np.random.uniform(-boundary, boundary, arr.shape)
  221. _assignment(arr, data)
  222. @_register('he_normal')
  223. class HeNormal(Initializer):
  224. r"""
  225. Initialize the array with He kaiming Normal algorithm, and from a normal distribution collect samples within
  226. N(0, sigma).
  227. Args:
  228. negative_slope (int, float, bool): Default: 0, used when nonlinearity is 'leaky_relu'.
  229. mode (str): Default: fan_in.
  230. nonlinearity (str): Default: leaky_relu.
  231. Returns:
  232. Array, assigned array.
  233. """
  234. def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'):
  235. super(HeNormal, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity)
  236. self.negative_slope = negative_slope
  237. self.mode = mode
  238. self.nonlinearity = nonlinearity
  239. def _initialize(self, arr):
  240. fan = _calculate_correct_fan(arr.shape, self.mode)
  241. gain = _calculate_gain(self.nonlinearity, self.negative_slope)
  242. std = gain / math.sqrt(fan)
  243. data = np.random.normal(0, std, arr.shape)
  244. _assignment(arr, data)
  245. class Constant(Initializer):
  246. """
  247. Initialize a constant.
  248. Args:
  249. value (Union[int, numpy.ndarray]): The value to initialize.
  250. Returns:
  251. Array, an array after being assigned.
  252. """
  253. def __init__(self, value):
  254. super(Constant, self).__init__(value=value)
  255. self.value = value
  256. def _initialize(self, arr):
  257. _assignment(arr, self.value)
  258. @_register()
  259. class Uniform(Initializer):
  260. """
  261. Initialize a uniform array, and obtain values U(-scale, scale) from the uniform distribution
  262. to fill the input tensor.
  263. Args:
  264. scale (float): The scale of the array. Default: 0.07.
  265. Returns:
  266. Array, uniform array.
  267. """
  268. def __init__(self, scale=0.07):
  269. super(Uniform, self).__init__(scale=scale)
  270. self.scale = scale
  271. def _initialize(self, arr):
  272. tmp = np.random.uniform(-self.scale, self.scale, arr.shape)
  273. _assignment(arr, tmp)
@_register()
class Normal(Initializer):
    """
    Initialize a normal array, and obtain values N(0, sigma) from the normal distribution
    to fill the input tensor.

    Args:
        sigma (float): The sigma of the array. Default: 0.01.

    Returns:
        Array, normal array.
    """

    def __init__(self, sigma=0.01):
        super(Normal, self).__init__(sigma=sigma)
        self.sigma = sigma

    def _initialize(self, arr):
        # Draw through the C++ random_normal kernel so sampling honors the
        # graph-seed machinery (see Initializer.seed).
        seed = self.seed
        output_tensor = Tensor(np.zeros(arr.shape, dtype=np.float32))
        random_normal(0, self.sigma, arr.shape, seed, output_tensor)
        output_data = output_tensor.asnumpy()
        # NOTE(review): sigma is passed to random_normal above AND multiplied in
        # again here. If random_normal already scales its output by sigma, this
        # yields N(0, sigma^2) instead of N(0, sigma) -- confirm against the
        # _c_expression.random_normal signature before changing either line.
        output_data *= self.sigma
        _assignment(arr, output_data)
  294. @_register()
  295. class TruncatedNormal(Initializer):
  296. """
  297. Initialize a truncated normal distribution which is a bounded normal distribution within N(low, high).
  298. Args:
  299. sigma (float): The sigma of the array. Default: 0.01.
  300. Returns:
  301. Array, truncated normal array.
  302. """
  303. def __init__(self, sigma=0.01):
  304. super(TruncatedNormal, self).__init__(sigma=sigma)
  305. self.sigma = sigma
  306. def _initialize(self, arr):
  307. tmp = truncnorm.rvs(-2, 2, loc=0, scale=self.sigma, size=arr.shape, random_state=None)
  308. _assignment(arr, tmp)
  309. def initializer(init, shape=None, dtype=mstype.float32):
  310. """
  311. Create and initialize a tensor.
  312. Args:
  313. init (Union[Tensor, str, Initializer, numbers.Number]): Initialize value.
  314. - `str`: The `init` should be the alias of the class inheriting from `Initializer` and the corresponding
  315. class will be called.
  316. - `Initializer`: The `init` should be the class inheriting from `Initializer` to initialize tensor.
  317. - `numbers.Number`: The `Constant` will be called to initialize tensor.
  318. shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of
  319. output. Default: None.
  320. dtype (:class:`mindspore.dtype`): The type of data in initialized tensor. Default: mindspore.float32.
  321. Returns:
  322. Union[Tensor, MetaTensor], When `init` is Tensor, the return is Tensor object,
  323. otherwise the return is Initialize object.
  324. Examples:
  325. >>> tensor = initializer('ones', [1, 2, 3], mindspore.float32)
  326. >>> tensor = initializer(One(), [1, 2, 3], mindspore.float32)
  327. >>> tensor = initializer(0, [1, 2, 3], mindspore.float32)
  328. """
  329. if not isinstance(init, (Tensor, numbers.Number, str, Initializer)):
  330. raise TypeError("Unsupported init type '{}'.".format(type(init)))
  331. if isinstance(init, Tensor):
  332. init_shape = init.shape
  333. shape = shape if isinstance(shape, (tuple, list)) else [shape]
  334. if shape is not None and init_shape != tuple(shape):
  335. raise ValueError("The shape of init should be same as variable shape, but got the shape of init {} and "
  336. "the variable shape {}.".format(list(init.shape), shape))
  337. return init
  338. if isinstance(shape, list):
  339. shape = tuple(shape)
  340. elif isinstance(shape, numbers.Number):
  341. shape = (shape,)
  342. for value in shape if shape is not None else ():
  343. if not isinstance(value, int) or value <= 0:
  344. raise ValueError(f"shape is invalid, shape value must be positive integer, shape:{shape}")
  345. if isinstance(init, str):
  346. init = _INITIALIZER_ALIAS[init.lower()]()
  347. if init is None:
  348. raise ValueError("The class corresponding to '{}' was not found.".format(init))
  349. elif isinstance(init, numbers.Number):
  350. init = Constant(init)
  351. shape = shape if shape is not None else init.shape
  352. init_obj = MetaTensor(dtype, shape, init)
  353. return init_obj
# Public API of this module; star-imports expose only these names.
__all__ = [
    'Initializer',
    'initializer',
    'TruncatedNormal',
    'Normal',
    'Uniform',
    'HeUniform',
    'HeNormal',
    'XavierUniform',
    'One',
    'Zero',
    'Constant']