# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""normalization"""
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.ops.primitive import constexpr
import mindspore.context as context
from mindspore._checkparam import Validator as validator
from mindspore._extends import cell_attr_register
from mindspore.communication.management import get_group_size, get_rank
from mindspore.communication import management
from mindspore.ops import _selected_ops
from ..cell import Cell

__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'LayerNorm', 'GroupNorm',
           'GlobalBatchNorm', 'InstanceNorm2d']


class _BatchNorm(Cell):
    """Batch Normalization base class."""

    @cell_attr_register
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1,
                 input_dims='2d',
                 data_format='NCHW'):
        super(_BatchNorm, self).__init__()
        validator.check_value_type('num_features', num_features, [int], self.cls_name)
        if num_features < 1:
            raise ValueError("num_features must be at least 1")
        if momentum < 0 or momentum > 1:
            raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("The NHWC format is only supported on the GPU target.")
        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(
            moving_mean_init, num_features), name="mean", requires_grad=False)
        self.moving_variance = Parameter(initializer(
            moving_var_init, num_features), name="variance", requires_grad=False)
        self.gamma = Parameter(initializer(
            gamma_init, num_features), name="gamma", requires_grad=affine)
        self.beta = Parameter(initializer(
            beta_init, num_features), name="beta", requires_grad=affine)
        self.group = validator.check_positive_int(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i), self.rank_list[i])
                    self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self._target = context.get_context("device_target")
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        # The backend kernels take the weight of the new batch statistic, so
        # the user-facing momentum (the weight of the running statistic) is
        # inverted here.
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False
        if self._target == "Ascend":
            self.bn_train = P.BatchNorm(is_training=True,
                                        epsilon=self.eps,
                                        momentum=self.momentum)
        if self._target == "GPU":
            self.bn_train = P.FusedBatchNormEx(mode=1,
                                               epsilon=self.eps,
                                               momentum=self.momentum,
                                               data_format=self.format)
        if self._target == "CPU":
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or
                                                      (self.is_graph_mode and self._target == "Ascend"))

        data_parallel_strategy = ((1,), (1,))
        data_parallel_strategy_one = ((1,), ())
        self.sub_mean = P.Sub().shard(data_parallel_strategy)
        self.sub_var = P.Sub().shard(data_parallel_strategy)
        self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
        self.mul_var = P.Mul().shard(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)

    def _check_data_dim(self, x):
        raise NotImplementedError

    def list_group(self, world_rank, group_size):
        if group_size > get_group_size():
            raise ValueError("group size can not be greater than local rank size, group size is {}, "
                             "local_rank_size is {}".format(group_size, get_group_size()))
        if len(world_rank) % group_size != 0:
            raise ValueError("the length of world_rank must be divisible by group_size.")
        world_rank_list = zip(*(iter(world_rank),) * group_size)
        group_list = [list(i) for i in world_rank_list]
        return group_list
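
    # For example (traced by hand): list_group([0, 1, 2, 3], 2) chunks the
    # world ranks into [[0, 1], [2, 3]], i.e. consecutive ranks are grouped
    # into lists of `group_size` each.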

    def _global_sync(self, x, axes, re_shape):
        """calculate global batch normalization output"""
        x_mean = self.reduce_mean(x, axes)
        x_mean_square = self.reduce_mean(self.square(x), axes)
        global_batch_mean = self.all_reduce(x_mean) / self.group
        global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
        global_mean = global_batch_mean
        global_var = global_batch_mean_square - self.square(global_mean)
        var_sqrt = self.sqrt(global_var + self.eps)
        mean_first = (x - global_mean) / var_sqrt
        y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape)
        mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean)
        tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
        var_sub = self.sub_var(self.reshape(self.moving_variance, re_shape), global_var)
        tmp_variance = self.mul_var(var_sub, self.cast(self.momentum, self.dtype(var_sub)))
        y = F.depend(y, self.assign_sub_mean(self.moving_mean, self.reshape(tmp_mean, self.shape(self.moving_mean))))
        y = F.depend(y, self.assign_sub_var(self.moving_variance,
                                            self.reshape(tmp_variance, self.shape(self.moving_variance))))
        return y
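
    # The two AssignSub updates above implement, with m = self.momentum
    # (which stores 1 - user_momentum):
    #     moving_stat <- moving_stat - m * (moving_stat - batch_stat)
    #                  = user_momentum * moving_stat + (1 - user_momentum) * batch_stat
    # i.e. an exponential moving average of the per-step global batch
    # statistics.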

    def construct(self, x):
        _shape_check_bn(self.shape(x), self.input_dims)
        if self.use_batch_statistics is None:
            flag = self.training
        else:
            flag = self.use_batch_statistics
        if flag:
            if self.enable_global_sync:
                axes, re_shape = _shape_infer(F.shape(x), self.num_features)
                return self._global_sync(x, axes, re_shape)
            return self.bn_train(x,
                                 self.gamma,
                                 self.beta,
                                 self.moving_mean,
                                 self.moving_variance)[0]
        return self.bn_infer(x,
                             self.gamma,
                             self.beta,
                             self.moving_mean,
                             self.moving_variance)[0]

    def extend_repr(self):
        return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format(
            self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance)


@constexpr
def _channel_check(channel, num_channel):
    if channel != num_channel:
        raise ValueError("the input channel is not equal to num_channel")


@constexpr
def _shape_check(in_shape):
    if len(in_shape) != 4:
        raise ValueError("The input must have 4 dims.")


@constexpr
def _shape_check_bn(in_shape, in_dims):
    dim = len(in_shape)
    if in_dims == '1d' and dim != 2:
        raise ValueError("The input must have 2 dims.")
    if in_dims == '2d' and dim != 4:
        raise ValueError("The input must have 4 dims.")
    if in_dims == 'both' and dim != 2 and dim != 4:
        raise ValueError("The input must have 2 or 4 dims.")


@constexpr
def _shape_infer(x_shape, num_feature):
    """global batch normalization shape and axes infer"""
    if len(x_shape) == 4:
        axes = (0, 2, 3)
        re_shape = (1, num_feature, 1, 1)
    else:
        axes = (0,)
        re_shape = (1, num_feature)
    return axes, re_shape
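
# Traced by hand for illustration:
#   _shape_infer((32, 64, 14, 14), 64) -> axes=(0, 2, 3), re_shape=(1, 64, 1, 1)
#   _shape_infer((32, 64), 64)         -> axes=(0,),      re_shape=(1, 64)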


class BatchNorm1d(_BatchNorm):
    r"""
    Batch normalization layer over a 2D input.

    Batch Normalization is widely used in convolutional networks. This layer
    applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to
    reduce internal covariate shift as described in the paper
    `Batch Normalization: Accelerating Deep Network Training by
    Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It
    rescales and recenters the feature using a mini-batch of data and
    the learned parameters, as described in the following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Note:
        The implementation of BatchNorm differs between graph mode and pynative mode,
        so changing the mode after the network has been initialized is not recommended.

    Args:
        num_features (int): `C` from an expected input of size (N, C).
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        momentum (float): A floating-point hyperparameter of the momentum for the
            running_mean and running_var computation. Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        use_batch_statistics (bool): If true, use the mean and variance of the current batch data. If false,
            use the stored moving mean and variance. If None, the training process will use the mean and
            variance of the current batch data and track the running statistics, while the evaluation
            process will use the running mean and variance. Default: None.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in})`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out})`.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If `eps` is not a float.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> net = nn.BatchNorm1d(num_features=4)
        >>> np.random.seed(0)
        >>> input = Tensor(np.random.randint(0, 255, [2, 4]), mindspore.float32)
        >>> output = net(input)
        >>> print(output)
        [[171.99915 46.999763 116.99941 191.99904 ]
         [ 66.999664 250.99875 194.99902 102.99948 ]]
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None):
        super(BatchNorm1d, self).__init__(num_features,
                                          eps,
                                          momentum,
                                          affine,
                                          gamma_init,
                                          beta_init,
                                          moving_mean_init,
                                          moving_var_init,
                                          use_batch_statistics,
                                          input_dims='1d')

    def _check_data_dim(self, x):
        if x.ndim != 2:
            raise ValueError("The input of BatchNorm1d must have 2 dims, but got {}.".format(x.ndim))


class BatchNorm2d(_BatchNorm):
    r"""
    Batch normalization layer over a 4D input.

    Batch Normalization is widely used in convolutional networks. This layer
    applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with
    an additional channel dimension) to reduce internal covariate shift as described
    in the paper `Batch Normalization: Accelerating Deep Network Training by
    Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It
    rescales and recenters the feature using a mini-batch of data and
    the learned parameters, as described in the following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Note:
        The implementation of BatchNorm differs between graph mode and pynative mode,
        so the mode cannot be changed after the network has been initialized.
        Note that the formula for updating the running_mean and running_var is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times x_t + \text{momentum} \times \hat{x}`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the new observed value.

    Args:
        num_features (int): `C` from an expected input of size (N, C, H, W).
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        momentum (float): A floating-point hyperparameter of the momentum for the
            running_mean and running_var computation. Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        use_batch_statistics (bool): If true, use the mean and variance of the current batch data. If false,
            use the stored moving mean and variance. If None, the training process will use the mean and
            variance of the current batch data and track the running statistics, while the evaluation
            process will use the running mean and variance. Default: None.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If `eps` is not a float.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.BatchNorm2d(num_features=3)
        >>> np.random.seed(0)
        >>> input = Tensor(np.random.randint(0, 255, [1, 3, 2, 2]), mindspore.float32)
        >>> output = net(input)
        >>> print(output)
        [[[[171.99915 46.999763 ]
           [116.99941 191.99904 ]]
          [[ 66.999664 250.99875 ]
           [194.99902 102.99948 ]]
          [[ 8.999955 210.99895 ]
           [ 20.999895 241.9988 ]]]]
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 data_format='NCHW'):
        super(BatchNorm2d, self).__init__(num_features,
                                          eps,
                                          momentum,
                                          affine,
                                          gamma_init,
                                          beta_init,
                                          moving_mean_init,
                                          moving_var_init,
                                          use_batch_statistics,
                                          input_dims='2d',
                                          data_format=data_format)

    def _check_data_dim(self, x):
        if x.ndim != 4:
            raise ValueError("The input of BatchNorm2d must have 4 dims, but got {}.".format(x.ndim))


class BatchNorm3d(Cell):
    r"""
    Batch normalization layer over a 5D input.

    Batch Normalization is widely used in convolutional networks. This layer
    applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with
    an additional channel dimension) to reduce internal covariate shift.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Note:
        The implementation of BatchNorm differs between graph mode and pynative mode,
        so the mode cannot be changed after the network has been initialized.
        Note that the formula for updating the running_mean and running_var is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times x_t + \text{momentum} \times \hat{x}`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the new observed value.

    Args:
        num_features (int): `C` from an expected input of size (N, C, D, H, W).
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        momentum (float): A floating-point hyperparameter of the momentum for the
            running_mean and running_var computation. Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        use_batch_statistics (bool): If true, use the mean and variance of the current batch data. If false,
            use the stored moving mean and variance. If None, the training process will use the mean and
            variance of the current batch data and track the running statistics, while the evaluation
            process will use the running mean and variance. Default: None.
        data_format (str): The only supported data format is 'NCDHW'. Default: 'NCDHW'.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.BatchNorm3d(num_features=3)
        >>> np.random.seed(0)
        >>> input = Tensor(np.random.randint(0, 255, [16, 3, 10, 32, 32]), mindspore.float32)
        >>> output = net(input)
        >>> print(output.shape)
        (16, 3, 10, 32, 32)
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 data_format='NCDHW'):
        super(BatchNorm3d, self).__init__()
        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.cls_name)
        self.bn2d = BatchNorm2d(num_features=num_features,
                                eps=eps,
                                momentum=momentum,
                                affine=affine,
                                gamma_init=gamma_init,
                                beta_init=beta_init,
                                moving_mean_init=moving_mean_init,
                                moving_var_init=moving_var_init,
                                use_batch_statistics=use_batch_statistics,
                                data_format="NCHW")
        self.shape = P.Shape()
        self.reshape = P.Reshape()

    def construct(self, input_x):
        x_shape = self.shape(input_x)
        input_x = self.reshape(input_x, (x_shape[0], x_shape[1], x_shape[2] * x_shape[3], x_shape[4]))
        bn2d_out = self.bn2d(input_x)
        bn3d_out = self.reshape(bn2d_out, x_shape)
        return bn3d_out
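
# Shape walkthrough for BatchNorm3d.construct above (traced by hand): an input
# of shape (16, 3, 10, 32, 32) is viewed as (16, 3, 320, 32) by folding D into
# H, normalized per channel by the inner BatchNorm2d (the per-channel
# statistics are unchanged by the folding), then reshaped back to
# (16, 3, 10, 32, 32).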


class GlobalBatchNorm(_BatchNorm):
    r"""
    Global batch normalization layer over an N-dimensional input.

    Global batch normalization is cross-device synchronized batch normalization. Ordinary
    batch normalization only normalizes the data within each device, while global
    normalization normalizes the input within a group of devices. It has been described in
    the paper `Batch Normalization: Accelerating Deep Network Training by
    Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
    feature using a mini-batch of data and the learned parameters, as described in the following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Note:
        Currently, GlobalBatchNorm only supports 2D and 4D inputs.

    Args:
        num_features (int): `C` from an expected input of size (N, C, H, W).
        device_num_each_group (int): The number of devices in each group. Default: 2.
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        momentum (float): A floating-point hyperparameter of the momentum for the
            running_mean and running_var computation. Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        use_batch_statistics (bool): If true, use the mean and variance of the current batch data. If false,
            use the stored moving mean and variance. If None, the training process will use the mean and
            variance of the current batch data and track the running statistics, while the evaluation
            process will use the running mean and variance. Default: None.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `num_features` or `device_num_each_group` is not an int.
        TypeError: If `eps` is not a float.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: If `device_num_each_group` is less than 2.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> # This example should be run with multiple processes.
        >>> # Please refer to the tutorial > Distributed Training on mindspore.cn.
        >>> import numpy as np
        >>> from mindspore.communication import init
        >>> from mindspore import context
        >>> from mindspore.context import ParallelMode
        >>> from mindspore import nn, Tensor
        >>> from mindspore.common import dtype as mstype
        >>>
        >>> context.set_context(mode=context.GRAPH_MODE)
        >>> init()
        >>> context.reset_auto_parallel_context()
        >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
        >>> np.random.seed(0)
        >>> global_bn_op = nn.GlobalBatchNorm(num_features=3, device_num_each_group=2)
        >>> input = Tensor(np.random.randint(0, 255, [1, 3, 2, 2]), mstype.float32)
        >>> output = global_bn_op(input)
        >>> print(output)
        [[[[171.99915 46.999763 ]
           [116.99941 191.99904 ]]
          [[ 66.999664 250.99875 ]
           [194.99902 102.99948 ]]
          [[ 8.999955 210.99895 ]
           [ 20.9999895 241.9988 ]]]]
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=2):
        super(GlobalBatchNorm, self).__init__(num_features,
                                              eps,
                                              momentum,
                                              affine,
                                              gamma_init,
                                              beta_init,
                                              moving_mean_init,
                                              moving_var_init,
                                              use_batch_statistics,
                                              device_num_each_group,
                                              input_dims='both')
        self.group = validator.check_positive_int(device_num_each_group)
        if self.group <= 1:
            raise ValueError("device_num_each_group must be greater than 1.")

    def _check_data_dim(self, x):
        if x.ndim != 2 and x.ndim != 4:
            raise ValueError("The input of GlobalBatchNorm must have 2 or 4 dims, but got {}.".format(x.ndim))
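
# Illustrative synchronization layout for GlobalBatchNorm (traced by hand):
# with 8 devices and device_num_each_group=2, list_group partitions the ranks
# into [[0, 1], [2, 3], [4, 5], [6, 7]]; in _global_sync each device then
# AllReduces its batch mean and mean-of-squares within its own 2-device group,
# so statistics are shared inside a group but not across groups.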


class LayerNorm(Cell):
    r"""
    Applies Layer Normalization over a mini-batch of inputs.

    Layer normalization is widely used in recurrent neural networks. It applies
    normalization on a mini-batch of inputs for each single training case as described
    in the paper `Layer Normalization <https://arxiv.org/pdf/1607.06450.pdf>`_. Unlike batch
    normalization, layer normalization performs exactly the same computation at training and
    testing time: the statistics are computed across the channel and spatial dimensions of
    each individual sample rather than across the batch. It can be described using the
    following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Args:
        normalized_shape (Union(tuple[int], list[int])): The normalization is performed over
            axes `begin_norm_axis ... R - 1`.
        begin_norm_axis (int): The first normalization dimension: normalization will be performed along dimensions
            `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1.
        begin_params_axis (int): The first parameter (beta, gamma) dimension: the scale and centering parameters
            will have dimensions `begin_params_axis: rank(inputs)` and will be broadcast with
            the normalized inputs accordingly, the value should be in [-1, rank(input)). Default: -1.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        epsilon (float): A value added to the denominator for numerical stability. Default: 1e-7.

    Inputs:
        - **input_x** (Tensor) - The shape of `input_x` is :math:`(x_1, x_2, ..., x_R)`,
          and `input_shape[begin_norm_axis:]` is equal to `normalized_shape`.

    Outputs:
        Tensor, the normalized and scaled offset tensor, has the same shape and data type as the `input_x`.

    Raises:
        TypeError: If `normalized_shape` is neither a list nor a tuple.
        TypeError: If `begin_norm_axis` or `begin_params_axis` is not an int.
        TypeError: If `epsilon` is not a float.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> x = Tensor(np.ones([20, 5, 10, 10]), mindspore.float32)
        >>> shape1 = x.shape[1:]
        >>> m = nn.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1)
        >>> output = m(x).shape
        >>> print(output)
        (20, 5, 10, 10)
    """

    def __init__(self,
                 normalized_shape,
                 begin_norm_axis=-1,
                 begin_params_axis=-1,
                 gamma_init='ones',
                 beta_init='zeros',
                 epsilon=1e-7
                 ):
        super(LayerNorm, self).__init__()
        if not isinstance(normalized_shape, (tuple, list)):
            raise TypeError("The type of 'normalized_shape' should be tuple[int] or list[int], but '{}' type is {}."
                            .format(normalized_shape, type(normalized_shape)))
        self.normalized_shape = normalized_shape
        self.begin_norm_axis = begin_norm_axis
        self.begin_params_axis = begin_params_axis
        self.epsilon = epsilon
        self.gamma = Parameter(initializer(
            gamma_init, normalized_shape), name="gamma")
        self.beta = Parameter(initializer(
            beta_init, normalized_shape), name="beta")
        self.layer_norm = _selected_ops.LayerNorm(begin_norm_axis=self.begin_norm_axis,
                                                  begin_params_axis=self.begin_params_axis,
                                                  epsilon=self.epsilon)

    def construct(self, input_x):
        y, _, _ = self.layer_norm(input_x, self.gamma, self.beta)
        return y

    def extend_repr(self):
        """Display instance object as string."""
        return 'normalized_shape={}, begin_norm_axis={}, begin_params_axis={}, gamma={}, beta={}'.format(
            self.normalized_shape, self.begin_norm_axis, self.begin_params_axis, self.gamma, self.beta)
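
# Illustrative relation between the LayerNorm arguments (traced by hand): for
# an input of shape (20, 5, 10, 10) with begin_norm_axis=1, statistics are
# computed over input_shape[1:] = (5, 10, 10), so normalized_shape must be
# (5, 10, 10) and gamma/beta take that same shape.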


class InstanceNorm2d(Cell):
    r"""
    Instance normalization layer over a 4D input.

    This layer applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with
    an additional channel dimension) as described in the paper `Instance Normalization: The Missing Ingredient for
    Fast Stylization <https://arxiv.org/abs/1607.08022>`_. It rescales and recenters the feature using a mini-batch
    of data and the learned parameters, as described in the following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Note:
        The formula for updating the running_mean and running_var is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times x_t + \text{momentum} \times \hat{x}`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the new observed value.

    Args:
        num_features (int): `C` from an expected input of size (N, C, H, W).
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        momentum (float): A floating-point hyperparameter of the momentum for the
            running_mean and running_var computation. Default: 0.1.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'.
        use_batch_statistics (bool): If true, use the mean and variance of the current batch data. If false,
            use the stored moving mean and variance. Default: True.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C, H, W)`. Data type: float16 or float32.

    Outputs:
        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C, H, W)`, with the same
        type and shape as `input`.

    Raises:
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].

    Supported Platforms:
        ``GPU``

    Examples:
        >>> net = nn.InstanceNorm2d(3)
        >>> np.random.seed(0)
        >>> input = Tensor(np.random.randint(0, 255, [2, 3, 2, 2]), mindspore.float32)
        >>> output = net(input)
        >>> print(output.shape)
        (2, 3, 2, 2)
    """

    @cell_attr_register
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.1,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=True,
                 input_dims='2d'):
        super(InstanceNorm2d, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")
        if momentum < 0 or momentum > 1:
            raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(
            moving_mean_init, num_features), name="mean", requires_grad=False)
        self.moving_variance = Parameter(initializer(
            moving_var_init, num_features), name="variance", requires_grad=False)
        self.gamma = Parameter(initializer(
            gamma_init, num_features), name="gamma", requires_grad=affine)
        self.beta = Parameter(initializer(
            beta_init, num_features), name="beta", requires_grad=affine)
        self.shape = P.Shape()
        self.momentum = momentum
        self.instance_bn = P.InstanceNorm(is_training=self.use_batch_statistics,
                                          epsilon=self.eps,
                                          momentum=self.momentum)

    def _check_data_dim(self, x):
        raise NotImplementedError

    def construct(self, x):
        _shape_check_bn(self.shape(x), self.input_dims)
        return self.instance_bn(x,
                                self.gamma,
                                self.beta,
                                self.moving_mean,
                                self.moving_variance)[0]

    def extend_repr(self):
        return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format(
            self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance)
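
# A minimal NumPy sketch of the per-instance statistics the InstanceNorm
# kernel above computes (illustrative only; `np`, `x`, `gamma`, `beta` and
# `eps` are assumed here, not part of this module):
#   mean = x.mean(axis=(2, 3), keepdims=True)  # one mean per (sample, channel)
#   var = x.var(axis=(2, 3), keepdims=True)
#   y = gamma.reshape(1, -1, 1, 1) * (x - mean) / np.sqrt(var + eps) \
#       + beta.reshape(1, -1, 1, 1)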


class GroupNorm(Cell):
    r"""
    Group Normalization over a mini-batch of inputs.

    Group normalization is widely used in vision tasks where small batch sizes make batch
    statistics unreliable. It applies normalization on a mini-batch of inputs for each single
    training case as described in the paper `Group Normalization <https://arxiv.org/pdf/1803.08494.pdf>`_.
    Group normalization divides the channels into groups and computes the mean and variance for
    normalization within each group, and it performs stably over a wide range of batch sizes.
    It can be described using the following formula.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Args:
        num_groups (int): The number of groups to be divided along the channel dimension.
        num_channels (int): The number of input channels; must be divisible by `num_groups`.
        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
        affine (bool): A bool value, this layer will have learnable affine parameters when set to true. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'ones'. If gamma_init is a Tensor, the shape must be [num_channels].
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
            'he_uniform', etc. Default: 'zeros'. If beta_init is a Tensor, the shape must be [num_channels].

    Inputs:
        - **input_x** (Tensor) - The input feature with shape [N, C, H, W].

    Outputs:
        Tensor, the normalized and scaled offset tensor, has the same shape and data type as the `input_x`.

    Raises:
        TypeError: If `num_groups` or `num_channels` is not an int.
        TypeError: If `eps` is not a float.
        TypeError: If `affine` is not a bool.
        ValueError: If `num_groups` or `num_channels` is less than 1.
        ValueError: If `num_channels` is not divisible by `num_groups`.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> group_norm_op = nn.GroupNorm(2, 2)
        >>> x = Tensor(np.ones([1, 2, 4, 4], np.float32))
        >>> output = group_norm_op(x)
        >>> print(output)
        [[[[0. 0. 0. 0.]
           [0. 0. 0. 0.]
           [0. 0. 0. 0.]
           [0. 0. 0. 0.]]
          [[0. 0. 0. 0.]
           [0. 0. 0. 0.]
           [0. 0. 0. 0.]
           [0. 0. 0. 0.]]]]
    """

    def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, gamma_init='ones', beta_init='zeros'):
        super(GroupNorm, self).__init__()
        self.num_groups = validator.check_positive_int(num_groups)
        self.num_channels = validator.check_positive_int(num_channels)
        if num_channels % num_groups != 0:
            raise ValueError("num_channels must be divisible by num_groups")
        self.eps = validator.check_value_type('eps', eps, (float,), type(self).__name__)
        self.affine = validator.check_bool(affine)

        gamma = initializer(gamma_init, num_channels)
        beta = initializer(beta_init, num_channels)
        if self.affine:
            self.gamma = Parameter(gamma, name='gamma')
            self.beta = Parameter(beta, name='beta')
        else:
            self.gamma = gamma
            self.beta = beta
        self.shape = F.shape
        self.reshape = F.reshape
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = F.square
        self.reduce_sum = P.ReduceSum(keep_dims=True)
        self.sqrt = P.Sqrt()

    def _cal_output(self, x):
        """calculate groupnorm output"""
        batch, channel, height, width = self.shape(x)
        _channel_check(channel, self.num_channels)
        # Fold each group's channels and spatial positions into one axis, so
        # the mean/variance are computed per (sample, group).
        x = self.reshape(x, (batch, self.num_groups, -1))
        mean = self.reduce_mean(x, 2)
        var = self.reduce_sum(self.square(x - mean), 2) / (channel * height * width / self.num_groups)
        std = self.sqrt(var + self.eps)
        x = (x - mean) / std
        x = self.reshape(x, (batch, channel, height, width))
        output = x * self.reshape(self.gamma, (-1, 1, 1)) + self.reshape(self.beta, (-1, 1, 1))
        return output
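
    # A NumPy sketch of the same computation (illustrative only; assumes
    # arrays x of shape (N, C, H, W), gamma/beta of shape (C,), and group
    # count G with C % G == 0):
    #   g = x.reshape(N, G, -1)
    #   y = (g - g.mean(axis=2, keepdims=True)) / np.sqrt(g.var(axis=2, keepdims=True) + eps)
    #   y = y.reshape(N, C, H, W) * gamma.reshape(-1, 1, 1) + beta.reshape(-1, 1, 1)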

    def construct(self, x):
        _shape_check(self.shape(x))
        output = self._cal_output(x)
        return output

    def extend_repr(self):
        """Display instance object as string."""
        return 'num_groups={}, num_channels={}'.format(self.num_groups, self.num_channels)