You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

conv.py 67 kB

5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """conv"""
  16. import numpy as np
  17. from mindspore import log as logger
  18. from mindspore import context
  19. from mindspore.ops import operations as P
  20. from mindspore.ops.primitive import constexpr
  21. from mindspore.common.parameter import Parameter
  22. from mindspore.common.initializer import initializer
  23. from mindspore.common.tensor import Tensor
  24. from mindspore._checkparam import Validator, Rel, twice, _check_3d_int_or_tuple
  25. from mindspore._extends import cell_attr_register
  26. from ..cell import Cell
  27. __all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose', 'Conv3d', 'Conv3dTranspose']
  28. class _Conv(Cell):
  29. """
  30. Applies a N-D convolution over an input signal composed of several input planes.
  31. """
  32. def __init__(self,
  33. in_channels,
  34. out_channels,
  35. kernel_size,
  36. stride,
  37. pad_mode,
  38. padding,
  39. dilation,
  40. group,
  41. has_bias,
  42. weight_init,
  43. bias_init,
  44. data_format='NCHW',
  45. transposed=False):
  46. """Initialize _Conv."""
  47. super(_Conv, self).__init__()
  48. self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name)
  49. self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name)
  50. self.kernel_size = kernel_size
  51. self.stride = stride
  52. self.pad_mode = pad_mode
  53. self.weight_init = weight_init
  54. self.bias_init = bias_init
  55. self.format = Validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.cls_name)
  56. if context.get_context("device_target") != "GPU" and self.format == "NHWC":
  57. raise ValueError(f"For '{self.cls_name}', the \"NHWC\" format only support in GPU target, "
  58. f"but got the 'format' is {self.format} and "
  59. f"the platform is {context.get_context('device_target')}.")
  60. if context.get_context("device_target") == "CPU" and self.format == "NCDHW":
  61. raise ValueError(f"For '{self.cls_name}', the \"NCDHW\" format only support in Ascend and GPU target, "
  62. f"but got the 'format' is {self.format} and "
  63. f"the platform is {context.get_context('device_target')}.")
  64. if isinstance(padding, int):
  65. Validator.check_non_negative_int(padding, 'padding', self.cls_name)
  66. self.padding = padding
  67. elif isinstance(padding, tuple):
  68. for pad in padding:
  69. Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
  70. self.padding = padding
  71. else:
  72. raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int or tuple(int), "
  73. f"but got {type(padding).__name__}.")
  74. self.dilation = dilation
  75. self.group = Validator.check_positive_int(group)
  76. self.has_bias = has_bias
  77. for kernel_size_elem in kernel_size:
  78. Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
  79. for stride_elem in stride:
  80. Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
  81. for dilation_elem in dilation:
  82. Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
  83. if in_channels % group != 0:
  84. raise ValueError(f"For '{self.cls_name}', the attr 'in_channels' must be divisible by attr 'group', "
  85. f"but got 'in_channels': {in_channels} and 'group': {group}.")
  86. if out_channels % group != 0:
  87. raise ValueError(f"For '{self.cls_name}', the 'out_channels' must be divisible by attr 'group', "
  88. f"but got 'out_channels': {out_channels} and 'group': {group}.")
  89. if transposed:
  90. shape = [in_channels, out_channels // group, *kernel_size]
  91. else:
  92. shape = [out_channels, *kernel_size, in_channels // group] if self.format == "NHWC" else \
  93. [out_channels, in_channels // group, *kernel_size]
  94. self.weight = Parameter(initializer(self.weight_init, shape), name='weight')
  95. if Validator.check_bool(has_bias, "has_bias", self.cls_name):
  96. self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
  97. else:
  98. if self.bias_init != 'zeros':
  99. logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
  100. self.bias = None
  101. def construct(self, *inputs):
  102. """Must be overridden by all subclasses."""
  103. raise NotImplementedError
class Conv2d(_Conv):
    r"""
    2D convolution layer.

    Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
    where :math:`N` is batch size, :math:`C_{in}` is channel number, and :math:`H_{in}, W_{in}` are height and width.
    For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross-correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of
    the convolution kernel. The full kernel has shape
    :math:`(C_{out}, C_{in} // \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where group is the group number to split the input `x` in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output height and width will be
    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.

    The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
            and width of the 2D convolution window. Single int means the value is for both the height and the width of
            the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
            width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in horizontal and vertical
              directions and evenly distributed to top and bottom, left and right if possible. Otherwise, the
              last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
              must be 0.
            - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.
            - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the
              input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`. If `padding` is one
            integer, the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a
            tuple with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
            padding[1], padding[2], and padding[3] accordingly. Default: 0.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater or equal to 1 and bounded by the height and width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. If the group is equal to `in_channels` and `out_channels`,
            this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`
          or :math:`(N, H_{in}, W_{in}, C_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
        ValueError: If `data_format` is neither 'NCHW' not 'NHWC'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 1024, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCHW'):
        """Initialize Conv2d."""
        # Scalars are normalized to (h, w) pairs before the base-class validation.
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        # Keep the user-supplied (pre-`twice`) dilation around for introspection.
        self._dilation = dilation
        dilation = twice(dilation)
        super(Conv2d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format)
        # The layer is a thin wrapper around the Conv2D primitive plus an
        # optional BiasAdd; both honor the chosen data layout.
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)

    def construct(self, x):
        """Run the convolution (and bias addition when enabled) on `x`."""
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        """Return the extra repr string listing this layer's configuration."""
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}, format={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init,
                self.format)
        return s
  261. @constexpr
  262. def _check_input_3d(input_shape, op_name):
  263. if len(input_shape) != 3:
  264. raise ValueError(f"For '{op_name}', the dimension of input should be 3d, but got {len(input_shape)}.")
class Conv1d(_Conv):
    r"""
    1D convolution layer.

    Applies a 1D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, W_{in})`,
    where :math:`N` is batch size and :math:`C_{in}` is channel number. For each batch of shape
    :math:`(C_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
    of kernel and it has shape :math:`(\text{ks_w})`, where :math:`\text{ks_w}` is the width of the convolution
    kernel. The full kernel has shape :math:`(C_{out}, C_{in} // \text{group}, \text{ks_w})`, where group is the
    group number to split the input `x` in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output width will be
    :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
    (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively.

    The first introduction of convolution layer can be found in paper `Gradient Based Learning Applied to Document
    Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (int): The data type is int. Specifies the
            width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The output width will be the same as the input `x`.
              The total number of padding will be calculated in the horizontal
              direction and evenly distributed to left and right if possible. Otherwise, the
              last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
              must be 0.
            - valid: Adopts the way of discarding. The possible largest width of the output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding`
              must be 0.
            - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the
              input Tensor borders. `padding` must be greater than or equal to 0.

        padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater or equal to 1 and bounded by the height and width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): An initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv1d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> x = Tensor(np.ones([1, 120, 640]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 240, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1d."""
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # The 1-D convolution is implemented as a 2-D convolution with a
        # height-1 kernel, so all scalar hyper-parameters become (1, value).
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            # A user-supplied 3-D weight (C_out, C_in/group, W) gains a size-1
            # height axis at position 2 to match the internal 2-D kernel.
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        super(Conv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init)
        # Remap the scalar padding to the 2-D (top, bottom, left, right) form
        # expected by Conv2D; the height axis gets no padding.
        self.padding = (0, 0, padding, padding)
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.shape = P.Shape()

    def construct(self, x):
        """Apply the 1-D convolution to a 3-D input `x` of shape (N, C, W)."""
        x_shape = self.shape(x)
        _check_input_3d(x_shape, self.cls_name)
        # Insert a size-1 height axis, run the 2-D convolution, then remove it.
        x = self.expand_dims(x, 2)
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        output = self.squeeze(output)
        return output

    def extend_repr(self):
        """Return the extra repr string listing this layer's configuration."""
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init)
        return s
  425. @constexpr
  426. def _check_input_5dims(input_shape, op_name):
  427. if len(input_shape) != 5:
  428. raise ValueError(f"For '{op_name}', the dimension of input should be 5d, but got {len(input_shape)}.")
  429. class Conv3d(_Conv):
  430. r"""
  431. 3D convolution layer.
  432. Applies a 3D convolution over an input tensor which is typically of shape
  433. :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
  434. :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. where :math:`N` is batch size. :math:`C` is channel number.
  435. the formula is defined as:
  436. .. math::
  437. \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
  438. \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
  439. \operatorname{input}\left(N_{i}, k\right))
  440. where :math:`ccor` is the cross-correlation operator.
  441. If the 'pad_mode' is set to be "valid", the output depth, height and width will be
  442. :math:`\left \lfloor{1 + \frac{D_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
  443. (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
  444. :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
  445. (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` and
  446. :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[4]} + \text{padding[5]} - \text{kernel_size[2]} -
  447. (\text{kernel_size[2]} - 1) \times (\text{dilation[2]} - 1) }{\text{stride[2]}}} \right \rfloor` respectively.
  448. Args:
  449. in_channels (int): The number of input channel :math:`C_{in}`.
  450. out_channels (int): The number of output channel :math:`C_{out}`.
  451. kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
  452. Specifies the depth, height and width of the 3D convolution window.
  453. Single int means the value is for the depth, height and the width of the kernel.
  454. A tuple of 3 ints means the first value is for the depth, second value is for height and the
  455. other is for the width of the kernel.
  456. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  457. the depth, height and width of movement are both strides, or a tuple of three int numbers that
  458. represent depth, height and width of movement respectively. Default: 1.
  459. pad_mode (str): Specifies padding mode. The optional values are
  460. "same", "valid", "pad". Default: "same".
  461. - same: Adopts the way of completion. The depth, height and width of the output will be the same as
  462. the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
  463. directions and evenly distributed to head and tail, top and bottom, left and right if possible.
  464. Otherwise, the last extra padding will be done from the tail, bottom and the right side.
  465. If this mode is set, `padding` must be 0.
  466. - valid: Adopts the way of discarding. The possible largest depth, height and width of output
  467. will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
  468. must be 0.
  469. - pad: Implicit paddings on both sides of the input `x` in depth, height, width. The number of `padding`
  470. will be padded to the input Tensor borders. `padding` must be greater than or equal to 0.
  471. padding (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`.
  472. The data type is int or a tuple of 6 integers. Default: 0. If `padding` is an integer,
  473. the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
  474. If `paddings` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
  475. padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
  476. dilation (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers
  477. : math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
  478. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
  479. there will be :math:`k - 1` pixels skipped for each sampling location.
  480. Its value must be greater or equal to 1 and bounded by the height and width of the input `x`. Default: 1.
  481. group (int): Splits filter into groups, `in_channels` and `out_channels` must be
  482. divisible by the number of groups. Default: 1. Only 1 is currently supported.
  483. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  484. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  485. It can be a Tensor, a string, an Initializer or a number. When a string is specified,
  486. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  487. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  488. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  489. Initializer for more details. Default: 'normal'.
  490. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  491. Initializer and string are the same as 'weight_init'. Refer to the values of
  492. Initializer for more details. Default: 'zeros'.
  493. data_format (str): The optional value for data format. Currently only support "NCDHW".
  494. Inputs:
  495. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
  496. Currently input data type only support float16 and float32.
  497. Outputs:
  498. Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
  499. Raises:
  500. TypeError: If `in_channels`, `out_channels` or `group` is not an int.
  501. TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
  502. ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
  503. ValueError: If `padding` is less than 0.
  504. ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
  505. ValueError: If `padding` is a tuple whose length is not equal to 6.
  506. ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
  507. ValueError: If `data_format` is not 'NCDHW'.
  508. Supported Platforms:
  509. ``Ascend`` ``GPU``
  510. Examples:
  511. >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float32)
  512. >>> conv3d = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(4, 3, 3))
  513. >>> output = conv3d(x)
  514. >>> print(output.shape)
  515. (16, 32, 10, 32, 32)
  516. """
    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3d.

        Validates and normalizes the spatial arguments, delegates the common
        setup to the shared `_Conv` base class, then builds the backing
        `P.Conv3D` primitive from the attributes the base class normalized.
        """
        # Scalar arguments are expanded to (depth, height, width) 3-tuples;
        # invalid values raise here, before any state is created.
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            # Tuple padding must list all six sides: head, tail, top, bottom, left, right.
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        super(Conv3d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format)
        # Build the primitive from self.* (normalized by _Conv), not the raw arguments.
        self.conv3d = P.Conv3D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()
  562. def construct(self, x):
  563. x_shape = self.shape(x)
  564. _check_input_5dims(x_shape, self.cls_name)
  565. output = self.conv3d(x, self.weight)
  566. if self.has_bias:
  567. output = self.bias_add(output, self.bias)
  568. return output
  569. def extend_repr(self):
  570. s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
  571. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  572. 'group={}, has_bias={}, ' \
  573. 'weight_init={}, bias_init={}, format={}'.format(
  574. self.in_channels,
  575. self.out_channels,
  576. self.kernel_size,
  577. self.stride,
  578. self.pad_mode,
  579. self.padding,
  580. self.dilation,
  581. self.group,
  582. self.has_bias,
  583. self.weight_init,
  584. self.bias_init,
  585. self.format)
  586. return s
class Conv3dTranspose(_Conv):
    r"""
    Compute a 3D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).

    The transposed convolution operator multiplies each input value element-wise by a learnable kernel,
    and sums over the outputs from all input feature planes.
    This module can be seen as the gradient of Conv3d with respect to its input.

    `x` is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`D` is the characteristic depth, :math:`H` is the height of the characteristic layer,
    and :math:`W` is the width of the characteristic layer.

    The calculation process of transposed convolution is equivalent to the reverse calculation of convolution.
    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sizes of the input. So that when a Conv3d and a ConvTranspose3d are initialized with same parameters,
    they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv3d maps multiple input shapes to the same output shape.
    ConvTranspose3d provide padding argument to increase the calculated output shape on one or more side.

    The height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::
        D_{out} = (D_{in} - 1) \times \text{stride_d} - 2 \times \text{padding_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + \text{output_padding_d} + 1

        H_{out} = (H_{in} - 1) \times \text{stride_h} - 2 \times \text{padding_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + \text{output_padding_h} + 1

        W_{out} = (W_{in} - 1) \times \text{stride_w} - 2 \times \text{padding_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + \text{output_padding_w} + 1

    if the 'pad_mode' is set to be "SAME",

    .. math::
        D_{out} = (D_{in} + \text{stride_d} - 1)/\text{stride_d} \\
        H_{out} = (H_{in} + \text{stride_h} - 1)/\text{stride_h} \\
        W_{out} = (W_{in} + \text{stride_w} - 1)/\text{stride_w}

    if the 'pad_mode' is set to be "VALID",

    .. math::
        D_{out} = (D_{in} - 1) \times \text{stride_d} + \text{dilation_d} \times
        (\text{kernel_size_d} - 1) + 1 \\
        H_{out} = (H_{in} - 1) \times \text{stride_h} + \text{dilation_h} \times
        (\text{kernel_size_h} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride_w} + \text{dilation_w} \times
        (\text{kernel_size_w} - 1) + 1

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the depth, height and width of movement are both strides, or a tuple of three int numbers that
            represent depth, height and width of movement respectively. Its value must be equal to or
            greater than 1. Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
              the input `x`. The total number of padding will be calculated in depth, horizontal and vertical
              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
              If this mode is set, `padding` and `output_padding` must be 0.
            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
              and `output_padding` must be 0.
            - pad: Implicit paddings on both sides of the input `x` in depth, height, width. The number of `pad`
              will be padded to the input Tensor borders. `padding` must be greater than or equal to 0.

        padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer,
            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
            If `padding` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal
            to padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
        dilation (Union(int, tuple[int])): The data type is int or a tuple of 3 integers
            :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case
            of 1. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
            there will be :math:`k - 1` pixels skipped for each sampling location.
            Its value must be greater or equal to 1 and bounded by the height and width of the input `x`.
            Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1. Only 1 is currently supported.
        output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: 0.
            Must be greater than or equal to 0.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions
            as well as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform',
            'he_uniform', 'ones' and 'zeros' are acceptable. Uppercase and lowercase are both acceptable.
            Refer to the values of Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format. Currently only support 'NCDHW'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
          Currently input data type only support float16 and float32.

    Outputs:
        Tensor, the shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` , `dilation` or `output_padding`
            is neither an int nor a tuple of three.
        TypeError: If input data type is not float16 or float32.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 6.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
        ValueError: If `data_format` is not 'NCDHW'.

    Examples:
        >>> x = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float32)
        >>> conv3d_transpose = nn.Conv3dTranspose(in_channels=16, out_channels=3, kernel_size=(4, 6, 2),
        ...                                       pad_mode='pad')
        >>> output = conv3d_transpose(x)
        >>> print(output.shape)
        (32, 3, 13, 37, 33)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 output_padding=0,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 data_format='NCDHW'):
        """Initialize Conv3dTranspose."""
        # Normalize scalar spatial arguments to (d, h, w) 3-tuples and validate
        # everything before delegating to the shared _Conv base initializer.
        kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name)
        stride = _check_3d_int_or_tuple("stride", stride, self.cls_name)
        dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            # Tuple padding must list all six sides: head, tail, top, bottom, left, right.
            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
        # output_padding may legitimately be 0, hence greater_zero=False.
        output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name, greater_zero=False)
        super(Conv3dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            data_format,
            transposed=True)
        # Build the primitive from self.* (normalized by _Conv), not the raw arguments.
        self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels,
                                                  out_channel=self.out_channels,
                                                  kernel_size=self.kernel_size,
                                                  mode=1,
                                                  pad_mode=self.pad_mode,
                                                  pad=self.padding,
                                                  stride=self.stride,
                                                  dilation=self.dilation,
                                                  group=self.group,
                                                  output_padding=output_padding,
                                                  data_format=self.format)
        self.bias_add = P.BiasAdd(data_format=self.format)
        self.shape = P.Shape()

    def construct(self, x):
        # Only 5-D (N, C, D, H, W) inputs are accepted.
        x_shape = self.shape(x)
        _check_input_5dims(x_shape, self.cls_name)
        output = self.conv3d_transpose(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output

    def extend_repr(self):
        """Return the extra-representation string listing the layer configuration."""
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s
  767. def _deconv_output_length(is_valid, is_same, is_pad, input_length, filter_size, stride_size, dilation_size, padding):
  768. """Calculate the width and height of output."""
  769. length = 0
  770. filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
  771. if is_valid:
  772. if filter_size - stride_size > 0:
  773. length = input_length * stride_size + filter_size - stride_size
  774. else:
  775. length = input_length * stride_size
  776. elif is_same:
  777. length = input_length * stride_size
  778. elif is_pad:
  779. length = input_length * stride_size - padding + filter_size - stride_size
  780. return length
class Conv2dTranspose(_Conv):
    r"""
    2D transposed convolution layer.

    Compute a 2D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).
    This module can be seen as the gradient of Conv2d with respect to its input.

    `x` is typically of shape :math:`(N, C, H, W)`, where :math:`N` is batch size, :math:`C` is channel number,
    :math:`H` is the height of the characteristic layer and :math:`W` is the width of the characteristic layer.

    The pad_mode argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding
    to both sizes of the input. So that when a Conv2d and a ConvTranspose2d are initialized with same parameters,
    they are inverses of each other in regard to the input and output shapes.
    However, when stride > 1, Conv2d maps multiple input shapes to the same output shape.
    ConvTranspose2d provide padding argument to increase the calculated output shape on one or more side.

    The height and width of output are defined as:

    if the 'pad_mode' is set to be "pad",

    .. math::
        H_{out} = (H_{in} - 1) \times \text{stride[0]} - \left (\text{padding[0]} + \text{padding[1]}\right ) +
        \text{dilation[0]} \times (\text{kernel_size[0]} - 1) + 1

        W_{out} = (W_{in} - 1) \times \text{stride[1]} - \left (\text{padding[2]} + \text{padding[3]}\right ) +
        \text{dilation[1]} \times (\text{kernel_size[1]} - 1) + 1

    if the 'pad_mode' is set to be "SAME",

    .. math::
        H_{out} = (H_{in} + \text{stride[0]} - 1)/\text{stride[0]} \\
        W_{out} = (W_{in} + \text{stride[1]} - 1)/\text{stride[1]}

    if the 'pad_mode' is set to be "VALID",

    .. math::
        H_{out} = (H_{in} - 1) \times \text{stride[0]} + \text{dilation[0]} \times
        (\text{ks_w[0]} - 1) + 1 \\
        W_{out} = (W_{in} - 1) \times \text{stride[1]} + \text{dilation[1]} \times
        (\text{ks_w[1]} - 1) + 1

    where :math:`\text{kernel_size[0]}` is the height of the convolution kernel and :math:`\text{kernel_size[1]}`
    is the width of the convolution kernel.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
            and width of the 2D convolution window. Single int means the value is for both the height and the
            width of the kernel. A tuple of 2 ints means the first value is for the height and the other is for
            the width of the kernel.
        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
            the height and width of movement are both strides, or a tuple of two int numbers that
            represent height and width of movement respectively. Its value must be equal to or greater than 1.
            Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - pad: Implicit paddings on both sides of the input `x`.
            - same: Adopted the way of completion.
            - valid: Adopted the way of discarding.

        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input `x`. If `padding` is one
            integer, the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is
            a tuple with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
            padding[1], padding[2], and padding[3] accordingly. Default: 0.
        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation
            rate to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater than or equal to 1 and bounded by the height and width of the
            input `x`. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not supported for Davinci devices when group > 1.
            Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions
            as well as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform',
            'he_uniform', 'ones' and 'zeros' are acceptable. Uppercase and lowercase are both acceptable.
            Refer to the values of Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
        >>> x = Tensor(np.ones([1, 3, 16, 50]), mindspore.float32)
        >>> output = net(x).shape
        >>> print(output)
        (1, 64, 19, 53)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv2dTranspose."""
        # Scalar spatial arguments are expanded to (h, w) pairs.
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
        if isinstance(padding, tuple):
            # Tuple padding must cover all four sides: top, bottom, left, right.
            Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)
        # out_channels and in_channels swap.
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv2dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        # Pre-compute pad-mode flags; construct uses them to derive the output shape.
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
        # cause Conv2DTranspose's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DTranspose(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
        self.bias_add = P.BiasAdd()
        # Unpack per-side paddings once, for the output-length computation in construct.
        if isinstance(self.padding, int):
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (self.padding,) * 4
        else:
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding

    def shard(self, strategy):
        """Set the parallel shard strategy on the underlying primitive; return self for chaining."""
        self.conv2d_transpose.shard(strategy)
        return self

    def construct(self, x):
        n, _, h, w = self.shape(x)
        # The target output spatial size must be supplied explicitly to the primitive.
        h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
                                      self.stride[0], self.dilation[0], self.padding_top + self.padding_bottom)
        w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
                                      self.stride[1], self.dilation[1], self.padding_left + self.padding_right)
        if self.has_bias:
            return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)),
                                 self.bias)
        return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))

    def extend_repr(self):
        """Return the extra-representation string listing the layer configuration."""
        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={}, ' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s
  958. class Conv1dTranspose(_Conv):
  959. r"""
  960. 1D transposed convolution layer.
  961. Compute a 1D transposed convolution, which is also known as a deconvolution
  962. (although it is not an actual deconvolution).
  963. This module can be seen as the gradient of Conv1d with respect to its input.
  964. `x` is typically of shape :math:`(N, C, W)`, where :math:`N` is batch size, :math:`C` is channel number and
  965. :math:`W` is the characteristic length.
  966. The padding argument effectively adds :math:`dilation * (kernel\_size - 1) - padding` amount of zero padding to
  967. both sizes of the input. So that when a Conv1d and a ConvTranspose1d are initialized with same parameters,
  968. they are inverses of each other in regard to the input and output shapes. However, when stride > 1,
  969. Conv1d maps multiple input shapes to the same output shape.
  970. The width of output is defined as:
  971. .. math::
  972. W_{out} = \begin{cases}
  973. (W_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
  974. (\text{ks_w} - 1) + 1, & \text{if pad_mode='pad'}\\
  975. (W_{in} + \text{stride} - 1)/\text{stride}, & \text{if pad_mode='same'}\\
  976. (W_{in} - 1) \times \text{stride} + \text{dilation} \times
  977. (\text{ks_w} - 1) + 1, & \text{if pad_mode='valid'}
  978. \end{cases}
  979. where :math:`\text{ks_w}` is the width of the convolution kernel.
  980. Args:
  981. in_channels (int): The number of channels in the input space.
  982. out_channels (int): The number of channels in the output space.
  983. kernel_size (int): int, which specifies the width of the 1D convolution window.
  984. stride (int): The distance of kernel moving, an int number that represents
  985. the width of movement. Default: 1.
  986. pad_mode (str): Select the mode of the pad. The optional values are
  987. "pad", "same", "valid". Default: "same".
  988. - pad: Implicit paddings on both sides of the input `x`.
  989. - same: Adopted the way of completion.
  990. - valid: Adopted the way of discarding.
  991. padding (int): Implicit paddings on both sides of the input `x`. Default: 0.
  992. dilation (int): The data type is int. Specifies the dilation rate
  993. to use for dilated convolution. If set to be :math:`k > 1`, there will
  994. be :math:`k - 1` pixels skipped for each sampling location. Its value must
  995. be greater or equal to 1 and bounded by the width of the
  996. input `x`. Default: 1.
  997. group (int): Splits filter into groups, `in_channels` and `out_channels` must be
  998. divisible by the number of groups. This is not supported for Davinci devices when group > 1. Default: 1.
  999. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  1000. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  1001. It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
  1002. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  1003. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  1004. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  1005. Initializer for more details. Default: 'normal'.
  1006. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  1007. Initializer and string are the same as 'weight_init'. Refer to the values of
  1008. Initializer for more details. Default: 'zeros'.
  1009. Inputs:
  1010. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.
  1011. Outputs:
  1012. Tensor of shape :math:`(N, C_{out}, W_{out})`.
  1013. Raises:
  1014. TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
  1015. ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
  1016. ValueError: If `padding` is less than 0.
  1017. ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
  1018. Supported Platforms:
  1019. ``Ascend`` ``GPU`` ``CPU``
  1020. Examples:
  1021. >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
  1022. >>> x = Tensor(np.ones([1, 3, 50]), mindspore.float32)
  1023. >>> output = net(x).shape
  1024. >>> print(output)
  1025. (1, 64, 53)
  1026. """
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        """Initialize Conv1dTranspose.

        The 1D case is implemented on top of the 2D backprop-input primitive:
        all spatial arguments are lifted to 2D with a dummy height axis of 1.
        """
        # 1D arguments must be plain ints; validate before lifting to 2D.
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Lift every spatial argument to 2D with a height component of 1.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            # A user-provided 3D weight tensor gains a dummy height axis (dim 2)
            # so it matches the 4D layout of the underlying 2D op.
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels swap.
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)
        # Padding applies only to the real width axis; height padding stays zero.
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        # Pre-compute pad-mode flags; construct uses them to derive the output shape.
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
  1098. def shard(self, strategy):
  1099. self.conv2d_transpose.shard(strategy)
  1100. return self
  1101. def construct(self, x):
  1102. x_shape = self.shape(x)
  1103. _check_input_3d(x_shape, self.cls_name)
  1104. x = self.expand_dims(x, 2)
  1105. n, _, h, w = self.shape(x)
  1106. h_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, h, self.kernel_size[0],
  1107. self.stride[0], self.dilation[0], self.padding[0] + self.padding[1])
  1108. w_out = _deconv_output_length(self.is_valid, self.is_same, self.is_pad, w, self.kernel_size[1],
  1109. self.stride[1], self.dilation[1], self.padding[2] + self.padding[3])
  1110. output = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
  1111. if self.has_bias:
  1112. output = self.bias_add(output, self.bias)
  1113. output = self.squeeze(output)
  1114. return output
  1115. def extend_repr(self):
  1116. s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
  1117. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  1118. 'group={}, has_bias={}, ' \
  1119. 'weight_init={}, bias_init={}'.format(self.in_channels,
  1120. self.out_channels,
  1121. self.kernel_size,
  1122. self.stride,
  1123. self.pad_mode,
  1124. self.padding,
  1125. self.dilation,
  1126. self.group,
  1127. self.has_bias,
  1128. self.weight_init,
  1129. self.bias_init)
  1130. return s