You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

conv.py 44 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """conv"""
  16. import numpy as np
  17. from mindspore import log as logger
  18. from mindspore import context
  19. from mindspore.ops import operations as P
  20. from mindspore.ops.primitive import constexpr
  21. from mindspore.common.parameter import Parameter
  22. from mindspore.common.initializer import initializer
  23. from mindspore.common.tensor import Tensor
  24. from mindspore._checkparam import Validator, Rel, twice
  25. from mindspore._extends import cell_attr_register
  26. from ..cell import Cell
  27. __all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose']
class _Conv(Cell):
    """
    Applies a N-D convolution over an input signal composed of several input planes.

    Shared base class for the convolution layers in this module: it validates
    the constructor arguments, creates the weight parameter (and the optional
    bias parameter), and leaves `construct` abstract for subclasses.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 data_format='NCHW',
                 transposed=False):
        super(_Conv, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        # NHWC layout is only supported on the GPU backend.
        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        # `padding` may be a single non-negative int or a tuple of non-negative ints.
        if isinstance(padding, int):
            Validator.check_non_negative_int(padding, 'padding', self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
            self.padding = padding
        else:
            raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
        self.dilation = dilation
        self.group = Validator.check_positive_int(group)
        self.has_bias = has_bias
        # kernel_size/stride/dilation arrive here already normalized to 2-tuples
        # by the subclasses; each element must be a (non-bool) int >= 1.
        # isinstance(True, int) is True in Python, hence the explicit bool checks.
        if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed "
                             + str(self.kernel_size) + ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
                isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
            raise ValueError("Attr 'stride' of 'Conv2D' Op passed "
                             + str(self.stride) + ", should be a int or tuple and equal to or greater than 1.")
        if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
                isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
            raise ValueError("Attr 'dilation' of 'Conv2D' Op passed "
                             + str(self.dilation) + ", should be a int or tuple and equal to or greater than 1.")
        # Grouped convolution requires both channel counts to split evenly.
        if in_channels % group != 0:
            raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by "
                             "attr 'group' of 'Conv2D' Op.")
        if out_channels % group != 0:
            raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by "
                             "attr 'group' of 'Conv2D' Op.")
        # Weight layout:
        #   transposed:    [in_channels, out_channels // group, *kernel_size]
        #   standard NCHW: [out_channels, in_channels // group, *kernel_size]
        #   standard NHWC: [out_channels, *kernel_size, in_channels // group]
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, in_channels // group, *kernel_size] if self.format == "NCHW" else \
                [out_channels, *kernel_size, in_channels // group]
        self.weight = Parameter(initializer(self.weight_init, shape), name='weight')
        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias')
        else:
            # A custom bias_init without has_bias is almost certainly a user
            # mistake; warn instead of failing.
            if self.bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None

    def construct(self, *inputs):
        """Must be overridden by all subclasses."""
        raise NotImplementedError
  103. class Conv2d(_Conv):
  104. r"""
  105. 2D convolution layer.
  106. Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
  107. where :math:`N` is batch size, :math:`C_{in}` is channel number, and :math:`H_{in}, W_{in})` are height and width.
  108. For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
  109. .. math::
  110. out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
  111. where :math:`ccor` is the cross-correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
  112. from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
  113. filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
  114. of kernel and it has shape :math:`(\text{ks_h}, \text{ks_w})`, where :math:`\text{ks_h}` and
  115. :math:`\text{ks_w}` are the height and width of the convolution kernel. The full kernel has shape
  116. :math:`(C_{out}, C_{in} // \text{group}, \text{ks_h}, \text{ks_w})`, where group is the group number
  117. to split the input in the channel dimension.
  118. If the 'pad_mode' is set to be "valid", the output height and width will be
  119. :math:`\left \lfloor{1 + \frac{H_{in} + 2 \times \text{padding} - \text{ks_h} -
  120. (\text{ks_h} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and
  121. :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
  122. (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively.
  123. The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
  124. <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
  125. Args:
  126. in_channels (int): The number of input channel :math:`C_{in}`.
  127. out_channels (int): The number of output channel :math:`C_{out}`.
  128. kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
  129. and width of the 2D convolution window. Single int means the value is for both the height and the width of
  130. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  131. width of the kernel.
  132. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  133. the height and width of movement are both strides, or a tuple of two int numbers that
  134. represent height and width of movement respectively. Default: 1.
  135. pad_mode (str): Specifies padding mode. The optional values are
  136. "same", "valid", "pad". Default: "same".
  137. - same: Adopts the way of completion. The height and width of the output will be the same as
  138. the input. The total number of padding will be calculated in horizontal and vertical
  139. directions and evenly distributed to top and bottom, left and right if possible. Otherwise, the
  140. last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
  141. must be 0.
  142. - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
  143. without padding. Extra pixels will be discarded. If this mode is set, `padding`
  144. must be 0.
  145. - pad: Implicit paddings on both sides of the input. The number of `padding` will be padded to the input
  146. Tensor borders. `padding` must be greater than or equal to 0.
  147. padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. If `padding` is one integer,
  148. the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a tuple
  149. with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
  150. padding[1], padding[2], and padding[3] accordingly. Default: 0.
  151. dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
  152. to use for dilated convolution. If set to be :math:`k > 1`, there will
  153. be :math:`k - 1` pixels skipped for each sampling location. Its value must
  154. be greater or equal to 1 and bounded by the height and width of the
  155. input. Default: 1.
  156. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  157. divisible by the number of groups. If the group is equal to `in_channels` and `out_channels`,
  158. this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1.
  159. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  160. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  161. It can be a Tensor, a string, an Initializer or a number. When a string is specified,
  162. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  163. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  164. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  165. Initializer for more details. Default: 'normal'.
  166. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  167. Initializer and string are the same as 'weight_init'. Refer to the values of
  168. Initializer for more details. Default: 'zeros'.
  169. data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
  170. Default: 'NCHW'.
  171. Inputs:
  172. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` \
  173. or :math:`(N, H_{in}, W_{in}, C_{in})`.
  174. Outputs:
  175. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`.
  176. Raises:
  177. TypeError: If `in_channels`, `out_channels` or `group` is not an int.
  178. TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple.
  179. ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
  180. ValueError: If `padding` is less than 0.
  181. ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
  182. ValueError: If `padding` is a tuple whose length is not equal to 4.
  183. ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
  184. ValueError: If `data_format` is neither 'NCHW' not 'NHWC'.
  185. Supported Platforms:
  186. ``Ascend`` ``GPU`` ``CPU``
  187. Examples:
  188. >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal')
  189. >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
  190. >>> output = net(input).shape
  191. >>> print(output)
  192. (1, 240, 1024, 640)
  193. """
  194. @cell_attr_register
  195. def __init__(self,
  196. in_channels,
  197. out_channels,
  198. kernel_size,
  199. stride=1,
  200. pad_mode='same',
  201. padding=0,
  202. dilation=1,
  203. group=1,
  204. has_bias=False,
  205. weight_init='normal',
  206. bias_init='zeros',
  207. data_format='NCHW'):
  208. kernel_size = twice(kernel_size)
  209. stride = twice(stride)
  210. self._dilation = dilation
  211. dilation = twice(dilation)
  212. super(Conv2d, self).__init__(
  213. in_channels,
  214. out_channels,
  215. kernel_size,
  216. stride,
  217. pad_mode,
  218. padding,
  219. dilation,
  220. group,
  221. has_bias,
  222. weight_init,
  223. bias_init,
  224. data_format)
  225. self.conv2d = P.Conv2D(out_channel=self.out_channels,
  226. kernel_size=self.kernel_size,
  227. mode=1,
  228. pad_mode=self.pad_mode,
  229. pad=self.padding,
  230. stride=self.stride,
  231. dilation=self.dilation,
  232. group=self.group,
  233. data_format=self.format)
  234. self.bias_add = P.BiasAdd()
  235. def construct(self, x):
  236. output = self.conv2d(x, self.weight)
  237. if self.has_bias:
  238. output = self.bias_add(output, self.bias)
  239. return output
  240. def extend_repr(self):
  241. s = 'input_channels={}, output_channels={}, kernel_size={},' \
  242. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  243. 'group={}, has_bias={}' \
  244. 'weight_init={}, bias_init={}, format={}'.format(
  245. self.in_channels,
  246. self.out_channels,
  247. self.kernel_size,
  248. self.stride,
  249. self.pad_mode,
  250. self.padding,
  251. self.dilation,
  252. self.group,
  253. self.has_bias,
  254. self.weight_init,
  255. self.bias_init,
  256. self.format)
  257. return s
  258. @constexpr
  259. def _check_input_3d(input_shape):
  260. if len(input_shape) != 3:
  261. raise ValueError(f"Input should be 3d, but got shape {input_shape}")
class Conv1d(_Conv):
    r"""
    1D convolution layer.

    Applies a 1D convolution over an input tensor which is typically of shape
    :math:`(N, C_{in}, W_{in})`, where :math:`N` is batch size and :math:`C_{in}` is channel
    number. For each batch of shape :math:`(C_{in}, W_{in})`, the formula is defined as:

    .. math::

        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,

    where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel
    number, :math:`j` ranges from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to
    the :math:`i`-th channel of the :math:`j`-th filter and :math:`out_{j}` corresponds to the
    :math:`j`-th channel of the output. :math:`W_{ij}` is a slice of kernel and it has shape
    :math:`(\text{ks_w})`, where :math:`\text{ks_w}` is the width of the convolution kernel. The
    full kernel has shape :math:`(C_{out}, C_{in} // \text{group}, \text{ks_w})`, where group is
    the group number to split the input in the channel dimension.

    If the 'pad_mode' is set to be "valid", the output width will be
    :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
    (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively.

    The first introduction of convolution layer can be found in paper `Gradient Based Learning
    Applied to Document Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (int): The data type is int. Specifies the
            width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: Adopts the way of completion. The output width will be the same as the input.
              The total number of padding will be calculated in the horizontal direction and
              evenly distributed to left and right if possible. Otherwise, the last extra padding
              will be done from the bottom and the right side. If this mode is set, `padding`
              must be 0.
            - valid: Adopts the way of discarding. The possible largest width of the output will
              be returned without padding. Extra pixels will be discarded. If this mode is set,
              `padding` must be 0.
            - pad: Implicit paddings on both sides of the input. The number of `padding` will be
              padded to the input Tensor borders. `padding` must be greater than or equal to 0.
        padding (int): Implicit paddings on both sides of the input. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater or equal to 1 and bounded by the height and width of the
            input. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): An initializer for the
            convolution kernel. It can be a Tensor, a string, an Initializer or a number. When a
            string is specified, values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform'
            and 'XavierUniform' distributions as well as constant 'One' and 'Zero' distributions
            are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' and 'zeros' are acceptable.
            Uppercase and lowercase are both acceptable. Refer to the values of Initializer for
            more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias
            vector. Possible Initializer and string are the same as 'weight_init'. Refer to the
            values of Initializer for more details. Default: 'zeros'.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or
            `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is
            less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> net = nn.Conv1d(120, 240, 4, has_bias=False, weight_init='normal')
        >>> input = Tensor(np.ones([1, 120, 640]), mindspore.float32)
        >>> output = net(input).shape
        >>> print(output)
        (1, 240, 640)
    """

    @cell_attr_register
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        # All 1D spatial arguments must be plain ints (no tuples).
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # Conv1d is implemented on top of Conv2D: lift every spatial argument
        # to a (height=1, width) pair.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            # A user-supplied 3D weight tensor gets a size-1 height axis
            # inserted (axis 2) so it matches the 4D Conv2D weight layout.
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        super(Conv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init)
        # 4-tuple (top, bottom, left, right): no padding on the dummy height
        # axis, the user's padding on both sides of the width axis.
        self.padding = (0, 0, padding, padding)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self.bias_add = P.BiasAdd()
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError('Attr \'pad_mode\' of \'Conv1d\' Op passed '
                             + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
        # Used in construct to move between 3D (N, C, W) and 4D (N, C, 1, W).
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.shape = P.Shape()

    def construct(self, x):
        # Validate rank, insert the dummy height axis, run the 2D convolution,
        # then drop the axis again.
        x_shape = self.shape(x)
        _check_input_3d(x_shape)
        x = self.expand_dims(x, 2)
        output = self.conv2d(x, self.weight)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        output = self.squeeze(output)
        return output

    def extend_repr(self):
        """Return the extra repr string describing the layer configuration."""
        s = 'input_channels={}, output_channels={}, kernel_size={},' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={},' \
            'weight_init={}, bias_init={}'.format(
                self.in_channels,
                self.out_channels,
                self.kernel_size,
                self.stride,
                self.pad_mode,
                self.padding,
                self.dilation,
                self.group,
                self.has_bias,
                self.weight_init,
                self.bias_init)
        return s
  423. class Conv2dTranspose(_Conv):
  424. r"""
  425. 2D transposed convolution layer.
  426. Compute a 2D transposed convolution, which is also known as a deconvolution
  427. (although it is not an actual deconvolution).
  428. Input is typically of shape :math:`(N, C, H, W)`, where :math:`N` is batch size and :math:`C` is channel number.
  429. If the 'pad_mode' is set to be "pad", the height and width of output are defined as:
  430. .. math::
  431. H_{out} = (H_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
  432. (\text{ks_h} - 1) + 1
  433. W_{out} = (W_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
  434. (\text{ks_w} - 1) + 1
  435. where :math:`\text{ks_h}` is the height of the convolution kernel and :math:`\text{ks_w}` is the width
  436. of the convolution kernel.
  437. Args:
  438. in_channels (int): The number of channels in the input space.
  439. out_channels (int): The number of channels in the output space.
  440. kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
  441. and width of the 2D convolution window. Single int means the value is for both the height and the width of
  442. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  443. width of the kernel.
  444. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  445. the height and width of movement are both strides, or a tuple of two int numbers that
  446. represent height and width of movement respectively. Its value must be equal to or greater than 1.
  447. Default: 1.
  448. pad_mode (str): Select the mode of the pad. The optional values are
  449. "pad", "same", "valid". Default: "same".
  450. - pad: Implicit paddings on both sides of the input.
  451. - same: Adopted the way of completion.
  452. - valid: Adopted the way of discarding.
  453. padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. If `padding` is one integer,
  454. the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a tuple
  455. with four integers, the paddings of top, bottom, left and right will be equal to padding[0],
  456. padding[1], padding[2], and padding[3] accordingly. Default: 0.
  457. dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate
  458. to use for dilated convolution. If set to be :math:`k > 1`, there will
  459. be :math:`k - 1` pixels skipped for each sampling location. Its value must
  460. be greater than or equal to 1 and bounded by the height and width of the
  461. input. Default: 1.
  462. group (int): Splits filter into groups, `in_channels` and `out_channels` must be
  463. divisible by the number of groups. This does not support for Davinci devices when group > 1. Default: 1.
  464. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  465. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  466. It can be a Tensor, a string, an Initializer or a number. When a string is specified,
  467. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  468. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  469. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  470. Initializer for more details. Default: 'normal'.
  471. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  472. Initializer and string are the same as 'weight_init'. Refer to the values of
  473. Initializer for more details. Default: 'zeros'.
  474. Inputs:
  475. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  476. Outputs:
  477. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  478. Raises:
  479. TypeError: If `in_channels`, `out_channels` or `group` is not an int.
  480. TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple.
  481. ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
  482. ValueError: If `padding` is less than 0.
  483. ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
  484. ValueError: If `padding` is a tuple whose length is not equal to 4.
  485. ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
  486. Supported Platforms:
  487. ``Ascend`` ``GPU``
  488. Examples:
  489. >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
  490. >>> input = Tensor(np.ones([1, 3, 16, 50]), mindspore.float32)
  491. >>> output = net(input).shape
  492. >>> print(output)
  493. (1, 64, 19, 53)
  494. """
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init='normal',
             bias_init='zeros'):
    # Normalize scalar hyper-parameters to (h, w) pairs.
    kernel_size = twice(kernel_size)
    stride = twice(stride)
    dilation = twice(dilation)
    # `padding` may be a single int (applied to all four sides) or a
    # 4-tuple (top, bottom, left, right) — validated here, unpacked below.
    Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
    if isinstance(padding, tuple):
        Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)
    # out_channels and in_channels swap.
    # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
    # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
    super(Conv2dTranspose, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init,
        transposed=True)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.shape = P.Shape()
    # NOTE(review): this check runs after super().__init__ — presumably the
    # parent accepts the same three modes, so it never sees an invalid one
    # uncaught; confirm against _Conv.
    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError('Attr \'pad_mode\' of \'Conv2dTranspose\' Op passed '
                         + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
    # Cached mode flags used by _deconv_output_length in construct().
    self.is_valid = self.pad_mode == 'valid'
    self.is_same = self.pad_mode == 'same'
    self.is_pad = self.pad_mode == 'pad'
    # NOTE(review): parent _Conv presumably already creates self.bias when
    # has_bias is True; this re-creates the Parameter — confirm it is needed.
    if Validator.check_bool(has_bias):
        self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
    # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
    self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
    self.bias_add = P.BiasAdd()
    # Unpack per-side paddings; a scalar padding applies to all four sides.
    # `self.padding` was set by the parent __init__ from the `padding` argument.
    if isinstance(self.padding, int):
        self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (self.padding,) * 4
    else:
        self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding
  554. def shard(self, strategy):
  555. self.conv2d_transpose.shard(strategy)
  556. return self
  557. def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding):
  558. """Calculate the width and height of output."""
  559. length = 0
  560. filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
  561. if self.is_valid:
  562. if filter_size - stride_size > 0:
  563. length = input_length * stride_size + filter_size - stride_size
  564. else:
  565. length = input_length * stride_size
  566. elif self.is_same:
  567. length = input_length * stride_size
  568. elif self.is_pad:
  569. length = input_length * stride_size - padding + filter_size - stride_size
  570. return length
  571. def construct(self, x):
  572. n, _, h, w = self.shape(x)
  573. h_out = self._deconv_output_length(h, self.kernel_size[0], self.stride[0], self.dilation[0],
  574. self.padding_top + self.padding_bottom)
  575. w_out = self._deconv_output_length(w, self.kernel_size[1], self.stride[1], self.dilation[1],
  576. self.padding_left + self.padding_right)
  577. if self.has_bias:
  578. return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)),
  579. self.bias)
  580. return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
  581. def extend_repr(self):
  582. s = 'input_channels={}, output_channels={}, kernel_size={},' \
  583. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  584. 'group={}, has_bias={},' \
  585. 'weight_init={}, bias_init={}'.format(self.in_channels,
  586. self.out_channels,
  587. self.kernel_size,
  588. self.stride,
  589. self.pad_mode,
  590. self.padding,
  591. self.dilation,
  592. self.group,
  593. self.has_bias,
  594. self.weight_init,
  595. self.bias_init)
  596. return s
class Conv1dTranspose(_Conv):
    r"""
    1D transposed convolution layer.

    Compute a 1D transposed convolution, which is also known as a deconvolution
    (although it is not an actual deconvolution).

    Input is typically of shape :math:`(N, C, W)`, where :math:`N` is batch size and :math:`C` is channel number.

    If the 'pad_mode' is set to be "pad", the width of output is defined as:

    .. math::
        W_{out} = (W_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation} \times
        (\text{ks_w} - 1) + 1

    where :math:`\text{ks_w}` is the width of the convolution kernel.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        kernel_size (int): int, which specifies the width of the 1D convolution window.
        stride (int): The distance of kernel moving, an int number that represents
            the width of movement. Default: 1.
        pad_mode (str): Select the mode of the pad. The optional values are
            "pad", "same", "valid". Default: "same".

            - pad: Implicit paddings on both sides of the input.
            - same: Adopted the way of completion.
            - valid: Adopted the way of discarding.
        padding (int): Implicit paddings on both sides of the input. Default: 0.
        dilation (int): The data type is int. Specifies the dilation rate
            to use for dilated convolution. If set to be :math:`k > 1`, there will
            be :math:`k - 1` pixels skipped for each sampling location. Its value must
            be greater or equal to 1 and bounded by the width of the
            input. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. This is not support for Davinci devices when group > 1. Default: 1.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, W_{out})`.

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
        >>> input = Tensor(np.ones([1, 3, 50]), mindspore.float32)
        >>> output = net(input).shape
        >>> print(output)
        (1, 64, 53)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        # All 1D hyper-parameters must be plain ints (no tuples here).
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # The 1D op is emulated with the 2D backprop-input op by adding a
        # dummy height axis of size 1; lift every scalar to an (h, w) pair.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        # A Tensor weight_init is given in 3D (C_in, C_out/group, W); insert
        # the dummy height axis so it matches the internal 4D weight layout.
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels swap.
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init,
            transposed=True)
        # 4-tuple (top, bottom, left, right): no padding on the dummy height
        # axis, symmetric `padding` on the real width axis.
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        # NOTE(review): this check runs after super().__init__ — presumably the
        # parent accepts the same three modes; confirm against _Conv.
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError('Attr \'pad_mode\' of \'Conv1dTranspose\' Op passed '
                             + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
        # Cached mode flags used by _deconv_output_length in construct().
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        # NOTE(review): parent _Conv presumably already creates self.bias when
        # has_bias is True; this re-creates the Parameter — confirm it is needed.
        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
        # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        # Used by construct() to go 3D -> 4D (insert height axis) and back.
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)

    def shard(self, strategy):
        """Set the parallel shard strategy on the underlying op; returns self for chaining."""
        self.conv2d_transpose.shard(strategy)
        return self

    def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding):
        """Calculate the width and height of output."""
        length = 0
        # Effective kernel extent once dilation gaps are counted.
        filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
        if self.is_valid:
            if filter_size - stride_size > 0:
                length = input_length * stride_size + filter_size - stride_size
            else:
                length = input_length * stride_size
        elif self.is_same:
            length = input_length * stride_size
        elif self.is_pad:
            length = input_length * stride_size - padding + filter_size - stride_size
        return length

    def construct(self, x):
        # Validate the (N, C, W) input, then insert a dummy height axis so the
        # 2D backprop-input op can be reused.
        x_shape = self.shape(x)
        _check_input_3d(x_shape)
        x = self.expand_dims(x, 2)
        n, _, h, w = self.shape(x)
        # Height uses padding[0] + padding[1] (both 0 by construction);
        # width uses padding[2] + padding[3] (the real 1D padding).
        h_out = self._deconv_output_length(h, self.kernel_size[0], self.stride[0], self.dilation[0],
                                           self.padding[0] + self.padding[1])
        w_out = self._deconv_output_length(w, self.kernel_size[1], self.stride[1], self.dilation[1],
                                           self.padding[2] + self.padding[3])
        output = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        # Drop the dummy height axis to return a 3D (N, C, W) tensor.
        output = self.squeeze(output)
        return output

    def extend_repr(self):
        """Return the extra-info string shown in this cell's repr."""
        s = 'input_channels={}, output_channels={}, kernel_size={},' \
            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
            'group={}, has_bias={},' \
            'weight_init={}, bias_init={}'.format(self.in_channels,
                                                  self.out_channels,
                                                  self.kernel_size,
                                                  self.stride,
                                                  self.pad_mode,
                                                  self.padding,
                                                  self.dilation,
                                                  self.group,
                                                  self.has_bias,
                                                  self.weight_init,
                                                  self.bias_init)
        return s