You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

conv.py 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """conv"""
  16. from mindspore import log as logger
  17. from mindspore.ops import operations as P
  18. from mindspore.common.parameter import Parameter
  19. from mindspore.common.initializer import initializer
  20. from mindspore._checkparam import check_bool, twice, check_int_positive, check_int_non_negative
  21. from mindspore._extends import cell_attr_register
  22. from ..cell import Cell
  23. __all__ = ['Conv2d', 'Conv2dTranspose']
  24. class _Conv(Cell):
  25. """
  26. Applies a N-D convolution over an input signal composed of several input planes.
  27. """
  28. def __init__(self,
  29. in_channels,
  30. out_channels,
  31. kernel_size,
  32. stride,
  33. pad_mode,
  34. padding,
  35. dilation,
  36. group,
  37. has_bias,
  38. weight_init,
  39. bias_init,
  40. transposed=False):
  41. super(_Conv, self).__init__()
  42. self.in_channels = check_int_positive(in_channels)
  43. self.out_channels = check_int_positive(out_channels)
  44. self.kernel_size = kernel_size
  45. self.stride = stride
  46. self.pad_mode = pad_mode
  47. self.padding = check_int_non_negative(padding)
  48. self.dilation = dilation
  49. self.group = check_int_positive(group)
  50. self.has_bias = has_bias
  51. if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
  52. kernel_size[0] < 1 or kernel_size[1] < 1:
  53. raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed "
  54. + str(self.kernel_size) + ", should be a int or tuple and equal to or greater than 1.")
  55. if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or stride[0] < 1 or stride[1] < 1:
  56. raise ValueError("Attr 'stride' of 'Conv2D' Op passed "
  57. + str(self.stride) + ", should be a int or tuple and equal to or greater than 1.")
  58. if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
  59. dilation[0] < 1 or dilation[1] < 1:
  60. raise ValueError("Attr 'dilation' of 'Conv2D' Op passed "
  61. + str(self.dilation) + ", should equal to or greater than 1.")
  62. if in_channels % group != 0:
  63. raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by "
  64. "attr 'group' of 'Conv2D' Op.")
  65. if out_channels % group != 0:
  66. raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by "
  67. "attr 'group' of 'Conv2D' Op.")
  68. if transposed:
  69. shape = [in_channels, out_channels // group, *kernel_size]
  70. else:
  71. shape = [out_channels, in_channels // group, *kernel_size]
  72. self.weight = Parameter(initializer(weight_init, shape), name='weight')
  73. if check_bool(has_bias):
  74. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  75. else:
  76. if bias_init != 'zeros':
  77. logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
  78. self.bias = None
  79. def construct(self, *inputs):
  80. """Must be overridden by all subclasses."""
  81. raise NotImplementedError
  82. class Conv2d(_Conv):
  83. r"""
  84. 2D convolution layer.
  85. Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
  86. where :math:`N` is batch size and :math:`C_{in}` is channel number. For each batch of shape
  87. :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
  88. .. math::
  89. out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
  90. where :math:`ccor` is cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
  91. from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to :math:`i`-th channel of the :math:`j`-th
  92. filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
  93. of kernel and it has shape :math:`(\text{ks_h}, \text{ks_w})`, where :math:`\text{ks_h}` and
  94. :math:`\text{ks_w}` are height and width of the convolution kernel. The full kernel has shape
  95. :math:`(C_{out}, C_{in} // \text{group}, \text{ks_h}, \text{ks_w})`, where group is the group number
  96. to split the input in the channel dimension.
  97. If the 'pad_mode' is set to be "valid", the output height and width will be
  98. :math:`\left \lfloor{1 + \frac{H_{in} + 2 \times \text{padding} - \text{ks_h} -
  99. (\text{ks_h} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and
  100. :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} -
  101. (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively.
  102. The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
  103. <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
  104. Args:
  105. in_channels (int): The number of input channel :math:`C_{in}`.
  106. out_channels (int): The number of output channel :math:`C_{out}`.
  107. kernel_size (Union[int, tuple[int]]): The data type is int or tuple with 2 integers. Specifies the height
  108. and width of the 2D convolution window. Single int means the value if for both height and width of
  109. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  110. width of the kernel.
  111. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  112. the height and width of movement are both strides, or a tuple of two int numbers that
  113. represent height and width of movement respectively. Default: 1.
  114. pad_mode (str): Specifies padding mode. The optional values are
  115. "same", "valid", "pad". Default: "same".
  116. - same: Adopts the way of completion. Output height and width will be the same as the input.
  117. Total number of padding will be calculated for horizontal and vertical
  118. direction and evenly distributed to top and bottom, left and right if possible. Otherwise, the
  119. last extra padding will be done from the bottom and the right side. If this mode is set, `padding`
  120. must be 0.
  121. - valid: Adopts the way of discarding. The possibly largest height and width of output will be return
  122. without padding. Extra pixels will be discarded. If this mode is set, `padding`
  123. must be 0.
  124. - pad: Implicit paddings on both sides of the input. The number of `padding` will be padded to the input
  125. Tensor borders. `padding` should be greater than or equal to 0.
  126. padding (int): Implicit paddings on both sides of the input. Default: 0.
  127. dilation (Union[int, tuple[int]]): The data type is int or tuple with 2 integers. Specifies the dilation rate
  128. to use for dilated convolution. If set to be :math:`k > 1`, there will
  129. be :math:`k - 1` pixels skipped for each sampling location. Its value should
  130. be greater or equal to 1 and bounded by the height and width of the
  131. input. Default: 1.
  132. group (int): Split filter into groups, `in_ channels` and `out_channels` should be
  133. divisible by the number of groups. Default: 1.
  134. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  135. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  136. It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
  137. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  138. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  139. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  140. Initializer for more details. Default: 'normal'.
  141. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  142. Initializer and string are the same as 'weight_init'. Refer to the values of
  143. Initializer for more details. Default: 'zeros'.
  144. Inputs:
  145. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  146. Outputs:
  147. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  148. Examples:
  149. >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal')
  150. >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
  151. >>> net(input).shape
  152. (1, 240, 1024, 640)
  153. """
  154. @cell_attr_register
  155. def __init__(self,
  156. in_channels,
  157. out_channels,
  158. kernel_size,
  159. stride=1,
  160. pad_mode='same',
  161. padding=0,
  162. dilation=1,
  163. group=1,
  164. has_bias=False,
  165. weight_init='normal',
  166. bias_init='zeros'):
  167. kernel_size = twice(kernel_size)
  168. stride = twice(stride)
  169. dilation = twice(dilation)
  170. super(Conv2d, self).__init__(
  171. in_channels,
  172. out_channels,
  173. kernel_size,
  174. stride,
  175. pad_mode,
  176. padding,
  177. dilation,
  178. group,
  179. has_bias,
  180. weight_init,
  181. bias_init)
  182. self.conv2d = P.Conv2D(out_channel=self.out_channels,
  183. kernel_size=self.kernel_size,
  184. mode=1,
  185. pad_mode=self.pad_mode,
  186. pad=self.padding,
  187. stride=self.stride,
  188. dilation=self.dilation,
  189. group=self.group)
  190. self.bias_add = P.BiasAdd()
  191. if pad_mode not in ('valid', 'same', 'pad'):
  192. raise ValueError('Attr \'pad_mode\' of \'Conv2d\' Op passed '
  193. + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
  194. def construct(self, x):
  195. output = self.conv2d(x, self.weight)
  196. if self.has_bias:
  197. output = self.bias_add(output, self.bias)
  198. return output
  199. def extend_repr(self):
  200. s = 'input_channels={}, output_channels={}, kernel_size={},' \
  201. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  202. 'group={}, has_bias={},' \
  203. 'weight_init={}, bias_init={}'.format(
  204. self.in_channels,
  205. self.out_channels,
  206. self.kernel_size,
  207. self.stride,
  208. self.pad_mode,
  209. self.padding,
  210. self.dilation,
  211. self.group,
  212. self.has_bias,
  213. self.weight,
  214. self.bias)
  215. if self.has_bias:
  216. s += ', bias={}'.format(self.bias)
  217. return s
  218. class Conv2dTranspose(_Conv):
  219. r"""
  220. 2D transposed convolution layer.
  221. Compute a 2D transposed convolution, which is also know as a deconvolution
  222. (although it is not actual deconvolution).
  223. Input is typically of shape :math:`(N, C, H, W)`, where :math:`N` is batch size and :math:`C` is channel number.
  224. Args:
  225. in_channels (int): The number of channels in the input space.
  226. out_channels (int): The number of channels in the output space.
  227. kernel_size (Union[int, tuple]): int or tuple with 2 integers, which specifies the height
  228. and width of the 2D convolution window. Single int means the value is for both height and width of
  229. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  230. width of the kernel.
  231. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  232. the height and width of movement are both strides, or a tuple of two int numbers that
  233. represent height and width of movement respectively. Default: 1.
  234. pad_mode (str): Select the mode of the pad. The optional values are
  235. "pad", "same", "valid". Default: "same".
  236. - pad: Implicit paddings on both sides of the input.
  237. - same: Adopted the way of completion.
  238. - valid: Adopted the way of discarding.
  239. padding (int): Implicit paddings on both sides of the input. Default: 0.
  240. dilation (Union[int, tuple[int]]): The data type is int or tuple with 2 integers. Specifies the dilation rate
  241. to use for dilated convolution. If set to be :math:`k > 1`, there will
  242. be :math:`k - 1` pixels skipped for each sampling location. Its value should
  243. be greater or equal to 1 and bounded by the height and width of the
  244. input. Default: 1.
  245. group (int): Split filter into groups, `in_channels` and `out_channels` should be
  246. divisible by the number of groups. This is not support for Davinci devices when group > 1. Default: 1.
  247. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  248. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  249. It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
  250. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  251. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  252. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  253. Initializer for more details. Default: 'normal'.
  254. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  255. Initializer and string are the same as 'weight_init'. Refer to the values of
  256. Initializer for more details. Default: 'zeros'.
  257. Inputs:
  258. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  259. Outputs:
  260. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  261. Examples:
  262. >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal')
  263. >>> input = Tensor(np.ones([1, 3, 16, 50]), mindspore.float32)
  264. >>> net(input)
  265. """
  266. def __init__(self,
  267. in_channels,
  268. out_channels,
  269. kernel_size,
  270. stride=1,
  271. pad_mode='same',
  272. padding=0,
  273. dilation=1,
  274. group=1,
  275. has_bias=False,
  276. weight_init='normal',
  277. bias_init='zeros'):
  278. kernel_size = twice(kernel_size)
  279. stride = twice(stride)
  280. dilation = twice(dilation)
  281. # out_channels and in_channels swap.
  282. # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
  283. # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
  284. super(Conv2dTranspose, self).__init__(
  285. in_channels,
  286. out_channels,
  287. kernel_size,
  288. stride,
  289. pad_mode,
  290. padding,
  291. dilation,
  292. group,
  293. has_bias,
  294. weight_init,
  295. bias_init,
  296. transposed=True)
  297. self.in_channels = in_channels
  298. self.out_channels = out_channels
  299. self.shape = P.Shape()
  300. if pad_mode not in ('valid', 'same', 'pad'):
  301. raise ValueError('Attr \'pad_mode\' of \'Conv2dTranspose\' Op passed '
  302. + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
  303. self.is_valid = self.pad_mode == 'valid'
  304. self.is_same = self.pad_mode == 'same'
  305. self.is_pad = self.pad_mode == 'pad'
  306. if check_bool(has_bias):
  307. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  308. # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
  309. self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
  310. kernel_size=kernel_size,
  311. mode=1,
  312. pad_mode=pad_mode,
  313. pad=padding,
  314. stride=stride,
  315. dilation=dilation,
  316. group=group)
  317. self.bias_add = P.BiasAdd()
  318. def set_strategy(self, strategy):
  319. self.conv2d_transpose.set_strategy(strategy)
  320. return self
  321. def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size):
  322. """Calculate the width and height of output."""
  323. length = 0
  324. filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
  325. if self.is_valid:
  326. if filter_size - stride_size > 0:
  327. length = input_length * stride_size + filter_size - stride_size
  328. else:
  329. length = input_length * stride_size
  330. elif self.is_same:
  331. length = input_length * stride_size
  332. elif self.is_pad:
  333. length = input_length * stride_size - 2 * self.padding + filter_size - stride_size
  334. return length
  335. def construct(self, x):
  336. n, _, h, w = self.shape(x)
  337. h_out = self._deconv_output_length(h, self.kernel_size[0], self.stride[0], self.dilation[0])
  338. w_out = self._deconv_output_length(w, self.kernel_size[1], self.stride[1], self.dilation[1])
  339. if self.has_bias:
  340. return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)),
  341. self.bias)
  342. return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
  343. def extend_repr(self):
  344. s = 'input_channels={}, output_channels={}, kernel_size={},' \
  345. 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
  346. 'group={}, has_bias={},' \
  347. 'weight_init={}, bias_init={}'.format(self.in_channels,
  348. self.out_channels,
  349. self.kernel_size,
  350. self.stride,
  351. self.pad_mode,
  352. self.padding,
  353. self.dilation,
  354. self.group,
  355. self.has_bias,
  356. self.weight,
  357. self.bias)
  358. return s