You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pooling.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """pooling"""
  16. from mindspore.ops import operations as P
  17. from mindspore.ops import functional as F
  18. from mindspore._checkparam import Rel, Validator as validator
  19. from mindspore.ops.primitive import constexpr
  20. import mindspore.context as context
  21. from ..cell import Cell
# Names exported by a star-import of this module: the four public pooling layers.
__all__ = ['AvgPool2d', 'MaxPool2d', 'AvgPool1d', 'MaxPool1d']
  23. class _PoolNd(Cell):
  24. """N-D AvgPool"""
  25. def __init__(self, kernel_size, stride, pad_mode):
  26. super(_PoolNd, self).__init__()
  27. self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
  28. def _check_int_or_tuple(arg_name, arg_value):
  29. validator.check_value_type(arg_name, arg_value, [int, tuple], self.cls_name)
  30. error_msg = f'For \'{self.cls_name}\' the {arg_name} should be an positive int number or ' \
  31. f'a tuple of two positive int numbers, but got {arg_value}'
  32. if isinstance(arg_value, int):
  33. if arg_value <= 0:
  34. raise ValueError(error_msg)
  35. elif len(arg_value) == 2:
  36. for item in arg_value:
  37. if isinstance(item, int) and item > 0:
  38. continue
  39. raise ValueError(error_msg)
  40. else:
  41. raise ValueError(error_msg)
  42. return arg_value
  43. self.kernel_size = _check_int_or_tuple('kernel_size', kernel_size)
  44. self.stride = _check_int_or_tuple('stride', stride)
  45. def construct(self, *inputs):
  46. pass
  47. def extend_repr(self):
  48. return 'kernel_size={kernel_size}, stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__)
  49. @constexpr
  50. def _shape_check(in_shape):
  51. if len(in_shape) != 3:
  52. raise ValueError("The input must has 3 dim")
  53. class MaxPool2d(_PoolNd):
  54. r"""
  55. Max pooling operation for temporal data.
  56. Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
  57. Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool2d outputs
  58. regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
  59. :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
  60. .. math::
  61. \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
  62. \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
  63. Note:
  64. pad_mode for training only supports "same" and "valid".
  65. Args:
  66. kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value,
  67. is an int number that represents height and width are both kernel_size,
  68. or a tuple of two int numbers that represent height and width respectively.
  69. Default: 1.
  70. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  71. the height and width of movement are both strides, or a tuple of two int numbers that
  72. represent height and width of movement respectively. Default: 1.
  73. pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  74. Default: "valid".
  75. - same: Adopts the way of completion. The height and width of the output will be the same as
  76. the input. The total number of padding will be calculated in horizontal and vertical
  77. directions and evenly distributed to top and bottom, left and right if possible.
  78. Otherwise, the last extra padding will be done from the bottom and the right side.
  79. - valid: Adopts the way of discarding. The possible largest height and width of output
  80. will be returned without padding. Extra pixels will be discarded.
  81. Inputs:
  82. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  83. Outputs:
  84. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  85. Examples:
  86. >>> pool = nn.MaxPool2d(kernel_size=3, stride=1)
  87. >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
  88. [[[[1. 5. 5. 1.]
  89. [0. 3. 4. 8.]
  90. [4. 2. 7. 6.]
  91. [4. 9. 0. 1.]]
  92. [[3. 6. 2. 6.]
  93. [4. 4. 7. 8.]
  94. [0. 0. 4. 0.]
  95. [1. 8. 7. 0.]]]]
  96. >>> output = pool(x)
  97. >>> output.shape
  98. (1, 2, 2, 2)
  99. >>> output
  100. [[[[7. 8.]
  101. [9. 9.]]
  102. [[7. 8.]
  103. [8. 8.]]]]
  104. """
  105. def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
  106. super(MaxPool2d, self).__init__(kernel_size, stride, pad_mode)
  107. self.max_pool = P.MaxPool(ksize=self.kernel_size,
  108. strides=self.stride,
  109. padding=self.pad_mode)
  110. self.max_pool_with_arg_max = P.MaxPoolWithArgmax(ksize=self.kernel_size,
  111. strides=self.stride,
  112. padding=self.pad_mode)
  113. self.is_tbe = context.get_context("device_target") == "Ascend"
  114. def construct(self, x):
  115. if self.is_tbe and self.training:
  116. out = self.max_pool_with_arg_max(x)[0]
  117. else:
  118. out = self.max_pool(x)
  119. return out
  120. class MaxPool1d(_PoolNd):
  121. r"""
  122. Max pooling operation for temporal data.
  123. Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.
  124. Typically the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, MaxPool1d outputs
  125. regional maximum in the :math:`(L_{in})`-dimension. Given kernel size
  126. :math:`ks = (l_{ker})` and stride :math:`s = (s_0)`, the operation is as follows.
  127. .. math::
  128. \text{output}(N_i, C_j, l) = \max_{n=0, \ldots, l_{ker}-1}
  129. \text{input}(N_i, C_j, s_0 \times l + n)
  130. Note:
  131. pad_mode for training only supports "same" and "valid".
  132. Args:
  133. kernel_size (int): The size of kernel used to take the max value, Default: 1.
  134. stride (int): The distance of kernel moving, an int number that represents
  135. the width of movement is stride, Default: 1.
  136. pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  137. Default: "valid".
  138. - same: Adopts the way of completion. The height and width of the output will be the same as
  139. the input. The total number of padding will be calculated in horizontal and vertical
  140. directions and evenly distributed to top and bottom, left and right if possible.
  141. Otherwise, the last extra padding will be done from the bottom and the right side.
  142. - valid: Adopts the way of discarding. The possible largest height and width of output
  143. will be returned without padding. Extra pixels will be discarded.
  144. Inputs:
  145. - **input** (Tensor) - Tensor of shape :math:`(N, C, L_{in})`.
  146. Outputs:
  147. Tensor of shape :math:`(N, C, L_{out}))`.
  148. Examples:
  149. >>> max_pool = nn.MaxPool1d(kernel_size=3, strides=1)
  150. >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4]), mindspore.float32)
  151. >>> output = pool(x)
  152. >>> output.shape
  153. (1, 2, 2)
  154. """
  155. def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
  156. super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode)
  157. validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name)
  158. validator.check_value_type('stride', stride, [int], self.cls_name)
  159. self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
  160. validator.check_integer("kernel_size", kernel_size, 1, Rel.GE, self.cls_name)
  161. validator.check_integer("stride", stride, 1, Rel.GE, self.cls_name)
  162. self.kernel_size = (1, kernel_size)
  163. self.stride = (1, stride)
  164. self.max_pool = P.MaxPool(ksize=self.kernel_size,
  165. strides=self.stride,
  166. padding=self.pad_mode)
  167. self.max_pool_with_arg_max = P.MaxPoolWithArgmax(ksize=self.kernel_size,
  168. strides=self.stride,
  169. padding=self.pad_mode)
  170. self.shape = F.shape
  171. self.reduce_mean = P.ReduceMean(keep_dims=True)
  172. self.expand = P.ExpandDims()
  173. self.squeeze = P.Squeeze(2)
  174. self.is_tbe = context.get_context("device_target") == "Ascend"
  175. def construct(self, x):
  176. _shape_check(self.shape(x))
  177. x = self.expand(x, 2)
  178. if self.is_tbe and self.training:
  179. output = self.max_pool_with_arg_max(x)[0]
  180. else:
  181. output = self.max_pool(x)
  182. output = self.squeeze(output)
  183. return output
  184. class AvgPool2d(_PoolNd):
  185. r"""
  186. Average pooling for temporal data.
  187. Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
  188. Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, AvgPool2d outputs
  189. regional average in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size
  190. :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows.
  191. .. math::
  192. \text{output}(N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1}
  193. \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n)
  194. Note:
  195. pad_mode for training only supports "same" and "valid".
  196. Args:
  197. kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value.
  198. The data type of kernel_size must be int and the value represents the height and width,
  199. or a tuple of two int numbers that represent height and width respectively.
  200. Default: 1.
  201. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  202. the height and width of movement are both strides, or a tuple of two int numbers that
  203. represent height and width of movement respectively. Default: 1.
  204. pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  205. Default: "valid".
  206. - same: Adopts the way of completion. The height and width of the output will be the same as
  207. the input. The total number of padding will be calculated in horizontal and vertical
  208. directions and evenly distributed to top and bottom, left and right if possible.
  209. Otherwise, the last extra padding will be done from the bottom and the right side.
  210. - valid: Adopts the way of discarding. The possible largest height and width of output
  211. will be returned without padding. Extra pixels will be discarded.
  212. Inputs:
  213. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  214. Outputs:
  215. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  216. Examples:
  217. >>> pool = nn.AvgPool2d(kernel_size=3, stride=1)
  218. >>> x = Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), mindspore.float32)
  219. [[[[5. 5. 9. 9.]
  220. [8. 4. 3. 0.]
  221. [2. 7. 1. 2.]
  222. [1. 8. 3. 3.]]
  223. [[6. 8. 2. 4.]
  224. [3. 0. 2. 1.]
  225. [0. 8. 9. 7.]
  226. [2. 1. 4. 9.]]]]
  227. >>> output = pool(x)
  228. >>> output.shape
  229. (1, 2, 2, 2)
  230. >>> output
  231. [[[[4.888889 4.4444447]
  232. [4.111111 3.4444444]]
  233. [[4.2222223 4.5555553]
  234. [3.2222223 4.5555553]]]]
  235. """
  236. def __init__(self,
  237. kernel_size=1,
  238. stride=1,
  239. pad_mode="valid"):
  240. super(AvgPool2d, self).__init__(kernel_size, stride, pad_mode)
  241. self.avg_pool = P.AvgPool(ksize=self.kernel_size,
  242. strides=self.stride,
  243. padding=self.pad_mode)
  244. def construct(self, x):
  245. return self.avg_pool(x)
  246. class AvgPool1d(_PoolNd):
  247. r"""
  248. Average pooling for temporal data.
  249. Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.
  250. Typically the input is of shape :math:`(N_{in}, C_{in}, L_{in})`, AvgPool1d outputs
  251. regional average in the :math:`(L_{in})`-dimension. Given kernel size
  252. :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows.
  253. .. math::
  254. \text{output}(N_i, C_j, l) = \frac{1}{l_{ker}} \sum_{n=0}^{l_{ker}-1}
  255. \text{input}(N_i, C_j, s_0 \times l + n)
  256. Note:
  257. pad_mode for training only supports "same" and "valid".
  258. Args:
  259. kernel_size (int): The size of kernel window used to take the average value, Default: 1.
  260. stride (int): The distance of kernel moving, an int number that represents
  261. the width of movement is strides, Default: 1.
  262. pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive.
  263. Default: "valid".
  264. - same: Adopts the way of completion. The height and width of the output will be the same as
  265. the input. The total number of padding will be calculated in horizontal and vertical
  266. directions and evenly distributed to top and bottom, left and right if possible.
  267. Otherwise, the last extra padding will be done from the bottom and the right side.
  268. - valid: Adopts the way of discarding. The possible largest height and width of output
  269. will be returned without padding. Extra pixels will be discarded.
  270. Inputs:
  271. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})`.
  272. Outputs:
  273. Tensor of shape :math:`(N, C_{out}, L_{out})`.
  274. Examples:
  275. >>> pool = nn.AvgPool1d(kernel_size=6, strides=1)
  276. >>> x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
  277. >>> output = pool(x)
  278. >>> output.shape
  279. (1, 3, 1)
  280. """
  281. def __init__(self,
  282. kernel_size=1,
  283. stride=1,
  284. pad_mode="valid"):
  285. super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode)
  286. validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name)
  287. validator.check_value_type('stride', stride, [int], self.cls_name)
  288. self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.cls_name)
  289. validator.check_integer("kernel_size", kernel_size, 1, Rel.GE, self.cls_name)
  290. validator.check_integer("stride", stride, 1, Rel.GE, self.cls_name)
  291. self.kernel_size = (1, kernel_size)
  292. self.stride = (1, stride)
  293. self.avg_pool = P.AvgPool(ksize=self.kernel_size,
  294. strides=self.stride,
  295. padding=self.pad_mode)
  296. self.shape = F.shape
  297. self.reduce_mean = P.ReduceMean(keep_dims=True)
  298. self.slice = P.Slice()
  299. self.expand = P.ExpandDims()
  300. self.squeeze = P.Squeeze(2)
  301. def construct(self, x):
  302. _shape_check(self.shape(x))
  303. batch, channel, width = self.shape(x)
  304. if width == self.kernel_size[1]:
  305. x = self.reduce_mean(x, 2)
  306. elif width - self.kernel_size[1] < self.stride[1]:
  307. x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1]))
  308. x = self.reduce_mean(x, 2)
  309. else:
  310. x = self.expand(x, 2)
  311. x = self.avg_pool(x)
  312. x = self.squeeze(x)
  313. return x