You cannot select more than 25 topics. A topic must start with a Chinese character, a letter, or a number; it can include dashes ('-') and can be up to 35 characters long.

deformable_conv.py 12 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. #! /usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import tensorlayer as tl
  4. from tensorlayer import logging
  5. from tensorlayer.layers.core import Module
  6. __all__ = [
  7. 'DeformableConv2d',
  8. ]
  9. class DeformableConv2d(Module):
  10. """The :class:`DeformableConv2d` class is a 2D
  11. `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`__.
  12. Parameters
  13. ----------
  14. offset_layer : tl.Tensor
  15. To predict the offset of convolution operations.
  16. The shape is (batchsize, input height, input width, 2*(number of element in the convolution kernel))
  17. e.g. if apply a 3*3 kernel, the number of the last dimension should be 18 (2*3*3)
  18. n_filter : int
  19. The number of filters.
  20. filter_size : tuple of int
  21. The filter size (height, width).
  22. act : activation function
  23. The activation function of this layer.
  24. padding : str
  25. The padding algorithm type: "SAME" or "VALID".
  26. W_init : initializer
  27. The initializer for the weight matrix.
  28. b_init : initializer or None
  29. The initializer for the bias vector. If None, skip biases.
  30. in_channels : int
  31. The number of in channels.
  32. name : str
  33. A unique layer name.
  34. Examples
  35. --------
  36. With TensorLayer
  37. >>> net = tl.layers.Input([5, 10, 10, 16], name='input')
  38. >>> offset1 = tl.layers.Conv2d(
  39. ... n_filter=18, filter_size=(3, 3), strides=(1, 1), padding='SAME', name='offset1'
  40. ... )(net)
  41. >>> deformconv1 = tl.layers.DeformableConv2d(
  42. ... offset_layer=offset1, n_filter=32, filter_size=(3, 3), name='deformable1'
  43. ... )(net)
  44. >>> offset2 = tl.layers.Conv2d(
  45. ... n_filter=18, filter_size=(3, 3), strides=(1, 1), padding='SAME', name='offset2'
  46. ... )(deformconv1)
  47. >>> deformconv2 = tl.layers.DeformableConv2d(
  48. ... offset_layer=offset2, n_filter=64, filter_size=(3, 3), name='deformable2'
  49. ... )(deformconv1)
  50. References
  51. ----------
  52. - The deformation operation was adapted from the implementation in `here <https://github.com/kastnerkyle/deform-conv>`__
  53. Notes
  54. -----
  55. - The padding is fixed to 'SAME'.
  56. - The current implementation is not optimized for memory usgae. Please use it carefully.
  57. """
  58. # @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release
  59. def __init__(
  60. self,
  61. offset_layer=None,
  62. # shape=(3, 3, 1, 100),
  63. n_filter=32,
  64. filter_size=(3, 3),
  65. act=None,
  66. padding='SAME',
  67. W_init=tl.initializers.truncated_normal(stddev=0.02),
  68. b_init=tl.initializers.constant(value=0.0),
  69. in_channels=None,
  70. name=None # 'deformable_conv_2d',
  71. ):
  72. super().__init__(name, act=act)
  73. self.offset_layer = offset_layer
  74. self.n_filter = n_filter
  75. self.filter_size = filter_size
  76. self.padding = padding
  77. self.W_init = W_init
  78. self.b_init = b_init
  79. self.in_channels = in_channels
  80. self.kernel_n = filter_size[0] * filter_size[1]
  81. if self.offset_layer.get_shape()[-1] != 2 * self.kernel_n:
  82. raise AssertionError("offset.get_shape()[-1] is not equal to: %d" % 2 * self.kernel_n)
  83. logging.info(
  84. "DeformableConv2d %s: n_filter: %d, filter_size: %s act: %s" % (
  85. self.name, self.n_filter, str(self.filter_size
  86. ), self.act.__class__.__name__ if self.act is not None else 'No Activation'
  87. )
  88. )
  89. def __repr__(self):
  90. actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
  91. s = (
  92. '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}'
  93. ', padding={padding}'
  94. )
  95. if self.b_init is None:
  96. s += ', bias=False'
  97. s += (', ' + actstr)
  98. if self.name is not None:
  99. s += ', name=\'{name}\''
  100. s += ')'
  101. return s.format(classname=self.__class__.__name__, **self.__dict__)
  102. def build(self, inputs_shape):
  103. self.in_channels = inputs_shape[-1]
  104. self.input_h = int(inputs_shape[1])
  105. self.input_w = int(inputs_shape[2])
  106. initial_offsets = tl.ops.stack(
  107. tl.ops.meshgrid(tl.ops.range(self.filter_size[0]), tl.ops.range(self.filter_size[1]), indexing='ij')
  108. ) # initial_offsets --> (kh, kw, 2)
  109. initial_offsets = tl.ops.reshape(initial_offsets, (-1, 2)) # initial_offsets --> (n, 2)
  110. initial_offsets = tl.ops.expand_dims(initial_offsets, 0) # initial_offsets --> (1, n, 2)
  111. initial_offsets = tl.ops.expand_dims(initial_offsets, 0) # initial_offsets --> (1, 1, n, 2)
  112. initial_offsets = tl.ops.tile(
  113. initial_offsets, [self.input_h, self.input_w, 1, 1]
  114. ) # initial_offsets --> (h, w, n, 2)
  115. initial_offsets = tl.ops.cast(initial_offsets, 'float32')
  116. grid = tl.ops.meshgrid(
  117. tl.ops.range(
  118. -int((self.filter_size[0] - 1) / 2.0), int(self.input_h - int((self.filter_size[0] - 1) / 2.0)), 1
  119. ),
  120. tl.ops.range(
  121. -int((self.filter_size[1] - 1) / 2.0), int(self.input_w - int((self.filter_size[1] - 1) / 2.0)), 1
  122. ), indexing='ij'
  123. )
  124. grid = tl.ops.stack(grid, axis=-1)
  125. grid = tl.ops.cast(grid, 'float32') # grid --> (h, w, 2)
  126. grid = tl.ops.expand_dims(grid, 2) # grid --> (h, w, 1, 2)
  127. grid = tl.ops.tile(grid, [1, 1, self.kernel_n, 1]) # grid --> (h, w, n, 2)
  128. self.grid_offset = grid + initial_offsets # grid_offset --> (h, w, n, 2)
  129. self.filter_shape = (1, 1, self.kernel_n, self.in_channels, self.n_filter)
  130. self.W = self._get_weights("W_deformableconv2d", shape=self.filter_shape, init=self.W_init)
  131. if self.b_init:
  132. self.b = self._get_weights("b_deformableconv2d", shape=(self.n_filter, ), init=self.b_init)
  133. self.conv3d = tl.ops.Conv3D(strides=[1, 1, 1, 1, 1], padding='VALID')
  134. self.bias_add = tl.ops.BiasAdd()
  135. def forward(self, inputs):
  136. if self._forward_state == False:
  137. if self._built == False:
  138. self.build(tl.get_tensor_shape(inputs))
  139. self._built = True
  140. self._forward_state = True
  141. # shape = (filter_size[0], filter_size[1], pre_channel, n_filter)
  142. offset = self.offset_layer
  143. grid_offset = self.grid_offset
  144. input_deform = self._tf_batch_map_offsets(inputs, offset, grid_offset)
  145. outputs = self.conv3d(input=input_deform, filters=self.W)
  146. outputs = tl.ops.reshape(
  147. tensor=outputs, shape=[outputs.get_shape()[0], self.input_h, self.input_w, self.n_filter]
  148. )
  149. if self.b_init:
  150. outputs = self.bias_add(outputs, self.b)
  151. if self.act:
  152. outputs = self.act(outputs)
  153. return outputs
  154. def _to_bc_h_w(self, x, x_shape):
  155. """(b, h, w, c) -> (b*c, h, w)"""
  156. x = tl.ops.transpose(a=x, perm=[0, 3, 1, 2])
  157. x = tl.ops.reshape(x, (-1, x_shape[1], x_shape[2]))
  158. return x
  159. def _to_b_h_w_n_c(self, x, x_shape):
  160. """(b*c, h, w, n) -> (b, h, w, n, c)"""
  161. x = tl.ops.reshape(x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3]))
  162. x = tl.ops.transpose(a=x, perm=[0, 2, 3, 4, 1])
  163. return x
  164. def tf_flatten(self, a):
  165. """Flatten tensor"""
  166. return tl.ops.reshape(a, [-1])
  167. def _get_vals_by_coords(self, inputs, coords, idx, out_shape):
  168. indices = tl.ops.stack(
  169. [idx, self.tf_flatten(coords[:, :, :, :, 0]),
  170. self.tf_flatten(coords[:, :, :, :, 1])], axis=-1
  171. )
  172. vals = tl.ops.gather_nd(inputs, indices)
  173. vals = tl.ops.reshape(vals, out_shape)
  174. return vals
  175. def _tf_repeat(self, a, repeats):
  176. """Tensorflow version of np.repeat for 1D"""
  177. # https://github.com/tensorflow/tensorflow/issues/8521
  178. if len(a.get_shape()) != 1:
  179. raise AssertionError("This is not a 1D Tensor")
  180. a = tl.ops.expand_dims(a, -1)
  181. a = tl.ops.tile(a, [1, repeats])
  182. a = self.tf_flatten(a)
  183. return a
  184. def _tf_batch_map_coordinates(self, inputs, coords):
  185. """Batch version of tf_map_coordinates
  186. Only supports 2D feature maps
  187. Parameters
  188. ----------
  189. inputs : ``tl.Tensor``
  190. shape = (b*c, h, w)
  191. coords : ``tl.Tensor``
  192. shape = (b*c, h, w, n, 2)
  193. Returns
  194. -------
  195. ``tl.Tensor``
  196. A Tensor with the shape as (b*c, h, w, n)
  197. """
  198. inputs_shape = inputs.get_shape()
  199. coords_shape = coords.get_shape()
  200. batch_channel = tl.get_tensor_shape(inputs)[0]
  201. input_h = int(inputs_shape[1])
  202. input_w = int(inputs_shape[2])
  203. kernel_n = int(coords_shape[3])
  204. n_coords = input_h * input_w * kernel_n
  205. coords_lt = tl.ops.cast(tl.ops.Floor()(coords), 'int32')
  206. coords_rb = tl.ops.cast(tl.ops.Ceil()(coords), 'int32')
  207. coords_lb = tl.ops.stack([coords_lt[:, :, :, :, 0], coords_rb[:, :, :, :, 1]], axis=-1)
  208. coords_rt = tl.ops.stack([coords_rb[:, :, :, :, 0], coords_lt[:, :, :, :, 1]], axis=-1)
  209. idx = self._tf_repeat(tl.ops.range(batch_channel), n_coords)
  210. vals_lt = self._get_vals_by_coords(inputs, coords_lt, idx, (batch_channel, input_h, input_w, kernel_n))
  211. vals_rb = self._get_vals_by_coords(inputs, coords_rb, idx, (batch_channel, input_h, input_w, kernel_n))
  212. vals_lb = self._get_vals_by_coords(inputs, coords_lb, idx, (batch_channel, input_h, input_w, kernel_n))
  213. vals_rt = self._get_vals_by_coords(inputs, coords_rt, idx, (batch_channel, input_h, input_w, kernel_n))
  214. coords_offset_lt = coords - tl.ops.cast(coords_lt, 'float32')
  215. vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, :, :, :, 0]
  216. vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, :, :, :, 0]
  217. mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, :, :, :, 1]
  218. return mapped_vals
  219. def _tf_batch_map_offsets(self, inputs, offsets, grid_offset):
  220. """Batch map offsets into input
  221. Parameters
  222. ------------
  223. inputs : ``tl.Tensor``
  224. shape = (b, h, w, c)
  225. offsets: ``tl.Tensor``
  226. shape = (b, h, w, 2*n)
  227. grid_offset: `tl.Tensor``
  228. Offset grids shape = (h, w, n, 2)
  229. Returns
  230. -------
  231. ``tl.Tensor``
  232. A Tensor with the shape as (b, h, w, c)
  233. """
  234. inputs_shape = inputs.get_shape()
  235. batch_size = tl.get_tensor_shape(inputs)[0]
  236. kernel_n = int(int(offsets.get_shape()[3]) / 2)
  237. input_h = inputs_shape[1]
  238. input_w = inputs_shape[2]
  239. channel = inputs_shape[3]
  240. # inputs (b, h, w, c) --> (b*c, h, w)
  241. inputs = self._to_bc_h_w(inputs, inputs_shape)
  242. # offsets (b, h, w, 2*n) --> (b, h, w, n, 2)
  243. offsets = tl.ops.reshape(offsets, (batch_size, input_h, input_w, kernel_n, 2))
  244. coords = tl.ops.expand_dims(grid_offset, 0) # grid_offset --> (1, h, w, n, 2)
  245. coords = tl.ops.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets # grid_offset --> (b, h, w, n, 2)
  246. # clip out of bound
  247. coords = tl.ops.stack(
  248. [
  249. tl.ops.clip_by_value(coords[:, :, :, :, 0], 0.0, tl.ops.cast(input_h - 1, 'float32')),
  250. tl.ops.clip_by_value(coords[:, :, :, :, 1], 0.0, tl.ops.cast(input_w - 1, 'float32'))
  251. ], axis=-1
  252. )
  253. coords = tl.ops.tile(coords, [channel, 1, 1, 1, 1])
  254. mapped_vals = self._tf_batch_map_coordinates(inputs, coords)
  255. # (b*c, h, w, n) --> (b, h, w, n, c)
  256. mapped_vals = self._to_b_h_w_n_c(mapped_vals, [batch_size, input_h, input_w, kernel_n, channel])
  257. return mapped_vals

TensorLayer 3.0 是一款兼容多种深度学习框架为计算后端的深度学习库，计划兼容 TensorFlow、PyTorch、MindSpore、Paddle。