yolo_neck.py

# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) 2019 Western Digital Corporation or its affiliates.
import torch
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule

from ..builder import NECKS


class DetectionBlock(BaseModule):
    """Detection block in YOLO neck.

    Let out_channels = n. The full YOLO detection branch contains six
    ConvLayers, one Conv2D layer and one YoloLayer; the first six
    ConvLayers are formed the following way:
    1x1xn, 3x3x2n, 1x1xn, 3x3x2n, 1x1xn, 3x3x2n,
    and the Conv2D layer is 1x1x255. This block implements only the first
    five ConvLayers (conv1-conv5); the remaining layers are built outside
    this module. Some blocks have a branch after the fifth ConvLayer.
    The number of input channels is arbitrary (in_channels).

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        super(DetectionBlock, self).__init__(init_cfg)
        double_out_channels = out_channels * 2

        # shortcut
        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
        self.conv1 = ConvModule(in_channels, out_channels, 1, **cfg)
        self.conv2 = ConvModule(
            out_channels, double_out_channels, 3, padding=1, **cfg)
        self.conv3 = ConvModule(double_out_channels, out_channels, 1, **cfg)
        self.conv4 = ConvModule(
            out_channels, double_out_channels, 3, padding=1, **cfg)
        self.conv5 = ConvModule(double_out_channels, out_channels, 1, **cfg)

    def forward(self, x):
        tmp = self.conv1(x)
        tmp = self.conv2(tmp)
        tmp = self.conv3(tmp)
        tmp = self.conv4(tmp)
        out = self.conv5(tmp)
        return out
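

# A quick shape walk-through for DetectionBlock (a sketch derived from the
# constructor above, with out_channels = n):
#   conv1 (1x1): (B, in_channels, H, W) -> (B, n,  H, W)
#   conv2 (3x3): (B, n,  H, W)          -> (B, 2n, H, W)
#   conv3 (1x1): (B, 2n, H, W)          -> (B, n,  H, W)
#   conv4 (3x3): (B, n,  H, W)          -> (B, 2n, H, W)
#   conv5 (1x1): (B, 2n, H, W)          -> (B, n,  H, W)
# The spatial size is preserved, so e.g. (illustrative numbers, not from
# this file) DetectionBlock(1024, 512) maps a (B, 1024, 13, 13) input to a
# (B, 512, 13, 13) output.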


@NECKS.register_module()
class YOLOV3Neck(BaseModule):
    """The neck of YOLOV3.

    It can be treated as a simplified version of FPN. It takes the
    multi-scale features from the Darknet backbone, does some upsampling
    and concatenation, and finally outputs a tuple of refined feature maps,
    one per scale, for the detection head.

    Note:
        The input feats should be ordered from top to bottom of the
        backbone, i.e., from low-lvl (large) to high-lvl (small) feature
        maps, so that ``feats[-1]`` is the deepest one. YOLOV3Neck then
        processes them in reversed order, i.e., from bottom (high-lvl) to
        top (low-lvl).

    Args:
        num_scales (int): The number of scales / stages.
        in_channels (List[int]): The number of input channels per scale.
        out_channels (List[int]): The number of output channels per scale.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None.
        norm_cfg (dict, optional): Dictionary to construct and config norm
            layer. Default: dict(type='BN', requires_grad=True)
        act_cfg (dict, optional): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 num_scales,
                 in_channels,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        super(YOLOV3Neck, self).__init__(init_cfg)
        assert (num_scales == len(in_channels) == len(out_channels))
        self.num_scales = num_scales
        self.in_channels = in_channels
        self.out_channels = out_channels

        # shortcut
        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # To support arbitrary scales, the code looks awful, but it works.
        # A better solution is welcome.
        self.detect1 = DetectionBlock(in_channels[0], out_channels[0], **cfg)
        for i in range(1, self.num_scales):
            in_c, out_c = self.in_channels[i], self.out_channels[i]
            inter_c = out_channels[i - 1]
            self.add_module(f'conv{i}', ConvModule(inter_c, out_c, 1, **cfg))
            # in_c + out_c: high-lvl feats will be cat with low-lvl feats
            self.add_module(f'detect{i+1}',
                            DetectionBlock(in_c + out_c, out_c, **cfg))

    def forward(self, feats):
        assert len(feats) == self.num_scales

        # processed from bottom (high-lvl) to top (low-lvl)
        outs = []
        out = self.detect1(feats[-1])
        outs.append(out)

        for i, x in enumerate(reversed(feats[:-1])):
            conv = getattr(self, f'conv{i+1}')
            tmp = conv(out)

            # upsample the high-lvl feature and cat it with the low-lvl one
            tmp = F.interpolate(tmp, scale_factor=2)
            tmp = torch.cat((tmp, x), 1)

            detect = getattr(self, f'detect{i+2}')
            out = detect(tmp)
            outs.append(out)

        return tuple(outs)
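
For a quick sanity check, the minimal sketch below builds the neck standalone and pushes dummy Darknet-53-style feature maps through it. The channel lists ([1024, 512, 256] in, [512, 256, 128] out), the 13/26/52 spatial sizes, and the import path mmdet.models.necks are assumptions matching the common YOLOv3-on-Darknet-53 setup with MMDetection 2.x and mmcv installed; they are not defined in this file.

import torch
from mmdet.models.necks import YOLOV3Neck  # assumed import path (MMDetection 2.x)

# Illustrative channel layout for a Darknet-53 backbone (an assumption):
# the deepest feature map has 1024 channels.
neck = YOLOV3Neck(
    num_scales=3,
    in_channels=[1024, 512, 256],
    out_channels=[512, 256, 128])
neck.eval()

# Dummy backbone features, ordered from low-lvl (large) to high-lvl (small),
# so feats[-1] is the 1024-channel map consumed first by detect1.
feats = (
    torch.rand(1, 256, 52, 52),
    torch.rand(1, 512, 26, 26),
    torch.rand(1, 1024, 13, 13),
)

with torch.no_grad():
    outs = neck(feats)

for out in outs:
    print(tuple(out.shape))
# Expected under these assumptions:
# (1, 512, 13, 13)
# (1, 256, 26, 26)
# (1, 128, 52, 52)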
