You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

box_utils.py 6.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Bbox utils"""
  16. import math
  17. import itertools as it
  18. import numpy as np
  19. from .config import config
  20. class GeneratDefaultBoxes():
  21. """
  22. Generate Default boxes for SSD, follows the order of (W, H, archor_sizes).
  23. `self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [y, x, h, w].
  24. `self.default_boxes_ltrb` has a shape as `self.default_boxes`, the last dimension is [y1, x1, y2, x2].
  25. """
  26. def __init__(self):
  27. fk = config.img_shape[0] / np.array(config.steps)
  28. scale_rate = (config.max_scale - config.min_scale) / (len(config.num_default) - 1)
  29. scales = [config.min_scale + scale_rate * i for i in range(len(config.num_default))] + [1.0]
  30. self.default_boxes = []
  31. for idex, feature_size in enumerate(config.feature_size):
  32. sk1 = scales[idex]
  33. sk2 = scales[idex + 1]
  34. sk3 = math.sqrt(sk1 * sk2)
  35. if idex == 0:
  36. w, h = sk1 * math.sqrt(2), sk1 / math.sqrt(2)
  37. all_sizes = [(0.1, 0.1), (w, h), (h, w)]
  38. else:
  39. all_sizes = [(sk1, sk1)]
  40. for aspect_ratio in config.aspect_ratios[idex]:
  41. w, h = sk1 * math.sqrt(aspect_ratio), sk1 / math.sqrt(aspect_ratio)
  42. all_sizes.append((w, h))
  43. all_sizes.append((h, w))
  44. all_sizes.append((sk3, sk3))
  45. assert len(all_sizes) == config.num_default[idex]
  46. for i, j in it.product(range(feature_size), repeat=2):
  47. for w, h in all_sizes:
  48. cx, cy = (j + 0.5) / fk[idex], (i + 0.5) / fk[idex]
  49. self.default_boxes.append([cy, cx, h, w])
  50. def to_ltrb(cy, cx, h, w):
  51. return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2
  52. # For IoU calculation
  53. self.default_boxes_ltrb = np.array(tuple(to_ltrb(*i) for i in self.default_boxes), dtype='float32')
  54. self.default_boxes = np.array(self.default_boxes, dtype='float32')
  55. default_boxes_ltrb = GeneratDefaultBoxes().default_boxes_ltrb
  56. default_boxes = GeneratDefaultBoxes().default_boxes
  57. y1, x1, y2, x2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1)
  58. vol_anchors = (x2 - x1) * (y2 - y1)
  59. matching_threshold = config.match_thershold
  60. def ssd_bboxes_encode(boxes):
  61. """
  62. Labels anchors with ground truth inputs.
  63. Args:
  64. boxex: ground truth with shape [N, 5], for each row, it stores [y, x, h, w, cls].
  65. Returns:
  66. gt_loc: location ground truth with shape [num_anchors, 4].
  67. gt_label: class ground truth with shape [num_anchors, 1].
  68. num_matched_boxes: number of positives in an image.
  69. """
  70. def jaccard_with_anchors(bbox):
  71. """Compute jaccard score a box and the anchors."""
  72. # Intersection bbox and volume.
  73. ymin = np.maximum(y1, bbox[0])
  74. xmin = np.maximum(x1, bbox[1])
  75. ymax = np.minimum(y2, bbox[2])
  76. xmax = np.minimum(x2, bbox[3])
  77. w = np.maximum(xmax - xmin, 0.)
  78. h = np.maximum(ymax - ymin, 0.)
  79. # Volumes.
  80. inter_vol = h * w
  81. union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol
  82. jaccard = inter_vol / union_vol
  83. return np.squeeze(jaccard)
  84. pre_scores = np.zeros((config.num_ssd_boxes), dtype=np.float32)
  85. t_boxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
  86. t_label = np.zeros((config.num_ssd_boxes), dtype=np.int64)
  87. for bbox in boxes:
  88. label = int(bbox[4])
  89. scores = jaccard_with_anchors(bbox)
  90. idx = np.argmax(scores)
  91. scores[idx] = 2.0
  92. mask = (scores > matching_threshold)
  93. mask = mask & (scores > pre_scores)
  94. pre_scores = np.maximum(pre_scores, scores * mask)
  95. t_label = mask * label + (1 - mask) * t_label
  96. for i in range(4):
  97. t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i]
  98. index = np.nonzero(t_label)
  99. # Transform to ltrb.
  100. bboxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
  101. bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2
  102. bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]]
  103. # Encode features.
  104. bboxes_t = bboxes[index]
  105. default_boxes_t = default_boxes[index]
  106. bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.prior_scaling[0])
  107. bboxes_t[:, 2:4] = np.log(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4]) / config.prior_scaling[1]
  108. bboxes[index] = bboxes_t
  109. num_match = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32)
  110. return bboxes, t_label.astype(np.int32), num_match
  111. def ssd_bboxes_decode(boxes):
  112. """Decode predict boxes to [y, x, h, w]"""
  113. boxes_t = boxes.copy()
  114. default_boxes_t = default_boxes.copy()
  115. boxes_t[:, :2] = boxes_t[:, :2] * config.prior_scaling[0] * default_boxes_t[:, 2:] + default_boxes_t[:, :2]
  116. boxes_t[:, 2:4] = np.exp(boxes_t[:, 2:4] * config.prior_scaling[1]) * default_boxes_t[:, 2:4]
  117. bboxes = np.zeros((len(boxes_t), 4), dtype=np.float32)
  118. bboxes[:, [0, 1]] = boxes_t[:, [0, 1]] - boxes_t[:, [2, 3]] / 2
  119. bboxes[:, [2, 3]] = boxes_t[:, [0, 1]] + boxes_t[:, [2, 3]] / 2
  120. return np.clip(bboxes, 0, 1)
  121. def intersect(box_a, box_b):
  122. """Compute the intersect of two sets of boxes."""
  123. max_yx = np.minimum(box_a[:, 2:4], box_b[2:4])
  124. min_yx = np.maximum(box_a[:, :2], box_b[:2])
  125. inter = np.clip((max_yx - min_yx), a_min=0, a_max=np.inf)
  126. return inter[:, 0] * inter[:, 1]
  127. def jaccard_numpy(box_a, box_b):
  128. """Compute the jaccard overlap of two sets of boxes."""
  129. inter = intersect(box_a, box_b)
  130. area_a = ((box_a[:, 2] - box_a[:, 0]) *
  131. (box_a[:, 3] - box_a[:, 1]))
  132. area_b = ((box_b[2] - box_b[0]) *
  133. (box_b[3] - box_b[1]))
  134. union = area_a + area_b - inter
  135. return inter / union