You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

roialign.cpp 7.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "roialign.h"
  15. #include <assert.h>
  16. namespace ncnn {
  17. ROIAlign::ROIAlign()
  18. {
  19. }
  20. int ROIAlign::load_param(const ParamDict& pd)
  21. {
  22. pooled_width = pd.get(0, 0);
  23. pooled_height = pd.get(1, 0);
  24. spatial_scale = pd.get(2, 1.f);
  25. sampling_ratio = pd.get(3, 0);
  26. aligned = pd.get(4, false);
  27. version = pd.get(5, 0);
  28. /*
  29. * version 0:
  30. * the original version of ROIAlign in ncnn
  31. * version 1:
  32. * the version in detectron2
  33. */
  34. assert(version >= 0 && version <= 1);
  35. return 0;
  36. }
  // Bilinearly samples a w x h row-major float plane at position (x, y).
  //
  //   ptr  : first element of the plane; row r starts at ptr + r * w
  //   w, h : plane width and height in elements (expected to be >= 1)
  //   x, y : sample position in element coordinates
  //
  // Positions outside [0, w - 1] x [0, h - 1] (NaN included) are clamped to the
  // nearest edge, so no computed index leaves the plane. Positions inside that
  // range are interpolated between the four surrounding elements.
  static inline float bilinear_interpolate(const float* ptr, int w, int h, float x, float y)
  {
      // Negated comparisons so that NaN is also mapped to 0 before the int cast.
      if (!(x > 0.f)) x = 0.f;
      if (!(y > 0.f)) y = 0.f;
      if (x > (float)(w - 1)) x = (float)(w - 1);
      if (y > (float)(h - 1)) y = (float)(h - 1);

      int x0 = (int)x;
      int x1 = x0 + 1;
      int y0 = (int)y;
      int y1 = y0 + 1;

      float a0 = x1 - x;
      float a1 = x - x0;
      float b0 = y1 - y;
      float b1 = y - y0;

      // On the last column / row there is no right / lower neighbour:
      // reuse the edge element and give it the full weight.
      if (x1 >= w)
      {
          x1 = w - 1;
          a0 = 1.f;
          a1 = 0.f;
      }
      if (y1 >= h)
      {
          y1 = h - 1;
          b0 = 1.f;
          b1 = 0.f;
      }

      // Interpolate along x on both rows, then along y between the rows.
      float r0 = ptr[y0 * w + x0] * a0 + ptr[y0 * w + x1] * a1;
      float r1 = ptr[y1 * w + x0] * a0 + ptr[y1 * w + x1] * a1;

      float v = r0 * b0 + r1 * b1;

      return v;
  }
  64. int ROIAlign::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
  65. {
  66. const Mat& bottom_blob = bottom_blobs[0];
  67. int w = bottom_blob.w;
  68. int h = bottom_blob.h;
  69. size_t elemsize = bottom_blob.elemsize;
  70. int channels = bottom_blob.c;
  71. const Mat& roi_blob = bottom_blobs[1];
  72. Mat& top_blob = top_blobs[0];
  73. top_blob.create(pooled_width, pooled_height, channels, elemsize, opt.blob_allocator);
  74. if (top_blob.empty())
  75. return -100;
  76. // For each ROI R = [x y w h]: avg pool over R
  77. const float* roi_ptr = roi_blob;
  78. float roi_x1 = roi_ptr[0] * spatial_scale;
  79. float roi_y1 = roi_ptr[1] * spatial_scale;
  80. float roi_x2 = roi_ptr[2] * spatial_scale;
  81. float roi_y2 = roi_ptr[3] * spatial_scale;
  82. if (aligned)
  83. {
  84. roi_x1 -= 0.5f;
  85. roi_y1 -= 0.5f;
  86. roi_x2 -= 0.5f;
  87. roi_y2 -= 0.5f;
  88. }
  89. float roi_w = roi_x2 - roi_x1;
  90. float roi_h = roi_y2 - roi_y1;
  91. if (!aligned)
  92. {
  93. roi_w = std::max(roi_w, 1.f);
  94. roi_h = std::max(roi_h, 1.f);
  95. }
  96. float bin_size_w = roi_w / (float)pooled_width;
  97. float bin_size_h = roi_h / (float)pooled_height;
  98. if (version == 0)
  99. {
  100. // original version
  101. #pragma omp parallel for num_threads(opt.num_threads)
  102. for (int q = 0; q < channels; q++)
  103. {
  104. const float* ptr = bottom_blob.channel(q);
  105. float* outptr = top_blob.channel(q);
  106. for (int ph = 0; ph < pooled_height; ph++)
  107. {
  108. for (int pw = 0; pw < pooled_width; pw++)
  109. {
  110. // Compute pooling region for this output unit:
  111. // start (included) = ph * roi_height / pooled_height
  112. // end (excluded) = (ph + 1) * roi_height / pooled_height
  113. float hstart = roi_y1 + ph * bin_size_h;
  114. float wstart = roi_x1 + pw * bin_size_w;
  115. float hend = roi_y1 + (ph + 1) * bin_size_h;
  116. float wend = roi_x1 + (pw + 1) * bin_size_w;
  117. hstart = std::min(std::max(hstart, 0.f), (float)h);
  118. wstart = std::min(std::max(wstart, 0.f), (float)w);
  119. hend = std::min(std::max(hend, 0.f), (float)h);
  120. wend = std::min(std::max(wend, 0.f), (float)w);
  121. int bin_grid_h = (int)(sampling_ratio > 0 ? sampling_ratio : ceil(hend - hstart));
  122. int bin_grid_w = (int)(sampling_ratio > 0 ? sampling_ratio : ceil(wend - wstart));
  123. bool is_empty = (hend <= hstart) || (wend <= wstart);
  124. int area = bin_grid_h * bin_grid_w;
  125. float sum = 0.f;
  126. for (int by = 0; by < bin_grid_h; by++)
  127. {
  128. float y = hstart + (by + 0.5f) * bin_size_h / (float)bin_grid_h;
  129. for (int bx = 0; bx < bin_grid_w; bx++)
  130. {
  131. float x = wstart + (bx + 0.5f) * bin_size_w / (float)bin_grid_w;
  132. // bilinear interpolate at (x,y)
  133. float v = bilinear_interpolate(ptr, w, h, x, y);
  134. sum += v;
  135. }
  136. }
  137. outptr[pw] = is_empty ? 0.f : (sum / (float)area);
  138. }
  139. outptr += pooled_width;
  140. }
  141. }
  142. }
  143. else if (version == 1)
  144. {
  145. // the version in detectron 2
  146. int roi_bin_grid_h = (int)(sampling_ratio > 0 ? sampling_ratio : ceil(roi_h / pooled_height));
  147. int roi_bin_grid_w = (int)(sampling_ratio > 0 ? sampling_ratio : ceil(roi_w / pooled_width));
  148. const float count = (float)std::max(roi_bin_grid_h * roi_bin_grid_w, 1);
  149. #pragma omp parallel for num_threads(opt.num_threads)
  150. for (int q = 0; q < channels; q++)
  151. {
  152. const float* ptr = bottom_blob.channel(q);
  153. float* outptr = top_blob.channel(q);
  154. for (int ph = 0; ph < pooled_height; ph++)
  155. {
  156. for (int pw = 0; pw < pooled_width; pw++)
  157. {
  158. float sum = 0.f;
  159. for (int by = 0; by < roi_bin_grid_h; by++)
  160. {
  161. float y = roi_y1 + ph * bin_size_h + (by + 0.5f) * bin_size_h / (float)roi_bin_grid_h;
  162. for (int bx = 0; bx < roi_bin_grid_w; bx++)
  163. {
  164. float x = roi_x1 + pw * bin_size_w + (bx + 0.5f) * bin_size_w / (float)roi_bin_grid_w;
  165. if (y < -1.0 || y > h || x < -1.0 || x > w)
  166. {
  167. // empty
  168. continue;
  169. }
  170. else
  171. {
  172. if (y <= 0) y = 0;
  173. if (x <= 0) x = 0;
  174. // bilinear interpolate at (x,y)
  175. float v = bilinear_interpolate(ptr, w, h, x, y);
  176. sum += v;
  177. }
  178. }
  179. }
  180. outptr[pw] = sum / count;
  181. }
  182. outptr += pooled_width;
  183. }
  184. }
  185. }
  186. return 0;
  187. }
  188. } // namespace ncnn