You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

reduction.cpp 7.7 kB

8 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "reduction.h"
  15. #include <float.h>
  16. #include <limits.h>
  17. #include <math.h>
  18. #include <algorithm>
  19. #include <functional>
  20. namespace ncnn {
  21. DEFINE_LAYER_CREATOR(Reduction)
  22. Reduction::Reduction()
  23. {
  24. one_blob_only = true;
  25. support_inplace = false;
  26. }
  27. int Reduction::load_param(const ParamDict& pd)
  28. {
  29. operation = pd.get(0, 0);
  30. dim = pd.get(1, 0);
  31. coeff = pd.get(2, 1.f);
  32. return 0;
  33. }
  34. template<typename Op, typename Op2>
  35. static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff, const Option& opt)
  36. {
  37. Op op;
  38. Op2 op2;
  39. int w = a.w;
  40. int h = a.h;
  41. int channels = a.c;
  42. size_t elemsize = a.elemsize;
  43. int size = w * h;
  44. if (dim == 0)
  45. {
  46. // w h c -> X X X
  47. b.create(1, elemsize, opt.blob_allocator);
  48. }
  49. else if (dim == 1)
  50. {
  51. // w h c -> X X c
  52. b.create(channels, elemsize, opt.blob_allocator);
  53. }
  54. else if (dim == 2)
  55. {
  56. // w h c -> X h c
  57. b.create(h, channels, elemsize, opt.blob_allocator);
  58. }
  59. else if (dim == -1)
  60. {
  61. // w h c -> w X X
  62. b.create(w, elemsize, opt.blob_allocator);
  63. }
  64. else if (dim == -2)
  65. {
  66. // w h c -> w h X
  67. b.create(w, h, elemsize, opt.blob_allocator);
  68. }
  69. if (b.empty())
  70. return -100;
  71. if (dim == 0)
  72. {
  73. Mat sums(channels, elemsize, opt.workspace_allocator);
  74. if (sums.empty())
  75. return -100;
  76. #pragma omp parallel for num_threads(opt.num_threads)
  77. for (int q=0; q<channels; q++)
  78. {
  79. const float* ptr = a.channel(q);
  80. float sum = v0;
  81. for (int i=0; i<size; i++)
  82. {
  83. sum = op(sum, ptr[i]);
  84. }
  85. sums[q] = sum;
  86. }
  87. float sum = v0;
  88. for (int i=0; i<channels; i++)
  89. {
  90. sum = op2(sum, sums[i]);
  91. }
  92. b[0] = sum * coeff;
  93. }
  94. else if (dim == 1)
  95. {
  96. #pragma omp parallel for num_threads(opt.num_threads)
  97. for (int q=0; q<channels; q++)
  98. {
  99. const float* ptr = a.channel(q);
  100. float sum = v0;
  101. for (int i=0; i<size; i++)
  102. {
  103. sum = op(sum, ptr[i]);
  104. }
  105. b[q] = sum * coeff;
  106. }
  107. }
  108. else if (dim == 2)
  109. {
  110. #pragma omp parallel for num_threads(opt.num_threads)
  111. for (int q=0; q<channels; q++)
  112. {
  113. const float* ptr = a.channel(q);
  114. float* outptr = b.row(q);
  115. for (int i=0; i<h; i++)
  116. {
  117. float sum = v0;
  118. for (int j=0; j<w; j++)
  119. {
  120. sum = op(sum, ptr[i]);
  121. }
  122. outptr[i] = sum * coeff;
  123. ptr += w;
  124. }
  125. }
  126. }
  127. else if (dim == -1)
  128. {
  129. Mat mins(w, 1, channels, elemsize, opt.workspace_allocator);
  130. if (mins.empty())
  131. return -100;
  132. mins.fill(v0);
  133. #pragma omp parallel for num_threads(opt.num_threads)
  134. for (int q=0; q<channels; q++)
  135. {
  136. const float* ptr = a.channel(q);
  137. float* mins_ptr = mins.channel(q);
  138. for (int i=0; i<h; i++)
  139. {
  140. for (int j=0; j<w; j++)
  141. {
  142. mins_ptr[j] = op(mins_ptr[j], ptr[i]);
  143. }
  144. ptr += w;
  145. }
  146. }
  147. b.fill(v0);
  148. for (int q=0; q<channels; q++)
  149. {
  150. const float* mins_ptr = mins.channel(q);
  151. for (int j=0; j<w; j++)
  152. {
  153. b[j] = op2(b[j], mins_ptr[j]);
  154. }
  155. }
  156. for (int j=0; j<w; j++)
  157. {
  158. b[j] *= coeff;
  159. }
  160. }
  161. else if (dim == -2)
  162. {
  163. b.fill(v0);
  164. for (int q=0; q<channels; q++)
  165. {
  166. const float* ptr = a.channel(q);
  167. for (int i=0; i<size; i++)
  168. {
  169. b[i] = op(b[i], ptr[i]);
  170. }
  171. }
  172. for (int i=0; i<size; i++)
  173. {
  174. b[i] *= coeff;
  175. }
  176. }
  177. return 0;
  178. }
  // Accumulator step for absolute-sum reductions: returns x + fabs(y).
  // The nested typedefs are spelled out here instead of being inherited from
  // std::binary_function, which was deprecated in C++11 and removed in C++17.
  template<typename T>
  struct reduction_op_asum
  {
      typedef T first_argument_type;
      typedef T second_argument_type;
      typedef T result_type;

      T operator() (const T& x, const T& y) const
      {
          return x + fabs(y);
      }
  };
  // Accumulator step for sum-of-squares reductions: returns x + y * y.
  // The nested typedefs are spelled out here instead of being inherited from
  // std::binary_function, which was deprecated in C++11 and removed in C++17.
  template<typename T>
  struct reduction_op_sumsq
  {
      typedef T first_argument_type;
      typedef T second_argument_type;
      typedef T result_type;

      T operator() (const T& x, const T& y) const
      {
          return x + y * y;
      }
  };
  // Accumulator step for max reductions: returns std::max(x, y).
  // The nested typedefs are spelled out here instead of being inherited from
  // std::binary_function, which was deprecated in C++11 and removed in C++17.
  template<typename T>
  struct reduction_op_max
  {
      typedef T first_argument_type;
      typedef T second_argument_type;
      typedef T result_type;

      T operator() (const T& x, const T& y) const
      {
          return std::max(x, y);
      }
  };
  // Accumulator step for min reductions: returns std::min(x, y).
  // The nested typedefs are spelled out here instead of being inherited from
  // std::binary_function, which was deprecated in C++11 and removed in C++17.
  template<typename T>
  struct reduction_op_min
  {
      typedef T first_argument_type;
      typedef T second_argument_type;
      typedef T result_type;

      T operator() (const T& x, const T& y) const
      {
          return std::min(x, y);
      }
  };
  195. int Reduction::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
  196. {
  197. if (operation == ReductionOp_SUM)
  198. return reduction_op< std::plus<float>, std::plus<float> >(bottom_blob, top_blob, 0.f, dim, coeff, opt);
  199. if (operation == ReductionOp_ASUM)
  200. return reduction_op< reduction_op_asum<float>, std::plus<float> >(bottom_blob, top_blob, 0.f, dim, coeff, opt);
  201. if (operation == ReductionOp_SUMSQ)
  202. return reduction_op< reduction_op_sumsq<float>, std::plus<float> >(bottom_blob, top_blob, 0.f, dim, coeff, opt);
  203. if (operation == ReductionOp_MEAN)
  204. {
  205. int ret = reduction_op< std::plus<float>, std::plus<float> >(bottom_blob, top_blob, 0.f, dim, coeff, opt);
  206. if (ret != 0)
  207. return -100;
  208. int w = bottom_blob.w;
  209. int h = bottom_blob.h;
  210. int channels = bottom_blob.c;
  211. int size = w * h;
  212. if (dim == 0)
  213. {
  214. top_blob[0] /= channels * size;
  215. }
  216. else if (dim == 1)
  217. {
  218. for (int q=0; q<channels; q++)
  219. {
  220. top_blob[q] /= size;
  221. }
  222. }
  223. else if (dim == 2)
  224. {
  225. for (int q=0; q<channels; q++)
  226. {
  227. float* outptr = top_blob.channel(q);
  228. for (int i=0; i<h; i++)
  229. {
  230. outptr[i] /= w;
  231. }
  232. }
  233. }
  234. else if (dim == -1)
  235. {
  236. for (int j=0; j<w; j++)
  237. {
  238. top_blob[j] /= h * channels;
  239. }
  240. }
  241. else if (dim == -2)
  242. {
  243. for (int i=0; i<size; i++)
  244. {
  245. top_blob[i] /= channels;
  246. }
  247. }
  248. }
  249. if (operation == ReductionOp_MAX)
  250. return reduction_op< reduction_op_max<float>, reduction_op_max<float> >(bottom_blob, top_blob, -FLT_MAX, dim, coeff, opt);
  251. if (operation == ReductionOp_MIN)
  252. return reduction_op< reduction_op_min<float>, reduction_op_min<float> >(bottom_blob, top_blob, FLT_MAX, dim, coeff, opt);
  253. if (operation == ReductionOp_PROD)
  254. return reduction_op< std::multiplies<float>, std::multiplies<float> >(bottom_blob, top_blob, 1.f, dim, coeff, opt);
  255. return 0;
  256. }
  257. } // namespace ncnn