You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

reduction.cpp 24 kB

Fix warnings on Visual Studio (#1456) * Fix warning C4244 in src/layer/convolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/convolution_sgemm_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data * Fix warning C4244 in src/layer/deconvolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/elu.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4267 in src/layer/embed.cpp C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/exp.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/innerproduct.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/log.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/lrn.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/mvn.cp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/power.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/proposal.cpp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'int', possible loss of data C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/reduction.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data * Fix warning C4244 in src/layer/tanh.cpp C4244: '=': conversion from 
'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/binaryop.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/unaryop.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/x86/convolutiondepthwise_3x3_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data
6 years ago
Fix warnings on Visual Studio (#1456) * Fix warning C4244 in src/layer/convolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/convolution_sgemm_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data * Fix warning C4244 in src/layer/deconvolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/elu.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4267 in src/layer/embed.cpp C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/exp.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/innerproduct.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/log.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/lrn.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/mvn.cp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/power.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/proposal.cpp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'int', possible loss of data C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/reduction.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data * Fix warning C4244 in src/layer/tanh.cpp C4244: '=': conversion from 
'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/binaryop.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/unaryop.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/x86/convolutiondepthwise_3x3_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data
6 years ago
Fix warnings on Visual Studio (#1456) * Fix warning C4244 in src/layer/convolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/convolution_sgemm_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data * Fix warning C4244 in src/layer/deconvolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/elu.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4267 in src/layer/embed.cpp C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/exp.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/innerproduct.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/log.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/lrn.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/mvn.cp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/power.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/proposal.cpp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'int', possible loss of data C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/reduction.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data * Fix warning C4244 in src/layer/tanh.cpp C4244: '=': conversion from 
'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/binaryop.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/unaryop.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/x86/convolutiondepthwise_3x3_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data
6 years ago
Fix warnings on Visual Studio (#1456) * Fix warning C4244 in src/layer/convolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/convolution_sgemm_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data * Fix warning C4244 in src/layer/deconvolution.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/elu.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4267 in src/layer/embed.cpp C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/exp.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/innerproduct.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/log.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/lrn.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/mvn.cp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/power.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/proposal.cpp C4244: 'initializing': conversion from 'double' to 'float', possible loss of data C4244: 'initializing': conversion from 'double' to 'int', possible loss of data C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/reduction.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data * Fix warning C4244 in src/layer/tanh.cpp C4244: '=': conversion from 
'double' to 'float', possible loss of data * Fix warning C4244 in src/layer/binaryop.cpp C4244: '=': conversion from 'double' to 'float', possible loss of data * Fix warnings C4244 and C4267 in src/layer/unaryop.cpp C4244: 'return': conversion from 'double' to 'T', possible loss of data C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data * Fix warning C4244 in src/layer/x86/convolutiondepthwise_3x3_int8.h C4244: 'initializing': conversion from 'double' to 'int', possible loss of data
6 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "reduction.h"
  15. #include <float.h>
  16. #include <limits.h>
  17. #include <math.h>
  18. #include <algorithm>
  19. #include <functional>
  20. namespace ncnn {
  // Invokes the DEFINE_LAYER_CREATOR macro for the Reduction layer. The macro is
  // defined outside this file; presumably it emits the factory function used to
  // instantiate the layer by name -- TODO confirm against the macro definition.
  21. DEFINE_LAYER_CREATOR(Reduction)
  22. Reduction::Reduction()
  23. {
  24. one_blob_only = true;
  25. support_inplace = false;
  26. }
  27. int Reduction::load_param(const ParamDict& pd)
  28. {
  29. operation = pd.get(0, 0);
  30. reduce_all = pd.get(1, 1);
  31. coeff = pd.get(2, 1.f);
  32. axes = pd.get(3, Mat());
  33. keepdims = pd.get(4, 0);
  34. return 0;
  35. }
  36. template<typename Op, typename Op2>
  37. static int reduction_op(const Mat& a, Mat& b, float v0, bool reduce_w, bool reduce_h, bool reduce_c, const Option& opt)
  38. {
  39. Op op;
  40. Op2 op2;
  41. size_t elemsize = a.elemsize;
  42. int dims = a.dims;
  43. if (dims == 1)
  44. {
  45. int w = a.w;
  46. b.create(1, elemsize, opt.blob_allocator);
  47. const float* ptr = a;
  48. float sum = v0;
  49. for (int i=0; i<w; i++)
  50. {
  51. sum = op(sum, ptr[i]);
  52. }
  53. b[0] = sum;
  54. return 0;
  55. }
  56. if (dims == 2)
  57. {
  58. int w = a.w;
  59. int h = a.h;
  60. if (reduce_w && reduce_h)
  61. {
  62. // w h -> X X
  63. b.create(1, elemsize, opt.blob_allocator);
  64. Mat sums(h, elemsize, opt.workspace_allocator);
  65. if (sums.empty())
  66. return -100;
  67. #pragma omp parallel for num_threads(opt.num_threads)
  68. for (int i=0; i<h; i++)
  69. {
  70. const float* ptr = a.row(i);
  71. float sum = v0;
  72. for (int j=0; j<w; j++)
  73. {
  74. sum = op(sum, ptr[j]);
  75. }
  76. sums[i] = sum;
  77. }
  78. float sum = v0;
  79. for (int i=0; i<h; i++)
  80. {
  81. sum = op2(sum, sums[i]);
  82. }
  83. b[0] = sum;
  84. return 0;
  85. }
  86. if (reduce_w && !reduce_h)
  87. {
  88. // w h -> X h
  89. b.create(h, elemsize, opt.blob_allocator);
  90. #pragma omp parallel for num_threads(opt.num_threads)
  91. for (int i=0; i<h; i++)
  92. {
  93. const float* ptr = a.row(i);
  94. float sum = v0;
  95. for (int j=0; j<w; j++)
  96. {
  97. sum = op(sum, ptr[j]);
  98. }
  99. b[i] = sum;
  100. }
  101. return 0;
  102. }
  103. if (!reduce_w && reduce_h)
  104. {
  105. // w h -> w X
  106. b.create(w, elemsize, opt.blob_allocator);
  107. b.fill(v0);
  108. for (int i=0; i<h; i++)
  109. {
  110. const float* ptr = a.row(i);
  111. for (int j=0; j<w; j++)
  112. {
  113. b[j] = op(b[j], ptr[j]);
  114. }
  115. }
  116. return 0;
  117. }
  118. }
  119. if (dims == 3)
  120. {
  121. int w = a.w;
  122. int h = a.h;
  123. int channels = a.c;
  124. int size = w * h;
  125. if (reduce_w && reduce_h && reduce_c)
  126. {
  127. // w h c -> X X X
  128. b.create(1, elemsize, opt.blob_allocator);
  129. Mat sums(channels, elemsize, opt.workspace_allocator);
  130. if (sums.empty())
  131. return -100;
  132. #pragma omp parallel for num_threads(opt.num_threads)
  133. for (int q=0; q<channels; q++)
  134. {
  135. const float* ptr = a.channel(q);
  136. float sum = v0;
  137. for (int i=0; i<size; i++)
  138. {
  139. sum = op(sum, ptr[i]);
  140. }
  141. sums[q] = sum;
  142. }
  143. float sum = v0;
  144. for (int i=0; i<channels; i++)
  145. {
  146. sum = op2(sum, sums[i]);
  147. }
  148. b[0] = sum;
  149. return 0;
  150. }
  151. if (reduce_w && reduce_h && !reduce_c)
  152. {
  153. // w h c -> X X c
  154. b.create(channels, elemsize, opt.blob_allocator);
  155. #pragma omp parallel for num_threads(opt.num_threads)
  156. for (int q=0; q<channels; q++)
  157. {
  158. const float* ptr = a.channel(q);
  159. float sum = v0;
  160. for (int i=0; i<size; i++)
  161. {
  162. sum = op(sum, ptr[i]);
  163. }
  164. b[q] = sum;
  165. }
  166. return 0;
  167. }
  168. if (reduce_w && !reduce_h && !reduce_c)
  169. {
  170. // w h c -> X h c
  171. b.create(h, channels, elemsize, opt.blob_allocator);
  172. #pragma omp parallel for num_threads(opt.num_threads)
  173. for (int q=0; q<channels; q++)
  174. {
  175. const float* ptr = a.channel(q);
  176. float* outptr = b.row(q);
  177. for (int i=0; i<h; i++)
  178. {
  179. float sum = v0;
  180. for (int j=0; j<w; j++)
  181. {
  182. sum = op(sum, ptr[j]);
  183. }
  184. outptr[i] = sum;
  185. ptr += w;
  186. }
  187. }
  188. return 0;
  189. }
  190. if (reduce_w && !reduce_h && reduce_c)
  191. {
  192. // w h c -> X h X
  193. b.create(h, elemsize, opt.blob_allocator);
  194. Mat mins(1, h, channels, elemsize, opt.workspace_allocator);
  195. if (mins.empty())
  196. return -100;
  197. mins.fill(v0);
  198. #pragma omp parallel for num_threads(opt.num_threads)
  199. for (int q=0; q<channels; q++)
  200. {
  201. const float* ptr = a.channel(q);
  202. float* mins_ptr = mins.channel(q);
  203. for (int i=0; i<h; i++)
  204. {
  205. float sum = v0;
  206. for (int j=0; j<w; j++)
  207. {
  208. sum = op(sum, ptr[j]);
  209. }
  210. mins_ptr[i] = sum;
  211. ptr += w;
  212. }
  213. }
  214. b.fill(v0);
  215. for (int q=0; q<channels; q++)
  216. {
  217. const float* mins_ptr = mins.channel(q);
  218. for (int i=0; i<h; i++)
  219. {
  220. b[i] = op2(b[i], mins_ptr[i]);
  221. }
  222. }
  223. return 0;
  224. }
  225. if (!reduce_w && reduce_h && reduce_c)
  226. {
  227. // w h c -> w X X
  228. b.create(w, elemsize, opt.blob_allocator);
  229. Mat mins(w, 1, channels, elemsize, opt.workspace_allocator);
  230. if (mins.empty())
  231. return -100;
  232. mins.fill(v0);
  233. #pragma omp parallel for num_threads(opt.num_threads)
  234. for (int q=0; q<channels; q++)
  235. {
  236. const float* ptr = a.channel(q);
  237. float* mins_ptr = mins.channel(q);
  238. for (int i=0; i<h; i++)
  239. {
  240. for (int j=0; j<w; j++)
  241. {
  242. mins_ptr[j] = op(mins_ptr[j], ptr[j]);
  243. }
  244. ptr += w;
  245. }
  246. }
  247. b.fill(v0);
  248. for (int q=0; q<channels; q++)
  249. {
  250. const float* mins_ptr = mins.channel(q);
  251. for (int j=0; j<w; j++)
  252. {
  253. b[j] = op2(b[j], mins_ptr[j]);
  254. }
  255. }
  256. return 0;
  257. }
  258. if (!reduce_w && !reduce_h && reduce_c)
  259. {
  260. // w h c -> w h X
  261. b.create(w, h, elemsize, opt.blob_allocator);
  262. b.fill(v0);
  263. for (int q=0; q<channels; q++)
  264. {
  265. const float* ptr = a.channel(q);
  266. for (int i=0; i<size; i++)
  267. {
  268. b[i] = op(b[i], ptr[i]);
  269. }
  270. }
  271. return 0;
  272. }
  273. if (!reduce_w && reduce_h && !reduce_c)
  274. {
  275. // w h c -> w X c
  276. b.create(w, channels, elemsize, opt.blob_allocator);
  277. b.fill(v0);
  278. #pragma omp parallel for num_threads(opt.num_threads)
  279. for (int q=0; q<channels; q++)
  280. {
  281. const float* ptr = a.channel(q);
  282. float* outptr = b.row(q);
  283. for (int i=0; i<h; i++)
  284. {
  285. for (int j=0; j<w; j++)
  286. {
  287. outptr[j] = op(outptr[j], ptr[j]);
  288. }
  289. ptr += w;
  290. }
  291. }
  292. return 0;
  293. }
  294. }
  295. return 0;
  296. }
  297. template<typename Op, typename Op2>
  298. static int reduction_op_keepdims(const Mat& a, Mat& b, float v0, bool reduce_w, bool reduce_h, bool reduce_c, const Option& opt)
  299. {
  300. Op op;
  301. Op2 op2;
  302. size_t elemsize = a.elemsize;
  303. int dims = a.dims;
  304. if (dims == 1)
  305. {
  306. int w = a.w;
  307. b.create(1, elemsize, opt.blob_allocator);
  308. const float* ptr = a;
  309. float sum = v0;
  310. for (int i=0; i<w; i++)
  311. {
  312. sum = op(sum, ptr[i]);
  313. }
  314. b[0] = sum;
  315. return 0;
  316. }
  317. if (dims == 2)
  318. {
  319. int w = a.w;
  320. int h = a.h;
  321. if (reduce_w && reduce_h)
  322. {
  323. // w h -> 1 1
  324. b.create(1, 1, elemsize, opt.blob_allocator);
  325. Mat sums(h, elemsize, opt.workspace_allocator);
  326. if (sums.empty())
  327. return -100;
  328. #pragma omp parallel for num_threads(opt.num_threads)
  329. for (int i=0; i<h; i++)
  330. {
  331. const float* ptr = a.row(i);
  332. float sum = v0;
  333. for (int j=0; j<w; j++)
  334. {
  335. sum = op(sum, ptr[j]);
  336. }
  337. sums[i] = sum;
  338. }
  339. float sum = v0;
  340. for (int i=0; i<h; i++)
  341. {
  342. sum = op2(sum, sums[i]);
  343. }
  344. b[0] = sum;
  345. return 0;
  346. }
  347. if (reduce_w && !reduce_h)
  348. {
  349. // w h -> 1 h
  350. b.create(1, h, elemsize, opt.blob_allocator);
  351. #pragma omp parallel for num_threads(opt.num_threads)
  352. for (int i=0; i<h; i++)
  353. {
  354. const float* ptr = a.row(i);
  355. float sum = v0;
  356. for (int j=0; j<w; j++)
  357. {
  358. sum = op(sum, ptr[j]);
  359. }
  360. b[i] = sum;
  361. }
  362. return 0;
  363. }
  364. if (!reduce_w && reduce_h)
  365. {
  366. // w h -> w 1
  367. b.create(w, 1, elemsize, opt.blob_allocator);
  368. b.fill(v0);
  369. for (int i=0; i<h; i++)
  370. {
  371. const float* ptr = a.row(i);
  372. for (int j=0; j<w; j++)
  373. {
  374. b[j] = op(b[j], ptr[j]);
  375. }
  376. }
  377. return 0;
  378. }
  379. }
  380. if (dims == 3)
  381. {
  382. int w = a.w;
  383. int h = a.h;
  384. int channels = a.c;
  385. int size = w * h;
  386. if (reduce_w && reduce_h && reduce_c)
  387. {
  388. // w h c -> 1 1 1
  389. b.create(1, 1, 1, elemsize, opt.blob_allocator);
  390. Mat sums(channels, elemsize, opt.workspace_allocator);
  391. if (sums.empty())
  392. return -100;
  393. #pragma omp parallel for num_threads(opt.num_threads)
  394. for (int q=0; q<channels; q++)
  395. {
  396. const float* ptr = a.channel(q);
  397. float sum = v0;
  398. for (int i=0; i<size; i++)
  399. {
  400. sum = op(sum, ptr[i]);
  401. }
  402. sums[q] = sum;
  403. }
  404. float sum = v0;
  405. for (int i=0; i<channels; i++)
  406. {
  407. sum = op2(sum, sums[i]);
  408. }
  409. b[0] = sum;
  410. return 0;
  411. }
  412. if (reduce_w && reduce_h && !reduce_c)
  413. {
  414. // w h c -> 1 1 c
  415. b.create(1, 1, channels, elemsize, opt.blob_allocator);
  416. #pragma omp parallel for num_threads(opt.num_threads)
  417. for (int q=0; q<channels; q++)
  418. {
  419. const float* ptr = a.channel(q);
  420. float* outptr = b.channel(q);
  421. float sum = v0;
  422. for (int i=0; i<size; i++)
  423. {
  424. sum = op(sum, ptr[i]);
  425. }
  426. outptr[0] = sum;
  427. }
  428. return 0;
  429. }
  430. if (reduce_w && !reduce_h && !reduce_c)
  431. {
  432. // w h c -> 1 h c
  433. b.create(1, h, channels, elemsize, opt.blob_allocator);
  434. #pragma omp parallel for num_threads(opt.num_threads)
  435. for (int q=0; q<channels; q++)
  436. {
  437. const float* ptr = a.channel(q);
  438. float* outptr = b.channel(q);
  439. for (int i=0; i<h; i++)
  440. {
  441. float sum = v0;
  442. for (int j=0; j<w; j++)
  443. {
  444. sum = op(sum, ptr[j]);
  445. }
  446. outptr[i] = sum;
  447. ptr += w;
  448. }
  449. }
  450. return 0;
  451. }
  452. if (reduce_w && !reduce_h && reduce_c)
  453. {
  454. // w h c -> 1 h 1
  455. b.create(1, h, 1, elemsize, opt.blob_allocator);
  456. Mat mins(1, h, channels, elemsize, opt.workspace_allocator);
  457. if (mins.empty())
  458. return -100;
  459. mins.fill(v0);
  460. #pragma omp parallel for num_threads(opt.num_threads)
  461. for (int q=0; q<channels; q++)
  462. {
  463. const float* ptr = a.channel(q);
  464. float* mins_ptr = mins.channel(q);
  465. for (int i=0; i<h; i++)
  466. {
  467. float sum = v0;
  468. for (int j=0; j<w; j++)
  469. {
  470. sum = op(sum, ptr[j]);
  471. }
  472. mins_ptr[i] = sum;
  473. ptr += w;
  474. }
  475. }
  476. b.fill(v0);
  477. for (int q=0; q<channels; q++)
  478. {
  479. const float* mins_ptr = mins.channel(q);
  480. for (int i=0; i<h; i++)
  481. {
  482. b[i] = op2(b[i], mins_ptr[i]);
  483. }
  484. }
  485. return 0;
  486. }
  487. if (!reduce_w && reduce_h && reduce_c)
  488. {
  489. // w h c -> w 1 1
  490. b.create(w, 1, 1, elemsize, opt.blob_allocator);
  491. Mat mins(w, 1, channels, elemsize, opt.workspace_allocator);
  492. if (mins.empty())
  493. return -100;
  494. mins.fill(v0);
  495. #pragma omp parallel for num_threads(opt.num_threads)
  496. for (int q=0; q<channels; q++)
  497. {
  498. const float* ptr = a.channel(q);
  499. float* mins_ptr = mins.channel(q);
  500. for (int i=0; i<h; i++)
  501. {
  502. for (int j=0; j<w; j++)
  503. {
  504. mins_ptr[j] = op(mins_ptr[j], ptr[j]);
  505. }
  506. ptr += w;
  507. }
  508. }
  509. b.fill(v0);
  510. for (int q=0; q<channels; q++)
  511. {
  512. const float* mins_ptr = mins.channel(q);
  513. for (int j=0; j<w; j++)
  514. {
  515. b[j] = op2(b[j], mins_ptr[j]);
  516. }
  517. }
  518. return 0;
  519. }
  520. if (!reduce_w && !reduce_h && reduce_c)
  521. {
  522. // w h c -> w h 1
  523. b.create(w, h, 1, elemsize, opt.blob_allocator);
  524. b.fill(v0);
  525. for (int q=0; q<channels; q++)
  526. {
  527. const float* ptr = a.channel(q);
  528. for (int i=0; i<size; i++)
  529. {
  530. b[i] = op(b[i], ptr[i]);
  531. }
  532. }
  533. return 0;
  534. }
  535. if (!reduce_w && reduce_h && !reduce_c)
  536. {
  537. // w h c -> w 1 c
  538. b.create(w, 1, channels, elemsize, opt.blob_allocator);
  539. b.fill(v0);
  540. #pragma omp parallel for num_threads(opt.num_threads)
  541. for (int q=0; q<channels; q++)
  542. {
  543. const float* ptr = a.channel(q);
  544. float* outptr = b.channel(q);
  545. for (int i=0; i<h; i++)
  546. {
  547. for (int j=0; j<w; j++)
  548. {
  549. outptr[j] = op(outptr[j], ptr[j]);
  550. }
  551. ptr += w;
  552. }
  553. }
  554. return 0;
  555. }
  556. }
  557. return 0;
  558. }
  559. template<typename MathOp>
  560. static int reduction_post_process(Mat& a, float coeff, const Option& opt)
  561. {
  562. MathOp mathop;
  563. int dims = a.dims;
  564. if (dims == 1)
  565. {
  566. int w = a.w;
  567. #pragma omp parallel for num_threads(opt.num_threads)
  568. for (int i=0; i<w; i++)
  569. a[i] = mathop(a[i]) * coeff;
  570. }
  571. else if (dims == 2)
  572. {
  573. int size = a.w * a.h;
  574. #pragma omp parallel for num_threads(opt.num_threads)
  575. for (int i=0; i<size; i++)
  576. a[i] = mathop(a[i]) * coeff;
  577. }
  578. else if (dims == 3)
  579. {
  580. int c = a.c;
  581. int size = a.w * a.h;
  582. if(c == 1)
  583. {
  584. #pragma omp parallel for num_threads(opt.num_threads)
  585. for (int i=0; i<size; i++)
  586. a[i] = mathop(a[i]) * coeff;
  587. }
  588. else
  589. {
  590. #pragma omp parallel for num_threads(opt.num_threads)
  591. for(int q=0; q<c; q++)
  592. {
  593. float* outptr = a.channel(q);
  594. for (int i=0; i<size; i++)
  595. outptr[i] = mathop(outptr[i]) * coeff;
  596. }
  597. }
  598. }
  599. return 0;
  600. }
  601. template<typename Op, typename Op2, typename Op3>
  602. static int reduction(const Mat& a, Mat& b, float v0, bool reduce_w, bool reduce_h, bool reduce_c, bool post_process, float coeff, int keepdims, const Option& opt)
  603. {
  604. int ret;
  605. if (keepdims)
  606. ret = reduction_op_keepdims<Op, Op2>(a, b, v0, reduce_w, reduce_h, reduce_c, opt);
  607. else
  608. ret = reduction_op<Op, Op2>(a, b, v0, reduce_w, reduce_h, reduce_c, opt);
  609. if (ret != 0)
  610. return -100;
  611. if (post_process || fabs(coeff - 1.f) > FLT_EPSILON)
  612. {
  613. ret = reduction_post_process<Op3>(b, coeff, opt);
  614. if (ret != 0)
  615. return -100;
  616. }
  617. return ret;
  618. }
  // Post-processing functor that hands its argument back unchanged.
  template<typename T>
  struct post_process_identity
  {
      T operator()(const T& value) const
      {
          return value;
      }
  };
  // Post-processing functor that returns the square root of its argument,
  // converted back to T.
  template<typename T>
  struct post_process_sqrt
  {
      T operator()(const T& value) const
      {
          return static_cast<T>(sqrt(value));
      }
  };
  // Post-processing functor that returns the natural logarithm of its argument,
  // converted back to T.
  template<typename T>
  struct post_process_log
  {
      T operator()(const T& value) const
      {
          return static_cast<T>(log(value));
      }
  };
  // Fold step for a sum of absolute values: adds |value| to the accumulator.
  template<typename T>
  struct reduction_op_asum
  {
      T operator()(const T& acc, const T& value) const
      {
          return static_cast<T>(acc + fabs(value));
      }
  };
  // Fold step for a sum of squares: adds value * value to the accumulator.
  template<typename T>
  struct reduction_op_sumsq
  {
      T operator()(const T& acc, const T& value) const
      {
          return acc + value * value;
      }
  };
  // Fold step for a sum of exponentials: adds exp(value) to the accumulator.
  template<typename T>
  struct reduction_op_sumsexp
  {
      T operator()(const T& acc, const T& value) const
      {
          return static_cast<T>(acc + exp(value));
      }
  };
  // Fold step for a maximum: returns the larger of the accumulator and the new
  // value; the accumulator wins unless it compares strictly less than the value.
  template<typename T>
  struct reduction_op_max
  {
      T operator()(const T& acc, const T& value) const
      {
          return (acc < value) ? value : acc;
      }
  };
  // Fold step for a minimum: returns the smaller of the accumulator and the new
  // value; the accumulator wins unless the value compares strictly less than it.
  template<typename T>
  struct reduction_op_min
  {
      T operator()(const T& acc, const T& value) const
      {
          return (value < acc) ? value : acc;
      }
  };
  651. int Reduction::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
  652. {
  653. int dims = bottom_blob.dims;
  654. int axes_flag[3] = {0};
  655. bool reduce_w = false;
  656. bool reduce_h = false;
  657. bool reduce_c = false;
  658. if (reduce_all)
  659. {
  660. reduce_w = true;
  661. reduce_h = true;
  662. reduce_c = true;
  663. }
  664. else
  665. {
  666. const int* axes_ptr = axes;
  667. int reduced_axes_num = axes.w;
  668. for (int i=0; i<reduced_axes_num; i++)
  669. {
  670. int axis = axes_ptr[i];
  671. // handle negative axis
  672. if (axis < 0)
  673. axis += dims + 1;
  674. axes_flag[axis - 1] = 1;
  675. }
  676. if (dims == 1)
  677. {
  678. reduce_w = true;
  679. }
  680. else if (dims == 2)
  681. {
  682. if (axes_flag[0] == 1) reduce_h = true;
  683. if (axes_flag[1] == 1) reduce_w = true;
  684. }
  685. else if (dims == 3)
  686. {
  687. if (axes_flag[0] == 1) reduce_c = true;
  688. if (axes_flag[1] == 1) reduce_h = true;
  689. if (axes_flag[2] == 1) reduce_w = true;
  690. }
  691. }
  692. if (operation == ReductionOp_SUM)
  693. return reduction< std::plus<float>, std::plus<float>, post_process_identity<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  694. if (operation == ReductionOp_ASUM)
  695. return reduction< reduction_op_asum<float>, std::plus<float>, post_process_identity<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  696. if (operation == ReductionOp_SUMSQ)
  697. return reduction< reduction_op_sumsq<float>, std::plus<float>, post_process_identity<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  698. if (operation == ReductionOp_MEAN)
  699. {
  700. int scale = 1;
  701. int dims = bottom_blob.dims;
  702. if (dims == 1)
  703. {
  704. scale = bottom_blob.w;
  705. }
  706. else if (dims == 2)
  707. {
  708. if (reduce_w) scale *= bottom_blob.w;
  709. if (reduce_h) scale *= bottom_blob.h;
  710. }
  711. else if (dims == 3)
  712. {
  713. if (reduce_w) scale *= bottom_blob.w;
  714. if (reduce_h) scale *= bottom_blob.h;
  715. if (reduce_c) scale *= bottom_blob.c;
  716. }
  717. float coeff_mean = coeff / scale;
  718. return reduction< std::plus<float>, std::plus<float>, post_process_identity<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, true, coeff_mean, keepdims, opt);
  719. }
  720. if (operation == ReductionOp_MAX)
  721. return reduction< reduction_op_max<float>, reduction_op_max<float>, post_process_identity<float> >(bottom_blob, top_blob, -FLT_MAX, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  722. if (operation == ReductionOp_MIN)
  723. return reduction< reduction_op_min<float>, reduction_op_min<float>, post_process_identity<float> >(bottom_blob, top_blob, FLT_MAX, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  724. if (operation == ReductionOp_PROD)
  725. return reduction< std::multiplies<float>, std::multiplies<float>, post_process_identity<float> >(bottom_blob, top_blob, 1.f, reduce_w, reduce_h, reduce_c, false, coeff, keepdims, opt);
  726. if (operation == ReductionOp_L1)
  727. return reduction< reduction_op_asum<float>, std::plus<float>, post_process_identity<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, false, 1.f, keepdims, opt);
  728. if (operation == ReductionOp_L2)
  729. return reduction< reduction_op_sumsq<float>, std::plus<float>, post_process_sqrt<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, true, 1.f, keepdims, opt);
  730. if (operation == ReductionOp_LogSum)
  731. return reduction< std::plus<float>, std::plus<float>, post_process_log<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, true, 1.f, keepdims, opt);
  732. if (operation == ReductionOp_LogSumExp)
  733. return reduction< reduction_op_sumsexp<float>, std::plus<float>, post_process_log<float> >(bottom_blob, top_blob, 0.f, reduce_w, reduce_h, reduce_c, true, 1.f, keepdims, opt);
  734. return 0;
  735. }
  736. } // namespace ncnn