You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ncnnoptimize.cpp 56 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <set>
  15. #include <vector>
  16. // ncnn public header
  17. #include "net.h"
  18. #include "layer.h"
  19. // ncnn private header
  20. #include "layer/batchnorm.h"
  21. #include "layer/bias.h"
  22. #include "layer/binaryop.h"
  23. #include "layer/clip.h"
  24. #include "layer/concat.h"
  25. #include "layer/convolution.h"
  26. #include "layer/convolutiondepthwise.h"
  27. #include "layer/crop.h"
  28. #include "layer/deconvolution.h"
  29. #include "layer/deconvolutiondepthwise.h"
  30. #include "layer/detectionoutput.h"
  31. #include "layer/dropout.h"
  32. #include "layer/eltwise.h"
  33. #include "layer/elu.h"
  34. #include "layer/exp.h"
  35. #include "layer/innerproduct.h"
  36. #include "layer/input.h"
  37. #include "layer/instancenorm.h"
  38. #include "layer/interp.h"
  39. #include "layer/log.h"
  40. #include "layer/lrn.h"
  41. #include "layer/mvn.h"
  42. #include "layer/normalize.h"
  43. #include "layer/padding.h"
  44. #include "layer/permute.h"
  45. #include "layer/pooling.h"
  46. #include "layer/power.h"
  47. #include "layer/prelu.h"
  48. #include "layer/priorbox.h"
  49. #include "layer/proposal.h"
  50. #include "layer/psroipooling.h"
  51. #include "layer/quantize.h"
  52. #include "layer/reduction.h"
  53. #include "layer/relu.h"
  54. #include "layer/reorg.h"
  55. #include "layer/requantize.h"
  56. #include "layer/reshape.h"
  57. #include "layer/roialign.h"
  58. #include "layer/roipooling.h"
  59. #include "layer/scale.h"
  60. #include "layer/slice.h"
  61. #include "layer/shufflechannel.h"
  62. #include "layer/softmax.h"
  63. #include "layer/threshold.h"
  64. #include "layer/unaryop.h"
  65. #include "layer/yolodetectionoutput.h"
  66. #include "layer/yolov3detectionoutput.h"
  67. class NetOptimize : public ncnn::Net
  68. {
  69. public:
  70. int fuse_batchnorm_scale();
  71. int fuse_convolution_batchnorm();
  72. int fuse_convolutiondepthwise_batchnorm();
  73. int fuse_deconvolution_batchnorm();
  74. int fuse_deconvolutiondepthwise_batchnorm();
  75. int fuse_innerproduct_batchnorm();
  76. int fuse_convolution_activation();
  77. int fuse_convolutiondepthwise_activation();
  78. int fuse_deconvolution_activation();
  79. int fuse_deconvolutiondepthwise_activation();
  80. int fuse_innerproduct_activation();
  81. int eliminate_dropout();
  82. public:
  83. int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp);
  84. int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp);
  85. int fwrite_weight_tag(int tag, FILE* bp);
  86. int fwrite_weight_data(const ncnn::Mat& data, FILE* bp);
  87. int save(const char* parampath, const char* binpath);
  88. };
  89. int NetOptimize::fuse_batchnorm_scale()
  90. {
  91. const int layer_count = layers.size();
  92. for (int i=0; i<layer_count; i++)
  93. {
  94. if (layers[i]->type != "BatchNorm")
  95. continue;
  96. // BatchNorm - Scale
  97. int top_blob_index = layers[i]->tops[0];
  98. int j = i + 1;
  99. for (; j<layer_count; j++)
  100. {
  101. if (layers[j]->type != "Scale")
  102. continue;
  103. if (layers[j]->bottoms.size() != 1)
  104. continue;
  105. if (layers[j]->bottoms[0] == top_blob_index)
  106. break;
  107. }
  108. if (j == layer_count)
  109. continue;
  110. // fuse BatchNorm - Scale to BatchNorm
  111. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
  112. ncnn::Scale* scale = (ncnn::Scale*)layers[j];
  113. fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());
  114. {
  115. // v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
  116. // = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)
  117. int channels = batchnorm->channels;
  118. float* slope = batchnorm->slope_data;
  119. float* bias = batchnorm->bias_data;
  120. for (int q=0; q<channels; q++)
  121. {
  122. slope[q] = slope[q] * scale->scale_data[q];
  123. if (scale->bias_term)
  124. bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
  125. else
  126. bias[q] = bias[q] * scale->scale_data[q];
  127. }
  128. }
  129. int top_blob_index_final = scale->tops[0];
  130. batchnorm->tops[0] = top_blob_index_final;
  131. blobs[top_blob_index_final].producer = i;
  132. scale->type = "ncnnfused";
  133. }
  134. return 0;
  135. }
  136. int NetOptimize::fuse_convolution_batchnorm()
  137. {
  138. const int layer_count = layers.size();
  139. for (int i=0; i<layer_count; i++)
  140. {
  141. if (layers[i]->type != "Convolution")
  142. continue;
  143. // Convolution - BatchNorm
  144. int top_blob_index = layers[i]->tops[0];
  145. int j = i + 1;
  146. for (; j<layer_count; j++)
  147. {
  148. if (layers[j]->type != "BatchNorm")
  149. continue;
  150. if (layers[j]->bottoms.size() != 1)
  151. continue;
  152. if (layers[j]->bottoms[0] == top_blob_index)
  153. break;
  154. }
  155. if (j == layer_count)
  156. continue;
  157. // fuse Convolution - BatchNorm to Convolution
  158. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  159. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  160. fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());
  161. {
  162. int channels = batchnorm->channels;
  163. float eps = batchnorm->eps;
  164. // a = bias - slope * mean / sqrt(var + eps)
  165. // b = slope / sqrt(var + eps)
  166. // value = value * b + a
  167. std::vector<float> a(channels);
  168. std::vector<float> b(channels);
  169. for (int i=0; i<channels; i++)
  170. {
  171. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  172. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  173. b[i] = batchnorm->slope_data[i] / sqrt_var;
  174. }
  175. if (convolution->bias_term == 0)
  176. {
  177. // init bias as zero
  178. convolution->bias_term = 1;
  179. convolution->bias_data = ncnn::Mat(channels);
  180. convolution->bias_data.fill(0.f);
  181. }
  182. const int weight_per_outch = convolution->weight_data_size / channels;
  183. float* weight = convolution->weight_data;
  184. float* bias = convolution->bias_data;
  185. for (int i=0; i<channels; i++)
  186. {
  187. float* conv_weight_outch = weight + weight_per_outch * i;
  188. for (int j=0; j<weight_per_outch; j++)
  189. {
  190. conv_weight_outch[j] *= b[i];
  191. }
  192. bias[i] += a[i];
  193. }
  194. }
  195. int top_blob_index_final = batchnorm->tops[0];
  196. convolution->tops[0] = top_blob_index_final;
  197. blobs[top_blob_index_final].producer = i;
  198. batchnorm->type = "ncnnfused";
  199. }
  200. return 0;
  201. }
  202. int NetOptimize::fuse_convolutiondepthwise_batchnorm()
  203. {
  204. const int layer_count = layers.size();
  205. for (int i=0; i<layer_count; i++)
  206. {
  207. if (layers[i]->type != "ConvolutionDepthWise")
  208. continue;
  209. // ConvolutionDepthWise - BatchNorm
  210. int top_blob_index = layers[i]->tops[0];
  211. int j = i + 1;
  212. for (; j<layer_count; j++)
  213. {
  214. if (layers[j]->type != "BatchNorm")
  215. continue;
  216. if (layers[j]->bottoms.size() != 1)
  217. continue;
  218. if (layers[j]->bottoms[0] == top_blob_index)
  219. break;
  220. }
  221. if (j == layer_count)
  222. continue;
  223. // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
  224. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  225. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  226. fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  227. {
  228. int channels = batchnorm->channels;
  229. float eps = batchnorm->eps;
  230. // a = bias - slope * mean / sqrt(var + eps)
  231. // b = slope / sqrt(var + eps)
  232. // value = value * b + a
  233. std::vector<float> a(channels);
  234. std::vector<float> b(channels);
  235. for (int i=0; i<channels; i++)
  236. {
  237. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  238. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  239. b[i] = batchnorm->slope_data[i] / sqrt_var;
  240. }
  241. if (convolutiondepthwise->bias_term == 0)
  242. {
  243. // init bias as zero
  244. convolutiondepthwise->bias_term = 1;
  245. convolutiondepthwise->bias_data = ncnn::Mat(channels);
  246. convolutiondepthwise->bias_data.fill(0.f);
  247. }
  248. const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;
  249. float* weight = convolutiondepthwise->weight_data;
  250. float* bias = convolutiondepthwise->bias_data;
  251. for (int i=0; i<channels; i++)
  252. {
  253. float* conv_weight_outch = weight + weight_per_outch * i;
  254. for (int j=0; j<weight_per_outch; j++)
  255. {
  256. conv_weight_outch[j] *= b[i];
  257. }
  258. bias[i] += a[i];
  259. }
  260. }
  261. int top_blob_index_final = batchnorm->tops[0];
  262. convolutiondepthwise->tops[0] = top_blob_index_final;
  263. blobs[top_blob_index_final].producer = i;
  264. batchnorm->type = "ncnnfused";
  265. }
  266. return 0;
  267. }
  268. int NetOptimize::fuse_deconvolution_batchnorm()
  269. {
  270. const int layer_count = layers.size();
  271. for (int i=0; i<layer_count; i++)
  272. {
  273. if (layers[i]->type != "Deconvolution")
  274. continue;
  275. // Deconvolution - BatchNorm
  276. int top_blob_index = layers[i]->tops[0];
  277. int j = i + 1;
  278. for (; j<layer_count; j++)
  279. {
  280. if (layers[j]->type != "BatchNorm")
  281. continue;
  282. if (layers[j]->bottoms.size() != 1)
  283. continue;
  284. if (layers[j]->bottoms[0] == top_blob_index)
  285. break;
  286. }
  287. if (j == layer_count)
  288. continue;
  289. // fuse Deconvolution - BatchNorm to Deconvolution
  290. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  291. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  292. fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());
  293. {
  294. int channels = batchnorm->channels;
  295. float eps = batchnorm->eps;
  296. // a = bias - slope * mean / sqrt(var + eps)
  297. // b = slope / sqrt(var + eps)
  298. // value = value * b + a
  299. std::vector<float> a(channels);
  300. std::vector<float> b(channels);
  301. for (int i=0; i<channels; i++)
  302. {
  303. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  304. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  305. b[i] = batchnorm->slope_data[i] / sqrt_var;
  306. }
  307. if (deconvolution->bias_term == 0)
  308. {
  309. // init bias as zero
  310. deconvolution->bias_term = 1;
  311. deconvolution->bias_data = ncnn::Mat(channels);
  312. deconvolution->bias_data.fill(0.f);
  313. }
  314. const int weight_per_outch = deconvolution->weight_data_size / channels;
  315. float* weight = deconvolution->weight_data;
  316. float* bias = deconvolution->bias_data;
  317. for (int i=0; i<channels; i++)
  318. {
  319. float* conv_weight_outch = weight + weight_per_outch * i;
  320. for (int j=0; j<weight_per_outch; j++)
  321. {
  322. conv_weight_outch[j] *= b[i];
  323. }
  324. bias[i] += a[i];
  325. }
  326. }
  327. int top_blob_index_final = batchnorm->tops[0];
  328. deconvolution->tops[0] = top_blob_index_final;
  329. blobs[top_blob_index_final].producer = i;
  330. batchnorm->type = "ncnnfused";
  331. }
  332. return 0;
  333. }
  334. int NetOptimize::fuse_deconvolutiondepthwise_batchnorm()
  335. {
  336. const int layer_count = layers.size();
  337. for (int i=0; i<layer_count; i++)
  338. {
  339. if (layers[i]->type != "DeconvolutionDepthWise")
  340. continue;
  341. // DeconvolutionDepthWise - BatchNorm
  342. int top_blob_index = layers[i]->tops[0];
  343. int j = i + 1;
  344. for (; j<layer_count; j++)
  345. {
  346. if (layers[j]->type != "BatchNorm")
  347. continue;
  348. if (layers[j]->bottoms.size() != 1)
  349. continue;
  350. if (layers[j]->bottoms[0] == top_blob_index)
  351. break;
  352. }
  353. if (j == layer_count)
  354. continue;
  355. // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
  356. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  357. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  358. fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  359. {
  360. int channels = batchnorm->channels;
  361. float eps = batchnorm->eps;
  362. // a = bias - slope * mean / sqrt(var + eps)
  363. // b = slope / sqrt(var + eps)
  364. // value = value * b + a
  365. std::vector<float> a(channels);
  366. std::vector<float> b(channels);
  367. for (int i=0; i<channels; i++)
  368. {
  369. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  370. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  371. b[i] = batchnorm->slope_data[i] / sqrt_var;
  372. }
  373. if (deconvolutiondepthwise->bias_term == 0)
  374. {
  375. // init bias as zero
  376. deconvolutiondepthwise->bias_term = 1;
  377. deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
  378. deconvolutiondepthwise->bias_data.fill(0.f);
  379. }
  380. const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;
  381. float* weight = deconvolutiondepthwise->weight_data;
  382. float* bias = deconvolutiondepthwise->bias_data;
  383. for (int i=0; i<channels; i++)
  384. {
  385. float* conv_weight_outch = weight + weight_per_outch * i;
  386. for (int j=0; j<weight_per_outch; j++)
  387. {
  388. conv_weight_outch[j] *= b[i];
  389. }
  390. bias[i] += a[i];
  391. }
  392. }
  393. int top_blob_index_final = batchnorm->tops[0];
  394. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  395. blobs[top_blob_index_final].producer = i;
  396. batchnorm->type = "ncnnfused";
  397. }
  398. return 0;
  399. }
  400. int NetOptimize::fuse_innerproduct_batchnorm()
  401. {
  402. const int layer_count = layers.size();
  403. for (int i=0; i<layer_count; i++)
  404. {
  405. if (layers[i]->type != "InnerProduct")
  406. continue;
  407. // InnerProduct - BatchNorm
  408. int top_blob_index = layers[i]->tops[0];
  409. int j = i + 1;
  410. for (; j<layer_count; j++)
  411. {
  412. if (layers[j]->type != "BatchNorm")
  413. continue;
  414. if (layers[j]->bottoms.size() != 1)
  415. continue;
  416. if (layers[j]->bottoms[0] == top_blob_index)
  417. break;
  418. }
  419. if (j == layer_count)
  420. continue;
  421. // fuse InnerProduct - BatchNorm to InnerProduct
  422. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  423. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  424. fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());
  425. {
  426. int channels = batchnorm->channels;
  427. float eps = batchnorm->eps;
  428. // a = bias - slope * mean / sqrt(var + eps)
  429. // b = slope / sqrt(var + eps)
  430. // value = value * b + a
  431. std::vector<float> a(channels);
  432. std::vector<float> b(channels);
  433. for (int i=0; i<channels; i++)
  434. {
  435. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  436. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  437. b[i] = batchnorm->slope_data[i] / sqrt_var;
  438. }
  439. if (innerproduct->bias_term == 0)
  440. {
  441. // init bias as zero
  442. innerproduct->bias_term = 1;
  443. innerproduct->bias_data = ncnn::Mat(channels);
  444. innerproduct->bias_data.fill(0.f);
  445. }
  446. const int weight_per_outch = innerproduct->weight_data_size / channels;
  447. float* weight = innerproduct->weight_data;
  448. float* bias = innerproduct->bias_data;
  449. for (int i=0; i<channels; i++)
  450. {
  451. float* conv_weight_outch = weight + weight_per_outch * i;
  452. for (int j=0; j<weight_per_outch; j++)
  453. {
  454. conv_weight_outch[j] *= b[i];
  455. }
  456. bias[i] += a[i];
  457. }
  458. }
  459. int top_blob_index_final = batchnorm->tops[0];
  460. innerproduct->tops[0] = top_blob_index_final;
  461. blobs[top_blob_index_final].producer = i;
  462. batchnorm->type = "ncnnfused";
  463. }
  464. return 0;
  465. }
  466. int NetOptimize::fuse_convolution_activation()
  467. {
  468. const int layer_count = layers.size();
  469. for (int i=0; i<layer_count; i++)
  470. {
  471. if (layers[i]->type != "Convolution")
  472. continue;
  473. // Convolution - Activation
  474. int top_blob_index = layers[i]->tops[0];
  475. int j = i + 1;
  476. for (; j<layer_count; j++)
  477. {
  478. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  479. continue;
  480. if (layers[j]->bottoms.size() != 1)
  481. continue;
  482. if (layers[j]->bottoms[0] == top_blob_index)
  483. break;
  484. }
  485. if (j == layer_count)
  486. continue;
  487. // fuse Convolution - Activation to Convolution
  488. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  489. ncnn::Layer* activation = layers[j];
  490. fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());
  491. if (activation->type == "ReLU")
  492. {
  493. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  494. if (relu->slope == 0.f)
  495. {
  496. convolution->activation_type = 1;
  497. }
  498. else
  499. {
  500. convolution->activation_type = 2;
  501. convolution->activation_params = ncnn::Mat(1);
  502. convolution->activation_params[0] = relu->slope;
  503. }
  504. }
  505. else if (activation->type == "Clip")
  506. {
  507. ncnn::Clip* clip = (ncnn::Clip*)activation;
  508. convolution->activation_type = 3;
  509. convolution->activation_params = ncnn::Mat(2);
  510. convolution->activation_params[0] = clip->min;
  511. convolution->activation_params[1] = clip->max;
  512. }
  513. int top_blob_index_final = activation->tops[0];
  514. convolution->tops[0] = top_blob_index_final;
  515. blobs[top_blob_index_final].producer = i;
  516. activation->type = "ncnnfused";
  517. }
  518. return 0;
  519. }
  520. int NetOptimize::fuse_convolutiondepthwise_activation()
  521. {
  522. const int layer_count = layers.size();
  523. for (int i=0; i<layer_count; i++)
  524. {
  525. if (layers[i]->type != "ConvolutionDepthWise")
  526. continue;
  527. // ConvolutionDepthWise - Activation
  528. int top_blob_index = layers[i]->tops[0];
  529. int j = i + 1;
  530. for (; j<layer_count; j++)
  531. {
  532. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  533. continue;
  534. if (layers[j]->bottoms.size() != 1)
  535. continue;
  536. if (layers[j]->bottoms[0] == top_blob_index)
  537. break;
  538. }
  539. if (j == layer_count)
  540. continue;
  541. // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
  542. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  543. ncnn::Layer* activation = layers[j];
  544. fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());
  545. if (activation->type == "ReLU")
  546. {
  547. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  548. if (relu->slope == 0.f)
  549. {
  550. convolutiondepthwise->activation_type = 1;
  551. }
  552. else
  553. {
  554. convolutiondepthwise->activation_type = 2;
  555. convolutiondepthwise->activation_params = ncnn::Mat(1);
  556. convolutiondepthwise->activation_params[0] = relu->slope;
  557. }
  558. }
  559. else if (activation->type == "Clip")
  560. {
  561. ncnn::Clip* clip = (ncnn::Clip*)activation;
  562. convolutiondepthwise->activation_type = 3;
  563. convolutiondepthwise->activation_params = ncnn::Mat(2);
  564. convolutiondepthwise->activation_params[0] = clip->min;
  565. convolutiondepthwise->activation_params[1] = clip->max;
  566. }
  567. int top_blob_index_final = activation->tops[0];
  568. convolutiondepthwise->tops[0] = top_blob_index_final;
  569. blobs[top_blob_index_final].producer = i;
  570. activation->type = "ncnnfused";
  571. }
  572. return 0;
  573. }
  574. int NetOptimize::fuse_deconvolution_activation()
  575. {
  576. const int layer_count = layers.size();
  577. for (int i=0; i<layer_count; i++)
  578. {
  579. if (layers[i]->type != "Deconvolution")
  580. continue;
  581. // Deconvolution - Activation
  582. int top_blob_index = layers[i]->tops[0];
  583. int j = i + 1;
  584. for (; j<layer_count; j++)
  585. {
  586. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  587. continue;
  588. if (layers[j]->bottoms.size() != 1)
  589. continue;
  590. if (layers[j]->bottoms[0] == top_blob_index)
  591. break;
  592. }
  593. if (j == layer_count)
  594. continue;
  595. // fuse Deconvolution - Activation to Deconvolution
  596. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  597. ncnn::Layer* activation = layers[j];
  598. fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());
  599. if (activation->type == "ReLU")
  600. {
  601. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  602. if (relu->slope == 0.f)
  603. {
  604. deconvolution->activation_type = 1;
  605. }
  606. else
  607. {
  608. deconvolution->activation_type = 2;
  609. deconvolution->activation_params = ncnn::Mat(1);
  610. deconvolution->activation_params[0] = relu->slope;
  611. }
  612. }
  613. else if (activation->type == "Clip")
  614. {
  615. ncnn::Clip* clip = (ncnn::Clip*)activation;
  616. deconvolution->activation_type = 3;
  617. deconvolution->activation_params = ncnn::Mat(2);
  618. deconvolution->activation_params[0] = clip->min;
  619. deconvolution->activation_params[1] = clip->max;
  620. }
  621. int top_blob_index_final = activation->tops[0];
  622. deconvolution->tops[0] = top_blob_index_final;
  623. blobs[top_blob_index_final].producer = i;
  624. activation->type = "ncnnfused";
  625. }
  626. return 0;
  627. }
  628. int NetOptimize::fuse_deconvolutiondepthwise_activation()
  629. {
  630. const int layer_count = layers.size();
  631. for (int i=0; i<layer_count; i++)
  632. {
  633. if (layers[i]->type != "DeconvolutionDepthWise")
  634. continue;
  635. // DeconvolutionDepthWise - Activation
  636. int top_blob_index = layers[i]->tops[0];
  637. int j = i + 1;
  638. for (; j<layer_count; j++)
  639. {
  640. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  641. continue;
  642. if (layers[j]->bottoms.size() != 1)
  643. continue;
  644. if (layers[j]->bottoms[0] == top_blob_index)
  645. break;
  646. }
  647. if (j == layer_count)
  648. continue;
  649. // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
  650. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  651. ncnn::Layer* activation = layers[j];
  652. fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());
  653. if (activation->type == "ReLU")
  654. {
  655. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  656. if (relu->slope == 0.f)
  657. {
  658. deconvolutiondepthwise->activation_type = 1;
  659. }
  660. else
  661. {
  662. deconvolutiondepthwise->activation_type = 2;
  663. deconvolutiondepthwise->activation_params = ncnn::Mat(1);
  664. deconvolutiondepthwise->activation_params[0] = relu->slope;
  665. }
  666. }
  667. else if (activation->type == "Clip")
  668. {
  669. ncnn::Clip* clip = (ncnn::Clip*)activation;
  670. deconvolutiondepthwise->activation_type = 3;
  671. deconvolutiondepthwise->activation_params = ncnn::Mat(2);
  672. deconvolutiondepthwise->activation_params[0] = clip->min;
  673. deconvolutiondepthwise->activation_params[1] = clip->max;
  674. }
  675. int top_blob_index_final = activation->tops[0];
  676. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  677. blobs[top_blob_index_final].producer = i;
  678. activation->type = "ncnnfused";
  679. }
  680. return 0;
  681. }
  682. int NetOptimize::fuse_innerproduct_activation()
  683. {
  684. const int layer_count = layers.size();
  685. for (int i=0; i<layer_count; i++)
  686. {
  687. if (layers[i]->type != "InnerProduct")
  688. continue;
  689. // InnerProduct - Activation
  690. int top_blob_index = layers[i]->tops[0];
  691. int j = i + 1;
  692. for (; j<layer_count; j++)
  693. {
  694. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  695. continue;
  696. if (layers[j]->bottoms.size() != 1)
  697. continue;
  698. if (layers[j]->bottoms[0] == top_blob_index)
  699. break;
  700. }
  701. if (j == layer_count)
  702. continue;
  703. // fuse InnerProduct - Activation to InnerProduct
  704. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  705. ncnn::Layer* activation = layers[j];
  706. fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());
  707. if (activation->type == "ReLU")
  708. {
  709. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  710. if (relu->slope == 0.f)
  711. {
  712. innerproduct->activation_type = 1;
  713. }
  714. else
  715. {
  716. innerproduct->activation_type = 2;
  717. innerproduct->activation_params = ncnn::Mat(1);
  718. innerproduct->activation_params[0] = relu->slope;
  719. }
  720. }
  721. else if (activation->type == "Clip")
  722. {
  723. ncnn::Clip* clip = (ncnn::Clip*)activation;
  724. innerproduct->activation_type = 3;
  725. innerproduct->activation_params = ncnn::Mat(2);
  726. innerproduct->activation_params[0] = clip->min;
  727. innerproduct->activation_params[1] = clip->max;
  728. }
  729. int top_blob_index_final = activation->tops[0];
  730. innerproduct->tops[0] = top_blob_index_final;
  731. blobs[top_blob_index_final].producer = i;
  732. activation->type = "ncnnfused";
  733. }
  734. return 0;
  735. }
  736. int NetOptimize::eliminate_dropout()
  737. {
  738. const int layer_count = layers.size();
  739. for (int i=0; i<layer_count; i++)
  740. {
  741. if (layers[i]->type != "Dropout")
  742. continue;
  743. ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
  744. if (dropout->scale != 1.f)
  745. continue;
  746. // Any - Dropout
  747. int bottom_blob_index = layers[i]->bottoms[0];
  748. int j = i - 1;
  749. for (; j>=0; j--)
  750. {
  751. if (layers[j]->type == "ncnnfused")
  752. continue;
  753. if (layers[j]->tops.size() != 1)
  754. continue;
  755. if (layers[j]->tops[0] == bottom_blob_index)
  756. break;
  757. }
  758. if (j == -1)
  759. continue;
  760. ncnn::Layer* any = layers[j];
  761. fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());
  762. int top_blob_index_final = dropout->tops[0];
  763. any->tops[0] = top_blob_index_final;
  764. blobs[top_blob_index_final].producer = j;
  765. dropout->type = "ncnnfused";
  766. }
  767. return 0;
  768. }
  769. int NetOptimize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp)
  770. {
  771. const int count = m.w;
  772. const int* ptr = m;
  773. fprintf(pp, " -%d=%d", 23300 + id, count);
  774. for (int i=0; i<count; i++)
  775. {
  776. fprintf(pp, ",%d", ptr[i]);
  777. }
  778. return 0;
  779. }
  780. int NetOptimize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
  781. {
  782. const int count = m.w;
  783. const float* ptr = m;
  784. fprintf(pp, " -%d=%d", 23300 + id, count);
  785. for (int i=0; i<count; i++)
  786. {
  787. fprintf(pp, ",%f", ptr[i]);
  788. }
  789. return 0;
  790. }
  791. int NetOptimize::fwrite_weight_tag(int tag, FILE* bp)
  792. {
  793. fwrite(&tag, sizeof(int), 1, bp);
  794. return 0;
  795. }
  796. int NetOptimize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp)
  797. {
  798. ncnn::Mat data_flattened = data.reshape(data.w * data.h * data.c);
  799. fwrite(data_flattened.data, data_flattened.elemsize, data_flattened.w, bp);
  800. return 0;
  801. }
  802. int NetOptimize::save(const char* parampath, const char* binpath)
  803. {
  804. FILE* pp = fopen(parampath, "wb");
  805. FILE* bp = fopen(binpath, "wb");
  806. fprintf(pp, "7767517\n");
  807. const int layer_count = layers.size();
  808. int layer_count_fused = 0;
  809. std::set<std::string> blob_names;
  810. for (int i=0; i<layer_count; i++)
  811. {
  812. const ncnn::Layer* layer = layers[i];
  813. if (layer->type == "ncnnfused")
  814. continue;
  815. layer_count_fused++;
  816. int bottom_count = layer->bottoms.size();
  817. for (int j=0; j<bottom_count; j++)
  818. {
  819. int bottom_blob_index = layer->bottoms[j];
  820. blob_names.insert(blobs[bottom_blob_index].name);
  821. }
  822. int top_count = layer->tops.size();
  823. for (int j=0; j<top_count; j++)
  824. {
  825. int top_blob_index = layer->tops[j];
  826. blob_names.insert(blobs[top_blob_index].name);
  827. }
  828. }
  829. int blob_count_fused = blob_names.size();
  830. fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused);
  831. for (int i=0; i<layer_count; i++)
  832. {
  833. const ncnn::Layer* layer = layers[i];
  834. if (layer->type == "ncnnfused")
  835. continue;
  836. int bottom_count = layer->bottoms.size();
  837. int top_count = layer->tops.size();
  838. fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count);
  839. for (int j=0; j<bottom_count; j++)
  840. {
  841. int bottom_blob_index = layer->bottoms[j];
  842. fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str());
  843. }
  844. for (int j=0; j<top_count; j++)
  845. {
  846. int top_blob_index = layer->tops[j];
  847. fprintf(pp, " %s", blobs[top_blob_index].name.c_str());
  848. }
  849. ncnn::Layer* layer_default = ncnn::create_layer(layer->typeindex);
  850. ncnn::ParamDict pd;
  851. layer_default->load_param(pd);
  852. #define fprintf_param_value(format, phase) \
  853. { if (op->phase != op_default->phase) fprintf(pp, format, op->phase); }
  854. if (layer->type == "BatchNorm")
  855. {
  856. ncnn::BatchNorm* op = (ncnn::BatchNorm*)layer;
  857. ncnn::BatchNorm* op_default = (ncnn::BatchNorm*)layer_default;
  858. fprintf_param_value(" 0=%d", channels)
  859. fprintf_param_value(" 1=%f", eps)
  860. fwrite_weight_data(op->slope_data, bp);
  861. fwrite_weight_data(op->mean_data, bp);
  862. fwrite_weight_data(op->var_data, bp);
  863. fwrite_weight_data(op->bias_data, bp);
  864. }
  865. else if (layer->type == "Bias")
  866. {
  867. ncnn::Bias* op = (ncnn::Bias*)layer;
  868. ncnn::Bias* op_default = (ncnn::Bias*)layer_default;
  869. fprintf_param_value(" 0=%d", bias_data_size)
  870. fwrite_weight_data(op->bias_data, bp);
  871. }
  872. else if (layer->type == "BinaryOp")
  873. {
  874. ncnn::BinaryOp* op = (ncnn::BinaryOp*)layer;
  875. ncnn::BinaryOp* op_default = (ncnn::BinaryOp*)layer_default;
  876. fprintf_param_value(" 0=%d", op_type)
  877. fprintf_param_value(" 1=%d", with_scalar)
  878. fprintf_param_value(" 2=%f", b)
  879. }
  880. else if (layer->type == "Clip")
  881. {
  882. ncnn::Clip* op = (ncnn::Clip*)layer;
  883. ncnn::Clip* op_default = (ncnn::Clip*)layer_default;
  884. fprintf_param_value(" 0=%f", min)
  885. fprintf_param_value(" 1=%f", max)
  886. }
  887. else if (layer->type == "Concat")
  888. {
  889. ncnn::Concat* op = (ncnn::Concat*)layer;
  890. ncnn::Concat* op_default = (ncnn::Concat*)layer_default;
  891. fprintf_param_value(" 0=%d", axis)
  892. }
  893. else if (layer->type == "Convolution")
  894. {
  895. ncnn::Convolution* op = (ncnn::Convolution*)layer;
  896. ncnn::Convolution* op_default = (ncnn::Convolution*)layer_default;
  897. fprintf_param_value(" 0=%d", num_output)
  898. fprintf_param_value(" 1=%d", kernel_w)
  899. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  900. fprintf_param_value(" 2=%d", dilation_w)
  901. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  902. fprintf_param_value(" 3=%d", stride_w)
  903. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  904. fprintf_param_value(" 4=%d", pad_w)
  905. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  906. fprintf_param_value(" 5=%d", bias_term)
  907. fprintf_param_value(" 6=%d", weight_data_size)
  908. fprintf_param_value(" 8=%d", int8_scale_term)
  909. fprintf_param_value(" 9=%d", activation_type)
  910. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  911. fwrite_weight_tag(0, bp);
  912. fwrite_weight_data(op->weight_data, bp);
  913. fwrite_weight_data(op->bias_data, bp);
  914. }
  915. else if (layer->type == "ConvolutionDepthWise")
  916. {
  917. ncnn::ConvolutionDepthWise* op = (ncnn::ConvolutionDepthWise*)layer;
  918. ncnn::ConvolutionDepthWise* op_default = (ncnn::ConvolutionDepthWise*)layer_default;
  919. fprintf_param_value(" 0=%d", num_output)
  920. fprintf_param_value(" 1=%d", kernel_w)
  921. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  922. fprintf_param_value(" 2=%d", dilation_w)
  923. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  924. fprintf_param_value(" 3=%d", stride_w)
  925. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  926. fprintf_param_value(" 4=%d", pad_w)
  927. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  928. fprintf_param_value(" 5=%d", bias_term)
  929. fprintf_param_value(" 6=%d", weight_data_size)
  930. fprintf_param_value(" 7=%d", group)
  931. fprintf_param_value(" 8=%d", int8_scale_term)
  932. fprintf_param_value(" 9=%d", activation_type)
  933. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  934. fwrite_weight_tag(0, bp);
  935. fwrite_weight_data(op->weight_data, bp);
  936. fwrite_weight_data(op->bias_data, bp);
  937. }
  938. else if (layer->type == "Crop")
  939. {
  940. ncnn::Crop* op = (ncnn::Crop*)layer;
  941. ncnn::Crop* op_default = (ncnn::Crop*)layer_default;
  942. fprintf_param_value(" 0=%d", woffset)
  943. fprintf_param_value(" 1=%d", hoffset)
  944. fprintf_param_value(" 2=%d", coffset)
  945. fprintf_param_value(" 3=%d", outw)
  946. fprintf_param_value(" 4=%d", outh)
  947. fprintf_param_value(" 5=%d", outc)
  948. }
  949. else if (layer->type == "Deconvolution")
  950. {
  951. ncnn::Deconvolution* op = (ncnn::Deconvolution*)layer;
  952. ncnn::Deconvolution* op_default = (ncnn::Deconvolution*)layer_default;
  953. fprintf_param_value(" 0=%d", num_output)
  954. fprintf_param_value(" 1=%d", kernel_w)
  955. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  956. fprintf_param_value(" 2=%d", dilation_w)
  957. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  958. fprintf_param_value(" 3=%d", stride_w)
  959. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  960. fprintf_param_value(" 4=%d", pad_w)
  961. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  962. fprintf_param_value(" 5=%d", bias_term)
  963. fprintf_param_value(" 6=%d", weight_data_size)
  964. fprintf_param_value(" 9=%d", activation_type)
  965. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  966. fwrite_weight_tag(0, bp);
  967. fwrite_weight_data(op->weight_data, bp);
  968. fwrite_weight_data(op->bias_data, bp);
  969. }
  970. else if (layer->type == "DeconvolutionDepthWise")
  971. {
  972. ncnn::DeconvolutionDepthWise* op = (ncnn::DeconvolutionDepthWise*)layer;
  973. ncnn::DeconvolutionDepthWise* op_default = (ncnn::DeconvolutionDepthWise*)layer_default;
  974. fprintf_param_value(" 0=%d", num_output)
  975. fprintf_param_value(" 1=%d", kernel_w)
  976. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  977. fprintf_param_value(" 2=%d", dilation_w)
  978. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  979. fprintf_param_value(" 3=%d", stride_w)
  980. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  981. fprintf_param_value(" 4=%d", pad_w)
  982. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  983. fprintf_param_value(" 5=%d", bias_term)
  984. fprintf_param_value(" 6=%d", weight_data_size)
  985. fprintf_param_value(" 7=%d", group)
  986. fprintf_param_value(" 9=%d", activation_type)
  987. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  988. fwrite_weight_tag(0, bp);
  989. fwrite_weight_data(op->weight_data, bp);
  990. fwrite_weight_data(op->bias_data, bp);
  991. }
  992. else if (layer->type == "DetectionOutput")
  993. {
  994. ncnn::DetectionOutput* op = (ncnn::DetectionOutput*)layer;
  995. ncnn::DetectionOutput* op_default = (ncnn::DetectionOutput*)layer_default;
  996. fprintf_param_value(" 0=%d", num_class)
  997. fprintf_param_value(" 1=%f", nms_threshold)
  998. fprintf_param_value(" 2=%d", nms_top_k)
  999. fprintf_param_value(" 3=%d", keep_top_k)
  1000. fprintf_param_value(" 4=%f", confidence_threshold)
  1001. fprintf_param_value(" 5=%f", variances[0])
  1002. fprintf_param_value(" 6=%f", variances[1])
  1003. fprintf_param_value(" 7=%f", variances[2])
  1004. fprintf_param_value(" 8=%f", variances[3])
  1005. }
  1006. else if (layer->type == "Dropout")
  1007. {
  1008. ncnn::Dropout* op = (ncnn::Dropout*)layer;
  1009. ncnn::Dropout* op_default = (ncnn::Dropout*)layer_default;
  1010. fprintf_param_value(" 0=%f", scale)
  1011. }
  1012. else if (layer->type == "Eltwise")
  1013. {
  1014. ncnn::Eltwise* op = (ncnn::Eltwise*)layer;
  1015. ncnn::Eltwise* op_default = (ncnn::Eltwise*)layer_default;
  1016. fprintf_param_value(" 0=%d", op_type)
  1017. { if (!op->coeffs.empty()) fprintf_param_int_array(1, op->coeffs, pp); }
  1018. }
  1019. else if (layer->type == "ELU")
  1020. {
  1021. ncnn::ELU* op = (ncnn::ELU*)layer;
  1022. ncnn::ELU* op_default = (ncnn::ELU*)layer_default;
  1023. fprintf_param_value(" 0=%f", alpha)
  1024. }
  1025. else if (layer->type == "Exp")
  1026. {
  1027. ncnn::Exp* op = (ncnn::Exp*)layer;
  1028. ncnn::Exp* op_default = (ncnn::Exp*)layer_default;
  1029. fprintf_param_value(" 0=%f", base)
  1030. fprintf_param_value(" 1=%f", scale)
  1031. fprintf_param_value(" 2=%f", shift)
  1032. }
  1033. else if (layer->type == "InnerProduct")
  1034. {
  1035. ncnn::InnerProduct* op = (ncnn::InnerProduct*)layer;
  1036. ncnn::InnerProduct* op_default = (ncnn::InnerProduct*)layer_default;
  1037. fprintf_param_value(" 0=%d", num_output)
  1038. fprintf_param_value(" 1=%d", bias_term)
  1039. fprintf_param_value(" 2=%d", weight_data_size)
  1040. fprintf_param_value(" 8=%d", int8_scale_term)
  1041. fprintf_param_value(" 9=%d", activation_type)
  1042. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1043. fwrite_weight_tag(0, bp);
  1044. fwrite_weight_data(op->weight_data, bp);
  1045. fwrite_weight_data(op->bias_data, bp);
  1046. }
  1047. else if (layer->type == "Input")
  1048. {
  1049. ncnn::Input* op = (ncnn::Input*)layer;
  1050. ncnn::Input* op_default = (ncnn::Input*)layer_default;
  1051. fprintf_param_value(" 0=%d", w)
  1052. fprintf_param_value(" 1=%d", h)
  1053. fprintf_param_value(" 2=%d", c)
  1054. }
  1055. else if (layer->type == "InstanceNorm")
  1056. {
  1057. ncnn::InstanceNorm* op = (ncnn::InstanceNorm*)layer;
  1058. ncnn::InstanceNorm* op_default = (ncnn::InstanceNorm*)layer_default;
  1059. fprintf_param_value(" 0=%d", channels)
  1060. fprintf_param_value(" 1=%f", eps)
  1061. }
  1062. else if (layer->type == "Interp")
  1063. {
  1064. ncnn::Interp* op = (ncnn::Interp*)layer;
  1065. ncnn::Interp* op_default = (ncnn::Interp*)layer_default;
  1066. fprintf_param_value(" 0=%d", resize_type)
  1067. fprintf_param_value(" 1=%f", height_scale)
  1068. fprintf_param_value(" 2=%f", width_scale)
  1069. fprintf_param_value(" 3=%d", output_height)
  1070. fprintf_param_value(" 4=%d", output_width)
  1071. }
  1072. else if (layer->type == "Log")
  1073. {
  1074. ncnn::Log* op = (ncnn::Log*)layer;
  1075. ncnn::Log* op_default = (ncnn::Log*)layer_default;
  1076. fprintf_param_value(" 0=%f", base)
  1077. fprintf_param_value(" 1=%f", scale)
  1078. fprintf_param_value(" 2=%f", shift)
  1079. }
  1080. else if (layer->type == "LRN")
  1081. {
  1082. ncnn::LRN* op = (ncnn::LRN*)layer;
  1083. ncnn::LRN* op_default = (ncnn::LRN*)layer_default;
  1084. fprintf_param_value(" 0=%d", region_type)
  1085. fprintf_param_value(" 1=%d", local_size)
  1086. fprintf_param_value(" 2=%f", alpha)
  1087. fprintf_param_value(" 3=%f", beta)
  1088. fprintf_param_value(" 4=%f", bias)
  1089. }
  1090. else if (layer->type == "MVN")
  1091. {
  1092. ncnn::MVN* op = (ncnn::MVN*)layer;
  1093. ncnn::MVN* op_default = (ncnn::MVN*)layer_default;
  1094. fprintf_param_value(" 0=%d", normalize_variance)
  1095. fprintf_param_value(" 1=%d", across_channels)
  1096. fprintf_param_value(" 2=%f", eps)
  1097. }
  1098. else if (layer->type == "Normalize")
  1099. {
  1100. ncnn::Normalize* op = (ncnn::Normalize*)layer;
  1101. ncnn::Normalize* op_default = (ncnn::Normalize*)layer_default;
  1102. fprintf_param_value(" 0=%d", across_spatial)
  1103. fprintf_param_value(" 1=%d", channel_shared)
  1104. fprintf_param_value(" 2=%f", eps)
  1105. fprintf_param_value(" 3=%d", scale_data_size)
  1106. fprintf_param_value(" 4=%d", across_channel)
  1107. fwrite_weight_data(op->scale_data, bp);
  1108. }
  1109. else if (layer->type == "Padding")
  1110. {
  1111. ncnn::Padding* op = (ncnn::Padding*)layer;
  1112. ncnn::Padding* op_default = (ncnn::Padding*)layer_default;
  1113. fprintf_param_value(" 0=%d", top)
  1114. fprintf_param_value(" 1=%d", bottom)
  1115. fprintf_param_value(" 2=%d", left)
  1116. fprintf_param_value(" 3=%d", right)
  1117. fprintf_param_value(" 4=%d", type)
  1118. fprintf_param_value(" 5=%f", value)
  1119. }
  1120. else if (layer->type == "Permute")
  1121. {
  1122. ncnn::Permute* op = (ncnn::Permute*)layer;
  1123. ncnn::Permute* op_default = (ncnn::Permute*)layer_default;
  1124. fprintf_param_value(" 0=%d", order_type)
  1125. }
  1126. else if (layer->type == "Pooling")
  1127. {
  1128. ncnn::Pooling* op = (ncnn::Pooling*)layer;
  1129. ncnn::Pooling* op_default = (ncnn::Pooling*)layer_default;
  1130. fprintf_param_value(" 0=%d", pooling_type)
  1131. fprintf_param_value(" 1=%d", kernel_w)
  1132. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1133. fprintf_param_value(" 2=%d", stride_w)
  1134. { if (op->stride_h != op->stride_w) fprintf(pp, " 12=%d", op->stride_h); }
  1135. fprintf_param_value(" 3=%d", pad_left)
  1136. { if (op->pad_top != op->pad_left) fprintf(pp, " 13=%d", op->pad_top); }
  1137. { if (op->pad_right != op->pad_left) fprintf(pp, " 14=%d", op->pad_right); }
  1138. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 15=%d", op->pad_bottom); }
  1139. fprintf_param_value(" 4=%d", global_pooling)
  1140. fprintf_param_value(" 5=%d", pad_mode)
  1141. }
  1142. else if (layer->type == "Power")
  1143. {
  1144. ncnn::Power* op = (ncnn::Power*)layer;
  1145. ncnn::Power* op_default = (ncnn::Power*)layer_default;
  1146. fprintf_param_value(" 0=%f", power)
  1147. fprintf_param_value(" 1=%f", scale)
  1148. fprintf_param_value(" 2=%f", shift)
  1149. }
  1150. else if (layer->type == "PReLU")
  1151. {
  1152. ncnn::PReLU* op = (ncnn::PReLU*)layer;
  1153. ncnn::PReLU* op_default = (ncnn::PReLU*)layer_default;
  1154. fprintf_param_value(" 0=%d", num_slope)
  1155. fwrite_weight_data(op->slope_data, bp);
  1156. }
  1157. else if (layer->type == "PriorBox")
  1158. {
  1159. ncnn::PriorBox* op = (ncnn::PriorBox*)layer;
  1160. ncnn::PriorBox* op_default = (ncnn::PriorBox*)layer_default;
  1161. { if (!op->min_sizes.empty()) fprintf_param_int_array(0, op->min_sizes, pp); }
  1162. { if (!op->max_sizes.empty()) fprintf_param_int_array(1, op->max_sizes, pp); }
  1163. { if (!op->aspect_ratios.empty()) fprintf_param_int_array(2, op->aspect_ratios, pp); }
  1164. fprintf_param_value(" 3=%f", variances[0])
  1165. fprintf_param_value(" 4=%f", variances[1])
  1166. fprintf_param_value(" 5=%f", variances[2])
  1167. fprintf_param_value(" 6=%f", variances[3])
  1168. fprintf_param_value(" 7=%d", flip)
  1169. fprintf_param_value(" 8=%d", clip)
  1170. fprintf_param_value(" 9=%d", image_width)
  1171. fprintf_param_value(" 10=%d", image_height)
  1172. fprintf_param_value(" 11=%f", step_width)
  1173. fprintf_param_value(" 12=%f", step_height)
  1174. fprintf_param_value(" 13=%f", offset)
  1175. }
  1176. else if (layer->type == "Proposal")
  1177. {
  1178. ncnn::Proposal* op = (ncnn::Proposal*)layer;
  1179. ncnn::Proposal* op_default = (ncnn::Proposal*)layer_default;
  1180. fprintf_param_value(" 0=%d", feat_stride)
  1181. fprintf_param_value(" 1=%d", base_size)
  1182. fprintf_param_value(" 2=%d", pre_nms_topN)
  1183. fprintf_param_value(" 3=%d", after_nms_topN)
  1184. fprintf_param_value(" 4=%f", nms_thresh)
  1185. fprintf_param_value(" 5=%d", min_size)
  1186. }
  1187. else if (layer->type == "PSROIPooling")
  1188. {
  1189. ncnn::PSROIPooling* op = (ncnn::PSROIPooling*)layer;
  1190. ncnn::PSROIPooling* op_default = (ncnn::PSROIPooling*)layer_default;
  1191. fprintf_param_value(" 0=%d", pooled_width)
  1192. fprintf_param_value(" 1=%d", pooled_height)
  1193. fprintf_param_value(" 2=%f", spatial_scale)
  1194. fprintf_param_value(" 3=%d", output_dim)
  1195. }
  1196. else if (layer->type == "Quantize")
  1197. {
  1198. ncnn::Quantize* op = (ncnn::Quantize*)layer;
  1199. ncnn::Quantize* op_default = (ncnn::Quantize*)layer_default;
  1200. fprintf_param_value(" 0=%f", scale)
  1201. }
  1202. else if (layer->type == "Reduction")
  1203. {
  1204. ncnn::Reduction* op = (ncnn::Reduction*)layer;
  1205. ncnn::Reduction* op_default = (ncnn::Reduction*)layer_default;
  1206. fprintf_param_value(" 0=%d", operation)
  1207. fprintf_param_value(" 1=%d", dim)
  1208. fprintf_param_value(" 2=%f", coeff)
  1209. }
  1210. else if (layer->type == "ReLU")
  1211. {
  1212. ncnn::ReLU* op = (ncnn::ReLU*)layer;
  1213. ncnn::ReLU* op_default = (ncnn::ReLU*)layer_default;
  1214. fprintf_param_value(" 0=%f", slope)
  1215. }
  1216. else if (layer->type == "Reorg")
  1217. {
  1218. ncnn::Reorg* op = (ncnn::Reorg*)layer;
  1219. ncnn::Reorg* op_default = (ncnn::Reorg*)layer_default;
  1220. fprintf_param_value(" 0=%d", stride)
  1221. }
  1222. else if (layer->type == "Requantize")
  1223. {
  1224. ncnn::Requantize* op = (ncnn::Requantize*)layer;
  1225. ncnn::Requantize* op_default = (ncnn::Requantize*)layer_default;
  1226. fprintf_param_value(" 0=%f", scale_in)
  1227. fprintf_param_value(" 1=%f", scale_out)
  1228. fprintf_param_value(" 2=%d", bias_term)
  1229. fprintf_param_value(" 3=%d", bias_data_size)
  1230. fprintf_param_value(" 4=%d", fusion_relu)
  1231. }
  1232. else if (layer->type == "Reshape")
  1233. {
  1234. ncnn::Reshape* op = (ncnn::Reshape*)layer;
  1235. ncnn::Reshape* op_default = (ncnn::Reshape*)layer_default;
  1236. fprintf_param_value(" 0=%d", w)
  1237. fprintf_param_value(" 1=%d", h)
  1238. fprintf_param_value(" 2=%d", c)
  1239. fprintf_param_value(" 3=%d", permute)
  1240. }
  1241. else if (layer->type == "ROIAlign")
  1242. {
  1243. ncnn::ROIAlign* op = (ncnn::ROIAlign*)layer;
  1244. ncnn::ROIAlign* op_default = (ncnn::ROIAlign*)layer_default;
  1245. fprintf_param_value(" 0=%d", pooled_width)
  1246. fprintf_param_value(" 1=%d", pooled_height)
  1247. fprintf_param_value(" 2=%f", spatial_scale)
  1248. }
  1249. else if (layer->type == "ROIPooling")
  1250. {
  1251. ncnn::ROIPooling* op = (ncnn::ROIPooling*)layer;
  1252. ncnn::ROIPooling* op_default = (ncnn::ROIPooling*)layer_default;
  1253. fprintf_param_value(" 0=%d", pooled_width)
  1254. fprintf_param_value(" 1=%d", pooled_height)
  1255. fprintf_param_value(" 2=%f", spatial_scale)
  1256. }
  1257. else if (layer->type == "Scale")
  1258. {
  1259. ncnn::Scale* op = (ncnn::Scale*)layer;
  1260. ncnn::Scale* op_default = (ncnn::Scale*)layer_default;
  1261. fprintf_param_value(" 0=%d", scale_data_size)
  1262. fprintf_param_value(" 1=%d", bias_term)
  1263. fwrite_weight_data(op->scale_data, bp);
  1264. fwrite_weight_data(op->bias_data, bp);
  1265. }
  1266. else if (layer->type == "ShuffleChannel")
  1267. {
  1268. ncnn::ShuffleChannel* op = (ncnn::ShuffleChannel*)layer;
  1269. ncnn::ShuffleChannel* op_default = (ncnn::ShuffleChannel*)layer_default;
  1270. fprintf_param_value(" 0=%d", group)
  1271. }
  1272. else if (layer->type == "Slice")
  1273. {
  1274. ncnn::Slice* op = (ncnn::Slice*)layer;
  1275. ncnn::Slice* op_default = (ncnn::Slice*)layer_default;
  1276. { if (!op->slices.empty()) fprintf_param_int_array(0, op->slices, pp); }
  1277. fprintf_param_value(" 1=%d", axis)
  1278. }
  1279. else if (layer->type == "Softmax")
  1280. {
  1281. ncnn::Softmax* op = (ncnn::Softmax*)layer;
  1282. ncnn::Softmax* op_default = (ncnn::Softmax*)layer_default;
  1283. fprintf_param_value(" 0=%d", axis)
  1284. // HACK
  1285. if (op->axis != 0)
  1286. {
  1287. int fixbug0 = 1;
  1288. fprintf(pp, " 1=%d", fixbug0);
  1289. }
  1290. }
  1291. else if (layer->type == "Threshold")
  1292. {
  1293. ncnn::Threshold* op = (ncnn::Threshold*)layer;
  1294. ncnn::Threshold* op_default = (ncnn::Threshold*)layer_default;
  1295. fprintf_param_value(" 0=%f", threshold)
  1296. }
  1297. else if (layer->type == "UnaryOp")
  1298. {
  1299. ncnn::UnaryOp* op = (ncnn::UnaryOp*)layer;
  1300. ncnn::UnaryOp* op_default = (ncnn::UnaryOp*)layer_default;
  1301. fprintf_param_value(" 0=%d", op_type)
  1302. }
  1303. else if (layer->type == "YoloDetectionOutput")
  1304. {
  1305. ncnn::YoloDetectionOutput* op = (ncnn::YoloDetectionOutput*)layer;
  1306. ncnn::YoloDetectionOutput* op_default = (ncnn::YoloDetectionOutput*)layer_default;
  1307. fprintf_param_value(" 0=%d", num_class)
  1308. fprintf_param_value(" 1=%d", num_box)
  1309. fprintf_param_value(" 2=%f", confidence_threshold)
  1310. fprintf_param_value(" 3=%f", nms_threshold)
  1311. { if (!op->biases.empty()) fprintf_param_int_array(4, op->biases, pp); }
  1312. }
  1313. else if (layer->type == "Yolov3DetectionOutput")
  1314. {
  1315. ncnn::Yolov3DetectionOutput* op = (ncnn::Yolov3DetectionOutput*)layer;
  1316. ncnn::Yolov3DetectionOutput* op_default = (ncnn::Yolov3DetectionOutput*)layer_default;
  1317. fprintf_param_value(" 0=%d", num_class)
  1318. fprintf_param_value(" 1=%d", num_box)
  1319. fprintf_param_value(" 2=%f", confidence_threshold)
  1320. fprintf_param_value(" 3=%f", nms_threshold)
  1321. { if (!op->biases.empty()) fprintf_param_int_array(4, op->biases, pp); }
  1322. { if (!op->mask.empty()) fprintf_param_int_array(5, op->mask, pp); }
  1323. { if (!op->anchors_scale.empty()) fprintf_param_int_array(6, op->anchors_scale, pp); }
  1324. }
  1325. #undef fprintf_param_value
  1326. fprintf(pp, "\n");
  1327. delete layer_default;
  1328. }
  1329. fclose(pp);
  1330. fclose(bp);
  1331. return 0;
  1332. }
  1333. int main(int argc, char** argv)
  1334. {
  1335. // in in out out 65535
  1336. const char* inparam = argv[1];
  1337. const char* inbin = argv[2];
  1338. const char* outparam = argv[3];
  1339. const char* outbin = argv[4];
  1340. int flag = atoi(argv[5]);
  1341. NetOptimize optimizer;
  1342. optimizer.load_param(inparam);
  1343. optimizer.load_model(inbin);
  1344. optimizer.fuse_batchnorm_scale();
  1345. optimizer.fuse_convolution_batchnorm();
  1346. optimizer.fuse_convolutiondepthwise_batchnorm();
  1347. optimizer.fuse_deconvolution_batchnorm();
  1348. optimizer.fuse_deconvolutiondepthwise_batchnorm();
  1349. optimizer.fuse_innerproduct_batchnorm();
  1350. optimizer.fuse_convolution_activation();
  1351. optimizer.fuse_convolutiondepthwise_activation();
  1352. optimizer.fuse_deconvolution_activation();
  1353. optimizer.fuse_deconvolutiondepthwise_activation();
  1354. optimizer.fuse_innerproduct_activation();
  1355. optimizer.eliminate_dropout();
  1356. optimizer.save(outparam, outbin);
  1357. return 0;
  1358. }