You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ncnnoptimize.cpp 62 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <set>
  15. #include <vector>
  16. // ncnn public header
  17. #include "net.h"
  18. #include "layer.h"
  19. // ncnn private header
  20. #include "layer/batchnorm.h"
  21. #include "layer/bias.h"
  22. #include "layer/binaryop.h"
  23. #include "layer/clip.h"
  24. #include "layer/concat.h"
  25. #include "layer/convolution.h"
  26. #include "layer/convolutiondepthwise.h"
  27. #include "layer/crop.h"
  28. #include "layer/deconvolution.h"
  29. #include "layer/deconvolutiondepthwise.h"
  30. #include "layer/detectionoutput.h"
  31. #include "layer/dropout.h"
  32. #include "layer/eltwise.h"
  33. #include "layer/elu.h"
  34. #include "layer/exp.h"
  35. #include "layer/flatten.h"
  36. #include "layer/innerproduct.h"
  37. #include "layer/input.h"
  38. #include "layer/instancenorm.h"
  39. #include "layer/interp.h"
  40. #include "layer/log.h"
  41. #include "layer/lrn.h"
  42. #include "layer/mvn.h"
  43. #include "layer/normalize.h"
  44. #include "layer/padding.h"
  45. #include "layer/permute.h"
  46. #include "layer/pooling.h"
  47. #include "layer/power.h"
  48. #include "layer/prelu.h"
  49. #include "layer/priorbox.h"
  50. #include "layer/proposal.h"
  51. #include "layer/psroipooling.h"
  52. #include "layer/quantize.h"
  53. #include "layer/reduction.h"
  54. #include "layer/relu.h"
  55. #include "layer/reorg.h"
  56. #include "layer/requantize.h"
  57. #include "layer/reshape.h"
  58. #include "layer/roialign.h"
  59. #include "layer/roipooling.h"
  60. #include "layer/scale.h"
  61. #include "layer/slice.h"
  62. #include "layer/shufflechannel.h"
  63. #include "layer/softmax.h"
  64. #include "layer/threshold.h"
  65. #include "layer/unaryop.h"
  66. #include "layer/yolodetectionoutput.h"
  67. #include "layer/yolov3detectionoutput.h"
  68. class NetOptimize : public ncnn::Net
  69. {
  70. public:
  71. // 0=fp32 1=fp16
  72. int storage_type;
  73. public:
  74. int fuse_batchnorm_scale();
  75. int fuse_convolution_batchnorm();
  76. int fuse_convolutiondepthwise_batchnorm();
  77. int fuse_deconvolution_batchnorm();
  78. int fuse_deconvolutiondepthwise_batchnorm();
  79. int fuse_innerproduct_batchnorm();
  80. int fuse_innerproduct_dropout();
  81. int fuse_convolution_activation();
  82. int fuse_convolutiondepthwise_activation();
  83. int fuse_deconvolution_activation();
  84. int fuse_deconvolutiondepthwise_activation();
  85. int fuse_innerproduct_activation();
  86. int eliminate_dropout();
  87. int eliminate_flatten_after_global_pooling();
  88. int replace_convolution_with_innerproduct_after_global_pooling();
  89. public:
  90. int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp);
  91. int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp);
  92. int fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp);
  93. int fwrite_weight_data(const ncnn::Mat& data, FILE* bp);
  94. int save(const char* parampath, const char* binpath);
  95. };
  96. int NetOptimize::fuse_batchnorm_scale()
  97. {
  98. const int layer_count = layers.size();
  99. for (int i=0; i<layer_count; i++)
  100. {
  101. if (layers[i]->type != "BatchNorm")
  102. continue;
  103. // BatchNorm - Scale
  104. int top_blob_index = layers[i]->tops[0];
  105. int j = i + 1;
  106. for (; j<layer_count; j++)
  107. {
  108. if (layers[j]->type != "Scale")
  109. continue;
  110. if (layers[j]->bottoms.size() != 1)
  111. continue;
  112. if (layers[j]->bottoms[0] == top_blob_index)
  113. break;
  114. }
  115. if (j == layer_count)
  116. continue;
  117. // fuse BatchNorm - Scale to BatchNorm
  118. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
  119. ncnn::Scale* scale = (ncnn::Scale*)layers[j];
  120. fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());
  121. {
  122. // v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
  123. // = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)
  124. int channels = batchnorm->channels;
  125. float* slope = batchnorm->slope_data;
  126. float* bias = batchnorm->bias_data;
  127. for (int q=0; q<channels; q++)
  128. {
  129. slope[q] = slope[q] * scale->scale_data[q];
  130. if (scale->bias_term)
  131. bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
  132. else
  133. bias[q] = bias[q] * scale->scale_data[q];
  134. }
  135. }
  136. int top_blob_index_final = scale->tops[0];
  137. batchnorm->tops[0] = top_blob_index_final;
  138. blobs[top_blob_index_final].producer = i;
  139. scale->type = "ncnnfused";
  140. }
  141. return 0;
  142. }
  143. int NetOptimize::fuse_convolution_batchnorm()
  144. {
  145. const int layer_count = layers.size();
  146. for (int i=0; i<layer_count; i++)
  147. {
  148. if (layers[i]->type != "Convolution")
  149. continue;
  150. // Convolution - BatchNorm
  151. int top_blob_index = layers[i]->tops[0];
  152. int j = i + 1;
  153. for (; j<layer_count; j++)
  154. {
  155. if (layers[j]->type != "BatchNorm")
  156. continue;
  157. if (layers[j]->bottoms.size() != 1)
  158. continue;
  159. if (layers[j]->bottoms[0] == top_blob_index)
  160. break;
  161. }
  162. if (j == layer_count)
  163. continue;
  164. // fuse Convolution - BatchNorm to Convolution
  165. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  166. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  167. fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());
  168. {
  169. int channels = batchnorm->channels;
  170. float eps = batchnorm->eps;
  171. // a = bias - slope * mean / sqrt(var + eps)
  172. // b = slope / sqrt(var + eps)
  173. // value = value * b + a
  174. std::vector<float> a(channels);
  175. std::vector<float> b(channels);
  176. for (int i=0; i<channels; i++)
  177. {
  178. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  179. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  180. b[i] = batchnorm->slope_data[i] / sqrt_var;
  181. }
  182. if (convolution->bias_term == 0)
  183. {
  184. // init bias as zero
  185. convolution->bias_term = 1;
  186. convolution->bias_data = ncnn::Mat(channels);
  187. convolution->bias_data.fill(0.f);
  188. }
  189. const int weight_per_outch = convolution->weight_data_size / channels;
  190. float* weight = convolution->weight_data;
  191. float* bias = convolution->bias_data;
  192. for (int i=0; i<channels; i++)
  193. {
  194. float* conv_weight_outch = weight + weight_per_outch * i;
  195. for (int j=0; j<weight_per_outch; j++)
  196. {
  197. conv_weight_outch[j] *= b[i];
  198. }
  199. bias[i] += a[i];
  200. }
  201. }
  202. int top_blob_index_final = batchnorm->tops[0];
  203. convolution->tops[0] = top_blob_index_final;
  204. blobs[top_blob_index_final].producer = i;
  205. batchnorm->type = "ncnnfused";
  206. }
  207. return 0;
  208. }
  209. int NetOptimize::fuse_convolutiondepthwise_batchnorm()
  210. {
  211. const int layer_count = layers.size();
  212. for (int i=0; i<layer_count; i++)
  213. {
  214. if (layers[i]->type != "ConvolutionDepthWise")
  215. continue;
  216. // ConvolutionDepthWise - BatchNorm
  217. int top_blob_index = layers[i]->tops[0];
  218. int j = i + 1;
  219. for (; j<layer_count; j++)
  220. {
  221. if (layers[j]->type != "BatchNorm")
  222. continue;
  223. if (layers[j]->bottoms.size() != 1)
  224. continue;
  225. if (layers[j]->bottoms[0] == top_blob_index)
  226. break;
  227. }
  228. if (j == layer_count)
  229. continue;
  230. // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
  231. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  232. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  233. fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  234. {
  235. int channels = batchnorm->channels;
  236. float eps = batchnorm->eps;
  237. // a = bias - slope * mean / sqrt(var + eps)
  238. // b = slope / sqrt(var + eps)
  239. // value = value * b + a
  240. std::vector<float> a(channels);
  241. std::vector<float> b(channels);
  242. for (int i=0; i<channels; i++)
  243. {
  244. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  245. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  246. b[i] = batchnorm->slope_data[i] / sqrt_var;
  247. }
  248. if (convolutiondepthwise->bias_term == 0)
  249. {
  250. // init bias as zero
  251. convolutiondepthwise->bias_term = 1;
  252. convolutiondepthwise->bias_data = ncnn::Mat(channels);
  253. convolutiondepthwise->bias_data.fill(0.f);
  254. }
  255. const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;
  256. float* weight = convolutiondepthwise->weight_data;
  257. float* bias = convolutiondepthwise->bias_data;
  258. for (int i=0; i<channels; i++)
  259. {
  260. float* conv_weight_outch = weight + weight_per_outch * i;
  261. for (int j=0; j<weight_per_outch; j++)
  262. {
  263. conv_weight_outch[j] *= b[i];
  264. }
  265. bias[i] += a[i];
  266. }
  267. }
  268. int top_blob_index_final = batchnorm->tops[0];
  269. convolutiondepthwise->tops[0] = top_blob_index_final;
  270. blobs[top_blob_index_final].producer = i;
  271. batchnorm->type = "ncnnfused";
  272. }
  273. return 0;
  274. }
  275. int NetOptimize::fuse_deconvolution_batchnorm()
  276. {
  277. const int layer_count = layers.size();
  278. for (int i=0; i<layer_count; i++)
  279. {
  280. if (layers[i]->type != "Deconvolution")
  281. continue;
  282. // Deconvolution - BatchNorm
  283. int top_blob_index = layers[i]->tops[0];
  284. int j = i + 1;
  285. for (; j<layer_count; j++)
  286. {
  287. if (layers[j]->type != "BatchNorm")
  288. continue;
  289. if (layers[j]->bottoms.size() != 1)
  290. continue;
  291. if (layers[j]->bottoms[0] == top_blob_index)
  292. break;
  293. }
  294. if (j == layer_count)
  295. continue;
  296. // fuse Deconvolution - BatchNorm to Deconvolution
  297. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  298. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  299. fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());
  300. {
  301. int channels = batchnorm->channels;
  302. float eps = batchnorm->eps;
  303. // a = bias - slope * mean / sqrt(var + eps)
  304. // b = slope / sqrt(var + eps)
  305. // value = value * b + a
  306. std::vector<float> a(channels);
  307. std::vector<float> b(channels);
  308. for (int i=0; i<channels; i++)
  309. {
  310. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  311. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  312. b[i] = batchnorm->slope_data[i] / sqrt_var;
  313. }
  314. if (deconvolution->bias_term == 0)
  315. {
  316. // init bias as zero
  317. deconvolution->bias_term = 1;
  318. deconvolution->bias_data = ncnn::Mat(channels);
  319. deconvolution->bias_data.fill(0.f);
  320. }
  321. const int weight_per_outch = deconvolution->weight_data_size / channels;
  322. float* weight = deconvolution->weight_data;
  323. float* bias = deconvolution->bias_data;
  324. for (int i=0; i<channels; i++)
  325. {
  326. float* conv_weight_outch = weight + weight_per_outch * i;
  327. for (int j=0; j<weight_per_outch; j++)
  328. {
  329. conv_weight_outch[j] *= b[i];
  330. }
  331. bias[i] += a[i];
  332. }
  333. }
  334. int top_blob_index_final = batchnorm->tops[0];
  335. deconvolution->tops[0] = top_blob_index_final;
  336. blobs[top_blob_index_final].producer = i;
  337. batchnorm->type = "ncnnfused";
  338. }
  339. return 0;
  340. }
  341. int NetOptimize::fuse_deconvolutiondepthwise_batchnorm()
  342. {
  343. const int layer_count = layers.size();
  344. for (int i=0; i<layer_count; i++)
  345. {
  346. if (layers[i]->type != "DeconvolutionDepthWise")
  347. continue;
  348. // DeconvolutionDepthWise - BatchNorm
  349. int top_blob_index = layers[i]->tops[0];
  350. int j = i + 1;
  351. for (; j<layer_count; j++)
  352. {
  353. if (layers[j]->type != "BatchNorm")
  354. continue;
  355. if (layers[j]->bottoms.size() != 1)
  356. continue;
  357. if (layers[j]->bottoms[0] == top_blob_index)
  358. break;
  359. }
  360. if (j == layer_count)
  361. continue;
  362. // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
  363. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  364. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  365. fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  366. {
  367. int channels = batchnorm->channels;
  368. float eps = batchnorm->eps;
  369. // a = bias - slope * mean / sqrt(var + eps)
  370. // b = slope / sqrt(var + eps)
  371. // value = value * b + a
  372. std::vector<float> a(channels);
  373. std::vector<float> b(channels);
  374. for (int i=0; i<channels; i++)
  375. {
  376. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  377. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  378. b[i] = batchnorm->slope_data[i] / sqrt_var;
  379. }
  380. if (deconvolutiondepthwise->bias_term == 0)
  381. {
  382. // init bias as zero
  383. deconvolutiondepthwise->bias_term = 1;
  384. deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
  385. deconvolutiondepthwise->bias_data.fill(0.f);
  386. }
  387. const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;
  388. float* weight = deconvolutiondepthwise->weight_data;
  389. float* bias = deconvolutiondepthwise->bias_data;
  390. for (int i=0; i<channels; i++)
  391. {
  392. float* conv_weight_outch = weight + weight_per_outch * i;
  393. for (int j=0; j<weight_per_outch; j++)
  394. {
  395. conv_weight_outch[j] *= b[i];
  396. }
  397. bias[i] += a[i];
  398. }
  399. }
  400. int top_blob_index_final = batchnorm->tops[0];
  401. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  402. blobs[top_blob_index_final].producer = i;
  403. batchnorm->type = "ncnnfused";
  404. }
  405. return 0;
  406. }
  407. int NetOptimize::fuse_innerproduct_batchnorm()
  408. {
  409. const int layer_count = layers.size();
  410. for (int i=0; i<layer_count; i++)
  411. {
  412. if (layers[i]->type != "InnerProduct")
  413. continue;
  414. // InnerProduct - BatchNorm
  415. int top_blob_index = layers[i]->tops[0];
  416. int j = i + 1;
  417. for (; j<layer_count; j++)
  418. {
  419. if (layers[j]->type != "BatchNorm")
  420. continue;
  421. if (layers[j]->bottoms.size() != 1)
  422. continue;
  423. if (layers[j]->bottoms[0] == top_blob_index)
  424. break;
  425. }
  426. if (j == layer_count)
  427. continue;
  428. // fuse InnerProduct - BatchNorm to InnerProduct
  429. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  430. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  431. fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());
  432. {
  433. int channels = batchnorm->channels;
  434. float eps = batchnorm->eps;
  435. // a = bias - slope * mean / sqrt(var + eps)
  436. // b = slope / sqrt(var + eps)
  437. // value = value * b + a
  438. std::vector<float> a(channels);
  439. std::vector<float> b(channels);
  440. for (int i=0; i<channels; i++)
  441. {
  442. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  443. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  444. b[i] = batchnorm->slope_data[i] / sqrt_var;
  445. }
  446. if (innerproduct->bias_term == 0)
  447. {
  448. // init bias as zero
  449. innerproduct->bias_term = 1;
  450. innerproduct->bias_data = ncnn::Mat(channels);
  451. innerproduct->bias_data.fill(0.f);
  452. }
  453. const int weight_per_outch = innerproduct->weight_data_size / channels;
  454. float* weight = innerproduct->weight_data;
  455. float* bias = innerproduct->bias_data;
  456. for (int i=0; i<channels; i++)
  457. {
  458. float* conv_weight_outch = weight + weight_per_outch * i;
  459. for (int j=0; j<weight_per_outch; j++)
  460. {
  461. conv_weight_outch[j] *= b[i];
  462. }
  463. bias[i] += a[i];
  464. }
  465. }
  466. int top_blob_index_final = batchnorm->tops[0];
  467. innerproduct->tops[0] = top_blob_index_final;
  468. blobs[top_blob_index_final].producer = i;
  469. batchnorm->type = "ncnnfused";
  470. }
  471. return 0;
  472. }
  473. int NetOptimize::fuse_innerproduct_dropout()
  474. {
  475. const int layer_count = layers.size();
  476. for (int i=0; i<layer_count; i++)
  477. {
  478. if (layers[i]->type != "InnerProduct")
  479. continue;
  480. // InnerProduct - Dropout
  481. int top_blob_index = layers[i]->tops[0];
  482. int j = i + 1;
  483. for (; j<layer_count; j++)
  484. {
  485. if (layers[j]->type != "Dropout")
  486. continue;
  487. if (layers[j]->bottoms.size() != 1)
  488. continue;
  489. if (layers[j]->bottoms[0] == top_blob_index)
  490. break;
  491. }
  492. if (j == layer_count)
  493. continue;
  494. // fuse InnerProduct - Dropout to InnerProduct
  495. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  496. ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];
  497. fprintf(stderr, "fuse_innerproduct_dropout %s %s\n", innerproduct->name.c_str(), dropout->name.c_str());
  498. float scale = dropout->scale;
  499. if (scale != 1.f)
  500. {
  501. const int num_output = innerproduct->num_output;
  502. const int weight_per_outch = innerproduct->weight_data_size / num_output;
  503. float* weight = innerproduct->weight_data;
  504. for (int i=0; i<num_output; i++)
  505. {
  506. float* conv_weight_outch = weight + weight_per_outch * i;
  507. for (int j=0; j<weight_per_outch; j++)
  508. {
  509. conv_weight_outch[j] *= scale;
  510. }
  511. }
  512. if (innerproduct->bias_term)
  513. {
  514. float* bias = innerproduct->bias_data;
  515. for (int i=0; i<num_output; i++)
  516. {
  517. bias[i] *= scale;
  518. }
  519. }
  520. }
  521. int top_blob_index_final = dropout->tops[0];
  522. innerproduct->tops[0] = top_blob_index_final;
  523. blobs[top_blob_index_final].producer = i;
  524. dropout->type = "ncnnfused";
  525. }
  526. return 0;
  527. }
  528. int NetOptimize::fuse_convolution_activation()
  529. {
  530. const int layer_count = layers.size();
  531. for (int i=0; i<layer_count; i++)
  532. {
  533. if (layers[i]->type != "Convolution")
  534. continue;
  535. // Convolution - Activation
  536. int top_blob_index = layers[i]->tops[0];
  537. int j = i + 1;
  538. for (; j<layer_count; j++)
  539. {
  540. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  541. continue;
  542. if (layers[j]->bottoms.size() != 1)
  543. continue;
  544. if (layers[j]->bottoms[0] == top_blob_index)
  545. break;
  546. }
  547. if (j == layer_count)
  548. continue;
  549. // fuse Convolution - Activation to Convolution
  550. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  551. ncnn::Layer* activation = layers[j];
  552. fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());
  553. if (activation->type == "ReLU")
  554. {
  555. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  556. if (relu->slope == 0.f)
  557. {
  558. convolution->activation_type = 1;
  559. }
  560. else
  561. {
  562. convolution->activation_type = 2;
  563. convolution->activation_params = ncnn::Mat(1);
  564. convolution->activation_params[0] = relu->slope;
  565. }
  566. }
  567. else if (activation->type == "Clip")
  568. {
  569. ncnn::Clip* clip = (ncnn::Clip*)activation;
  570. convolution->activation_type = 3;
  571. convolution->activation_params = ncnn::Mat(2);
  572. convolution->activation_params[0] = clip->min;
  573. convolution->activation_params[1] = clip->max;
  574. }
  575. int top_blob_index_final = activation->tops[0];
  576. convolution->tops[0] = top_blob_index_final;
  577. blobs[top_blob_index_final].producer = i;
  578. activation->type = "ncnnfused";
  579. }
  580. return 0;
  581. }
  582. int NetOptimize::fuse_convolutiondepthwise_activation()
  583. {
  584. const int layer_count = layers.size();
  585. for (int i=0; i<layer_count; i++)
  586. {
  587. if (layers[i]->type != "ConvolutionDepthWise")
  588. continue;
  589. // ConvolutionDepthWise - Activation
  590. int top_blob_index = layers[i]->tops[0];
  591. int j = i + 1;
  592. for (; j<layer_count; j++)
  593. {
  594. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  595. continue;
  596. if (layers[j]->bottoms.size() != 1)
  597. continue;
  598. if (layers[j]->bottoms[0] == top_blob_index)
  599. break;
  600. }
  601. if (j == layer_count)
  602. continue;
  603. // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
  604. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  605. ncnn::Layer* activation = layers[j];
  606. fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());
  607. if (activation->type == "ReLU")
  608. {
  609. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  610. if (relu->slope == 0.f)
  611. {
  612. convolutiondepthwise->activation_type = 1;
  613. }
  614. else
  615. {
  616. convolutiondepthwise->activation_type = 2;
  617. convolutiondepthwise->activation_params = ncnn::Mat(1);
  618. convolutiondepthwise->activation_params[0] = relu->slope;
  619. }
  620. }
  621. else if (activation->type == "Clip")
  622. {
  623. ncnn::Clip* clip = (ncnn::Clip*)activation;
  624. convolutiondepthwise->activation_type = 3;
  625. convolutiondepthwise->activation_params = ncnn::Mat(2);
  626. convolutiondepthwise->activation_params[0] = clip->min;
  627. convolutiondepthwise->activation_params[1] = clip->max;
  628. }
  629. int top_blob_index_final = activation->tops[0];
  630. convolutiondepthwise->tops[0] = top_blob_index_final;
  631. blobs[top_blob_index_final].producer = i;
  632. activation->type = "ncnnfused";
  633. }
  634. return 0;
  635. }
  636. int NetOptimize::fuse_deconvolution_activation()
  637. {
  638. const int layer_count = layers.size();
  639. for (int i=0; i<layer_count; i++)
  640. {
  641. if (layers[i]->type != "Deconvolution")
  642. continue;
  643. // Deconvolution - Activation
  644. int top_blob_index = layers[i]->tops[0];
  645. int j = i + 1;
  646. for (; j<layer_count; j++)
  647. {
  648. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  649. continue;
  650. if (layers[j]->bottoms.size() != 1)
  651. continue;
  652. if (layers[j]->bottoms[0] == top_blob_index)
  653. break;
  654. }
  655. if (j == layer_count)
  656. continue;
  657. // fuse Deconvolution - Activation to Deconvolution
  658. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  659. ncnn::Layer* activation = layers[j];
  660. fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());
  661. if (activation->type == "ReLU")
  662. {
  663. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  664. if (relu->slope == 0.f)
  665. {
  666. deconvolution->activation_type = 1;
  667. }
  668. else
  669. {
  670. deconvolution->activation_type = 2;
  671. deconvolution->activation_params = ncnn::Mat(1);
  672. deconvolution->activation_params[0] = relu->slope;
  673. }
  674. }
  675. else if (activation->type == "Clip")
  676. {
  677. ncnn::Clip* clip = (ncnn::Clip*)activation;
  678. deconvolution->activation_type = 3;
  679. deconvolution->activation_params = ncnn::Mat(2);
  680. deconvolution->activation_params[0] = clip->min;
  681. deconvolution->activation_params[1] = clip->max;
  682. }
  683. int top_blob_index_final = activation->tops[0];
  684. deconvolution->tops[0] = top_blob_index_final;
  685. blobs[top_blob_index_final].producer = i;
  686. activation->type = "ncnnfused";
  687. }
  688. return 0;
  689. }
  690. int NetOptimize::fuse_deconvolutiondepthwise_activation()
  691. {
  692. const int layer_count = layers.size();
  693. for (int i=0; i<layer_count; i++)
  694. {
  695. if (layers[i]->type != "DeconvolutionDepthWise")
  696. continue;
  697. // DeconvolutionDepthWise - Activation
  698. int top_blob_index = layers[i]->tops[0];
  699. int j = i + 1;
  700. for (; j<layer_count; j++)
  701. {
  702. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  703. continue;
  704. if (layers[j]->bottoms.size() != 1)
  705. continue;
  706. if (layers[j]->bottoms[0] == top_blob_index)
  707. break;
  708. }
  709. if (j == layer_count)
  710. continue;
  711. // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
  712. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  713. ncnn::Layer* activation = layers[j];
  714. fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());
  715. if (activation->type == "ReLU")
  716. {
  717. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  718. if (relu->slope == 0.f)
  719. {
  720. deconvolutiondepthwise->activation_type = 1;
  721. }
  722. else
  723. {
  724. deconvolutiondepthwise->activation_type = 2;
  725. deconvolutiondepthwise->activation_params = ncnn::Mat(1);
  726. deconvolutiondepthwise->activation_params[0] = relu->slope;
  727. }
  728. }
  729. else if (activation->type == "Clip")
  730. {
  731. ncnn::Clip* clip = (ncnn::Clip*)activation;
  732. deconvolutiondepthwise->activation_type = 3;
  733. deconvolutiondepthwise->activation_params = ncnn::Mat(2);
  734. deconvolutiondepthwise->activation_params[0] = clip->min;
  735. deconvolutiondepthwise->activation_params[1] = clip->max;
  736. }
  737. int top_blob_index_final = activation->tops[0];
  738. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  739. blobs[top_blob_index_final].producer = i;
  740. activation->type = "ncnnfused";
  741. }
  742. return 0;
  743. }
  744. int NetOptimize::fuse_innerproduct_activation()
  745. {
  746. const int layer_count = layers.size();
  747. for (int i=0; i<layer_count; i++)
  748. {
  749. if (layers[i]->type != "InnerProduct")
  750. continue;
  751. // InnerProduct - Activation
  752. int top_blob_index = layers[i]->tops[0];
  753. int j = i + 1;
  754. for (; j<layer_count; j++)
  755. {
  756. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip")
  757. continue;
  758. if (layers[j]->bottoms.size() != 1)
  759. continue;
  760. if (layers[j]->bottoms[0] == top_blob_index)
  761. break;
  762. }
  763. if (j == layer_count)
  764. continue;
  765. // fuse InnerProduct - Activation to InnerProduct
  766. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  767. ncnn::Layer* activation = layers[j];
  768. fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());
  769. if (activation->type == "ReLU")
  770. {
  771. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  772. if (relu->slope == 0.f)
  773. {
  774. innerproduct->activation_type = 1;
  775. }
  776. else
  777. {
  778. innerproduct->activation_type = 2;
  779. innerproduct->activation_params = ncnn::Mat(1);
  780. innerproduct->activation_params[0] = relu->slope;
  781. }
  782. }
  783. else if (activation->type == "Clip")
  784. {
  785. ncnn::Clip* clip = (ncnn::Clip*)activation;
  786. innerproduct->activation_type = 3;
  787. innerproduct->activation_params = ncnn::Mat(2);
  788. innerproduct->activation_params[0] = clip->min;
  789. innerproduct->activation_params[1] = clip->max;
  790. }
  791. int top_blob_index_final = activation->tops[0];
  792. innerproduct->tops[0] = top_blob_index_final;
  793. blobs[top_blob_index_final].producer = i;
  794. activation->type = "ncnnfused";
  795. }
  796. return 0;
  797. }
  798. int NetOptimize::eliminate_dropout()
  799. {
  800. const int layer_count = layers.size();
  801. for (int i=0; i<layer_count; i++)
  802. {
  803. if (layers[i]->type != "Dropout")
  804. continue;
  805. ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
  806. if (dropout->scale != 1.f)
  807. continue;
  808. // Any - Dropout
  809. int bottom_blob_index = layers[i]->bottoms[0];
  810. int j = i - 1;
  811. for (; j>=0; j--)
  812. {
  813. if (layers[j]->type == "ncnnfused")
  814. continue;
  815. if (layers[j]->tops.size() != 1)
  816. continue;
  817. if (layers[j]->tops[0] == bottom_blob_index)
  818. break;
  819. }
  820. if (j == -1)
  821. continue;
  822. ncnn::Layer* any = layers[j];
  823. fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());
  824. int top_blob_index_final = dropout->tops[0];
  825. any->tops[0] = top_blob_index_final;
  826. blobs[top_blob_index_final].producer = j;
  827. dropout->type = "ncnnfused";
  828. }
  829. return 0;
  830. }
  831. int NetOptimize::eliminate_flatten_after_global_pooling()
  832. {
  833. const int layer_count = layers.size();
  834. for (int i=0; i<layer_count; i++)
  835. {
  836. if (layers[i]->type != "Pooling")
  837. continue;
  838. ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
  839. if (pooling->global_pooling == 0)
  840. continue;
  841. // Pooling - Flatten
  842. int top_blob_index = layers[i]->tops[0];
  843. int j = i + 1;
  844. for (; j<layer_count; j++)
  845. {
  846. if (layers[j]->type != "Flatten")
  847. continue;
  848. if (layers[j]->bottoms.size() != 1)
  849. continue;
  850. if (layers[j]->bottoms[0] == top_blob_index)
  851. break;
  852. }
  853. if (j == layer_count)
  854. continue;
  855. ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];
  856. fprintf(stderr, "eliminate_flatten_after_global_pooling %s %s\n", pooling->name.c_str(), flatten->name.c_str());
  857. int top_blob_index_final = flatten->tops[0];
  858. pooling->tops[0] = top_blob_index_final;
  859. blobs[top_blob_index_final].producer = i;
  860. flatten->type = "ncnnfused";
  861. }
  862. return 0;
  863. }
  864. int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()
  865. {
  866. const int layer_count = layers.size();
  867. for (int i=0; i<layer_count; i++)
  868. {
  869. if (layers[i]->type != "Pooling")
  870. continue;
  871. ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
  872. if (pooling->global_pooling == 0)
  873. continue;
  874. // Pooling - Convolution
  875. int top_blob_index = layers[i]->tops[0];
  876. int j = i + 1;
  877. for (; j<layer_count; j++)
  878. {
  879. if (layers[j]->type != "Convolution")
  880. continue;
  881. if (layers[j]->bottoms.size() != 1)
  882. continue;
  883. if (layers[j]->bottoms[0] == top_blob_index)
  884. break;
  885. }
  886. if (j == layer_count)
  887. continue;
  888. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];
  889. fprintf(stderr, "replace_convolution_with_innerproduct_after_global_pooling %s %s\n", pooling->name.c_str(), convolution->name.c_str());
  890. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");
  891. innerproduct->type = "InnerProduct";
  892. innerproduct->name = convolution->name;
  893. innerproduct->bottoms = convolution->bottoms;
  894. innerproduct->tops = convolution->tops;
  895. ncnn::ParamDict pd;
  896. innerproduct->load_param(pd);
  897. innerproduct->num_output = convolution->num_output;
  898. innerproduct->bias_term = convolution->bias_term;
  899. innerproduct->weight_data_size = convolution->weight_data_size;
  900. innerproduct->weight_data = convolution->weight_data;
  901. innerproduct->bias_data = convolution->bias_data;
  902. layers[j] = innerproduct;
  903. delete convolution;
  904. }
  905. return 0;
  906. }
  907. int NetOptimize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp)
  908. {
  909. const int count = m.w;
  910. const int* ptr = m;
  911. fprintf(pp, " -%d=%d", 23300 + id, count);
  912. for (int i=0; i<count; i++)
  913. {
  914. fprintf(pp, ",%d", ptr[i]);
  915. }
  916. return 0;
  917. }
  918. int NetOptimize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
  919. {
  920. const int count = m.w;
  921. const float* ptr = m;
  922. fprintf(pp, " -%d=%d", 23300 + id, count);
  923. for (int i=0; i<count; i++)
  924. {
  925. fprintf(pp, ",%f", ptr[i]);
  926. }
  927. return 0;
  928. }
  929. int NetOptimize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp)
  930. {
  931. ncnn::Mat data_flattened = data.reshape(data.w * data.h * data.c);
  932. if (storage_type == 1 && tag == 0)
  933. {
  934. tag = 0x01306B47; // fp16 magic
  935. fwrite(&tag, sizeof(int), 1, bp);
  936. ncnn::Mat data_flattened_fp16;
  937. ncnn::cast_float32_to_float16(data_flattened, data_flattened_fp16);
  938. fwrite(data_flattened_fp16.data, data_flattened_fp16.elemsize, data_flattened_fp16.w, bp);
  939. }
  940. else
  941. {
  942. fwrite(&tag, sizeof(int), 1, bp);
  943. fwrite(data_flattened.data, data_flattened.elemsize, data_flattened.w, bp);
  944. }
  945. return 0;
  946. }
  947. int NetOptimize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp)
  948. {
  949. ncnn::Mat data_flattened = data.reshape(data.w * data.h * data.c);
  950. fwrite(data_flattened.data, data_flattened.elemsize, data_flattened.w, bp);
  951. return 0;
  952. }
  953. int NetOptimize::save(const char* parampath, const char* binpath)
  954. {
  955. FILE* pp = fopen(parampath, "wb");
  956. FILE* bp = fopen(binpath, "wb");
  957. fprintf(pp, "7767517\n");
  958. const int layer_count = layers.size();
  959. int layer_count_fused = 0;
  960. std::set<std::string> blob_names;
  961. for (int i=0; i<layer_count; i++)
  962. {
  963. const ncnn::Layer* layer = layers[i];
  964. if (layer->type == "ncnnfused")
  965. continue;
  966. layer_count_fused++;
  967. int bottom_count = layer->bottoms.size();
  968. for (int j=0; j<bottom_count; j++)
  969. {
  970. int bottom_blob_index = layer->bottoms[j];
  971. blob_names.insert(blobs[bottom_blob_index].name);
  972. }
  973. int top_count = layer->tops.size();
  974. for (int j=0; j<top_count; j++)
  975. {
  976. int top_blob_index = layer->tops[j];
  977. blob_names.insert(blobs[top_blob_index].name);
  978. }
  979. }
  980. int blob_count_fused = blob_names.size();
  981. fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused);
  982. for (int i=0; i<layer_count; i++)
  983. {
  984. const ncnn::Layer* layer = layers[i];
  985. if (layer->type == "ncnnfused")
  986. continue;
  987. int bottom_count = layer->bottoms.size();
  988. int top_count = layer->tops.size();
  989. fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count);
  990. for (int j=0; j<bottom_count; j++)
  991. {
  992. int bottom_blob_index = layer->bottoms[j];
  993. fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str());
  994. }
  995. for (int j=0; j<top_count; j++)
  996. {
  997. int top_blob_index = layer->tops[j];
  998. fprintf(pp, " %s", blobs[top_blob_index].name.c_str());
  999. }
  1000. ncnn::Layer* layer_default = ncnn::create_layer(layer->typeindex);
  1001. ncnn::ParamDict pd;
  1002. layer_default->load_param(pd);
  1003. #define fprintf_param_value(format, phase) \
  1004. { if (op->phase != op_default->phase) fprintf(pp, format, op->phase); }
  1005. if (layer->type == "BatchNorm")
  1006. {
  1007. ncnn::BatchNorm* op = (ncnn::BatchNorm*)layer;
  1008. ncnn::BatchNorm* op_default = (ncnn::BatchNorm*)layer_default;
  1009. fprintf_param_value(" 0=%d", channels)
  1010. fprintf_param_value(" 1=%f", eps)
  1011. fwrite_weight_data(op->slope_data, bp);
  1012. fwrite_weight_data(op->mean_data, bp);
  1013. fwrite_weight_data(op->var_data, bp);
  1014. fwrite_weight_data(op->bias_data, bp);
  1015. }
  1016. else if (layer->type == "Bias")
  1017. {
  1018. ncnn::Bias* op = (ncnn::Bias*)layer;
  1019. ncnn::Bias* op_default = (ncnn::Bias*)layer_default;
  1020. fprintf_param_value(" 0=%d", bias_data_size)
  1021. fwrite_weight_data(op->bias_data, bp);
  1022. }
  1023. else if (layer->type == "BinaryOp")
  1024. {
  1025. ncnn::BinaryOp* op = (ncnn::BinaryOp*)layer;
  1026. ncnn::BinaryOp* op_default = (ncnn::BinaryOp*)layer_default;
  1027. fprintf_param_value(" 0=%d", op_type)
  1028. fprintf_param_value(" 1=%d", with_scalar)
  1029. fprintf_param_value(" 2=%f", b)
  1030. }
  1031. else if (layer->type == "Clip")
  1032. {
  1033. ncnn::Clip* op = (ncnn::Clip*)layer;
  1034. ncnn::Clip* op_default = (ncnn::Clip*)layer_default;
  1035. fprintf_param_value(" 0=%f", min)
  1036. fprintf_param_value(" 1=%f", max)
  1037. }
  1038. else if (layer->type == "Concat")
  1039. {
  1040. ncnn::Concat* op = (ncnn::Concat*)layer;
  1041. ncnn::Concat* op_default = (ncnn::Concat*)layer_default;
  1042. fprintf_param_value(" 0=%d", axis)
  1043. }
  1044. else if (layer->type == "Convolution")
  1045. {
  1046. ncnn::Convolution* op = (ncnn::Convolution*)layer;
  1047. ncnn::Convolution* op_default = (ncnn::Convolution*)layer_default;
  1048. fprintf_param_value(" 0=%d", num_output)
  1049. fprintf_param_value(" 1=%d", kernel_w)
  1050. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1051. fprintf_param_value(" 2=%d", dilation_w)
  1052. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1053. fprintf_param_value(" 3=%d", stride_w)
  1054. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1055. fprintf_param_value(" 4=%d", pad_w)
  1056. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  1057. fprintf_param_value(" 5=%d", bias_term)
  1058. fprintf_param_value(" 6=%d", weight_data_size)
  1059. fprintf_param_value(" 8=%d", int8_scale_term)
  1060. fprintf_param_value(" 9=%d", activation_type)
  1061. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1062. fwrite_weight_tag_data(0, op->weight_data, bp);
  1063. fwrite_weight_data(op->bias_data, bp);
  1064. }
  1065. else if (layer->type == "ConvolutionDepthWise")
  1066. {
  1067. ncnn::ConvolutionDepthWise* op = (ncnn::ConvolutionDepthWise*)layer;
  1068. ncnn::ConvolutionDepthWise* op_default = (ncnn::ConvolutionDepthWise*)layer_default;
  1069. fprintf_param_value(" 0=%d", num_output)
  1070. fprintf_param_value(" 1=%d", kernel_w)
  1071. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1072. fprintf_param_value(" 2=%d", dilation_w)
  1073. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1074. fprintf_param_value(" 3=%d", stride_w)
  1075. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1076. fprintf_param_value(" 4=%d", pad_w)
  1077. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  1078. fprintf_param_value(" 5=%d", bias_term)
  1079. fprintf_param_value(" 6=%d", weight_data_size)
  1080. fprintf_param_value(" 7=%d", group)
  1081. fprintf_param_value(" 8=%d", int8_scale_term)
  1082. fprintf_param_value(" 9=%d", activation_type)
  1083. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1084. fwrite_weight_tag_data(0, op->weight_data, bp);
  1085. fwrite_weight_data(op->bias_data, bp);
  1086. }
  1087. else if (layer->type == "Crop")
  1088. {
  1089. ncnn::Crop* op = (ncnn::Crop*)layer;
  1090. ncnn::Crop* op_default = (ncnn::Crop*)layer_default;
  1091. fprintf_param_value(" 0=%d", woffset)
  1092. fprintf_param_value(" 1=%d", hoffset)
  1093. fprintf_param_value(" 2=%d", coffset)
  1094. fprintf_param_value(" 3=%d", outw)
  1095. fprintf_param_value(" 4=%d", outh)
  1096. fprintf_param_value(" 5=%d", outc)
  1097. }
  1098. else if (layer->type == "Deconvolution")
  1099. {
  1100. ncnn::Deconvolution* op = (ncnn::Deconvolution*)layer;
  1101. ncnn::Deconvolution* op_default = (ncnn::Deconvolution*)layer_default;
  1102. fprintf_param_value(" 0=%d", num_output)
  1103. fprintf_param_value(" 1=%d", kernel_w)
  1104. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1105. fprintf_param_value(" 2=%d", dilation_w)
  1106. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1107. fprintf_param_value(" 3=%d", stride_w)
  1108. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1109. fprintf_param_value(" 4=%d", pad_w)
  1110. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  1111. fprintf_param_value(" 5=%d", bias_term)
  1112. fprintf_param_value(" 6=%d", weight_data_size)
  1113. fprintf_param_value(" 9=%d", activation_type)
  1114. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1115. fwrite_weight_tag_data(0, op->weight_data, bp);
  1116. fwrite_weight_data(op->bias_data, bp);
  1117. }
  1118. else if (layer->type == "DeconvolutionDepthWise")
  1119. {
  1120. ncnn::DeconvolutionDepthWise* op = (ncnn::DeconvolutionDepthWise*)layer;
  1121. ncnn::DeconvolutionDepthWise* op_default = (ncnn::DeconvolutionDepthWise*)layer_default;
  1122. fprintf_param_value(" 0=%d", num_output)
  1123. fprintf_param_value(" 1=%d", kernel_w)
  1124. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1125. fprintf_param_value(" 2=%d", dilation_w)
  1126. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1127. fprintf_param_value(" 3=%d", stride_w)
  1128. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1129. fprintf_param_value(" 4=%d", pad_w)
  1130. { if (op->pad_h != op->pad_w) fprintf(pp, " 14=%d", op->pad_h); }
  1131. fprintf_param_value(" 5=%d", bias_term)
  1132. fprintf_param_value(" 6=%d", weight_data_size)
  1133. fprintf_param_value(" 7=%d", group)
  1134. fprintf_param_value(" 9=%d", activation_type)
  1135. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1136. fwrite_weight_tag_data(0, op->weight_data, bp);
  1137. fwrite_weight_data(op->bias_data, bp);
  1138. }
  1139. else if (layer->type == "DetectionOutput")
  1140. {
  1141. ncnn::DetectionOutput* op = (ncnn::DetectionOutput*)layer;
  1142. ncnn::DetectionOutput* op_default = (ncnn::DetectionOutput*)layer_default;
  1143. fprintf_param_value(" 0=%d", num_class)
  1144. fprintf_param_value(" 1=%f", nms_threshold)
  1145. fprintf_param_value(" 2=%d", nms_top_k)
  1146. fprintf_param_value(" 3=%d", keep_top_k)
  1147. fprintf_param_value(" 4=%f", confidence_threshold)
  1148. fprintf_param_value(" 5=%f", variances[0])
  1149. fprintf_param_value(" 6=%f", variances[1])
  1150. fprintf_param_value(" 7=%f", variances[2])
  1151. fprintf_param_value(" 8=%f", variances[3])
  1152. }
  1153. else if (layer->type == "Dropout")
  1154. {
  1155. ncnn::Dropout* op = (ncnn::Dropout*)layer;
  1156. ncnn::Dropout* op_default = (ncnn::Dropout*)layer_default;
  1157. fprintf_param_value(" 0=%f", scale)
  1158. }
  1159. else if (layer->type == "Eltwise")
  1160. {
  1161. ncnn::Eltwise* op = (ncnn::Eltwise*)layer;
  1162. ncnn::Eltwise* op_default = (ncnn::Eltwise*)layer_default;
  1163. fprintf_param_value(" 0=%d", op_type)
  1164. { if (!op->coeffs.empty()) fprintf_param_int_array(1, op->coeffs, pp); }
  1165. }
  1166. else if (layer->type == "ELU")
  1167. {
  1168. ncnn::ELU* op = (ncnn::ELU*)layer;
  1169. ncnn::ELU* op_default = (ncnn::ELU*)layer_default;
  1170. fprintf_param_value(" 0=%f", alpha)
  1171. }
  1172. else if (layer->type == "Exp")
  1173. {
  1174. ncnn::Exp* op = (ncnn::Exp*)layer;
  1175. ncnn::Exp* op_default = (ncnn::Exp*)layer_default;
  1176. fprintf_param_value(" 0=%f", base)
  1177. fprintf_param_value(" 1=%f", scale)
  1178. fprintf_param_value(" 2=%f", shift)
  1179. }
  1180. else if (layer->type == "InnerProduct")
  1181. {
  1182. ncnn::InnerProduct* op = (ncnn::InnerProduct*)layer;
  1183. ncnn::InnerProduct* op_default = (ncnn::InnerProduct*)layer_default;
  1184. fprintf_param_value(" 0=%d", num_output)
  1185. fprintf_param_value(" 1=%d", bias_term)
  1186. fprintf_param_value(" 2=%d", weight_data_size)
  1187. fprintf_param_value(" 8=%d", int8_scale_term)
  1188. fprintf_param_value(" 9=%d", activation_type)
  1189. { if (!op->activation_params.empty()) fprintf_param_int_array(10, op->activation_params, pp); }
  1190. fwrite_weight_tag_data(0, op->weight_data, bp);
  1191. fwrite_weight_data(op->bias_data, bp);
  1192. }
  1193. else if (layer->type == "Input")
  1194. {
  1195. ncnn::Input* op = (ncnn::Input*)layer;
  1196. ncnn::Input* op_default = (ncnn::Input*)layer_default;
  1197. fprintf_param_value(" 0=%d", w)
  1198. fprintf_param_value(" 1=%d", h)
  1199. fprintf_param_value(" 2=%d", c)
  1200. }
  1201. else if (layer->type == "InstanceNorm")
  1202. {
  1203. ncnn::InstanceNorm* op = (ncnn::InstanceNorm*)layer;
  1204. ncnn::InstanceNorm* op_default = (ncnn::InstanceNorm*)layer_default;
  1205. fprintf_param_value(" 0=%d", channels)
  1206. fprintf_param_value(" 1=%f", eps)
  1207. }
  1208. else if (layer->type == "Interp")
  1209. {
  1210. ncnn::Interp* op = (ncnn::Interp*)layer;
  1211. ncnn::Interp* op_default = (ncnn::Interp*)layer_default;
  1212. fprintf_param_value(" 0=%d", resize_type)
  1213. fprintf_param_value(" 1=%f", height_scale)
  1214. fprintf_param_value(" 2=%f", width_scale)
  1215. fprintf_param_value(" 3=%d", output_height)
  1216. fprintf_param_value(" 4=%d", output_width)
  1217. }
  1218. else if (layer->type == "Log")
  1219. {
  1220. ncnn::Log* op = (ncnn::Log*)layer;
  1221. ncnn::Log* op_default = (ncnn::Log*)layer_default;
  1222. fprintf_param_value(" 0=%f", base)
  1223. fprintf_param_value(" 1=%f", scale)
  1224. fprintf_param_value(" 2=%f", shift)
  1225. }
  1226. else if (layer->type == "LRN")
  1227. {
  1228. ncnn::LRN* op = (ncnn::LRN*)layer;
  1229. ncnn::LRN* op_default = (ncnn::LRN*)layer_default;
  1230. fprintf_param_value(" 0=%d", region_type)
  1231. fprintf_param_value(" 1=%d", local_size)
  1232. fprintf_param_value(" 2=%f", alpha)
  1233. fprintf_param_value(" 3=%f", beta)
  1234. fprintf_param_value(" 4=%f", bias)
  1235. }
  1236. else if (layer->type == "MVN")
  1237. {
  1238. ncnn::MVN* op = (ncnn::MVN*)layer;
  1239. ncnn::MVN* op_default = (ncnn::MVN*)layer_default;
  1240. fprintf_param_value(" 0=%d", normalize_variance)
  1241. fprintf_param_value(" 1=%d", across_channels)
  1242. fprintf_param_value(" 2=%f", eps)
  1243. }
  1244. else if (layer->type == "Normalize")
  1245. {
  1246. ncnn::Normalize* op = (ncnn::Normalize*)layer;
  1247. ncnn::Normalize* op_default = (ncnn::Normalize*)layer_default;
  1248. fprintf_param_value(" 0=%d", across_spatial)
  1249. fprintf_param_value(" 1=%d", channel_shared)
  1250. fprintf_param_value(" 2=%f", eps)
  1251. fprintf_param_value(" 3=%d", scale_data_size)
  1252. fprintf_param_value(" 4=%d", across_channel)
  1253. fwrite_weight_data(op->scale_data, bp);
  1254. }
  1255. else if (layer->type == "Padding")
  1256. {
  1257. ncnn::Padding* op = (ncnn::Padding*)layer;
  1258. ncnn::Padding* op_default = (ncnn::Padding*)layer_default;
  1259. fprintf_param_value(" 0=%d", top)
  1260. fprintf_param_value(" 1=%d", bottom)
  1261. fprintf_param_value(" 2=%d", left)
  1262. fprintf_param_value(" 3=%d", right)
  1263. fprintf_param_value(" 4=%d", type)
  1264. fprintf_param_value(" 5=%f", value)
  1265. }
  1266. else if (layer->type == "Permute")
  1267. {
  1268. ncnn::Permute* op = (ncnn::Permute*)layer;
  1269. ncnn::Permute* op_default = (ncnn::Permute*)layer_default;
  1270. fprintf_param_value(" 0=%d", order_type)
  1271. }
  1272. else if (layer->type == "Pooling")
  1273. {
  1274. ncnn::Pooling* op = (ncnn::Pooling*)layer;
  1275. ncnn::Pooling* op_default = (ncnn::Pooling*)layer_default;
  1276. fprintf_param_value(" 0=%d", pooling_type)
  1277. fprintf_param_value(" 1=%d", kernel_w)
  1278. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1279. fprintf_param_value(" 2=%d", stride_w)
  1280. { if (op->stride_h != op->stride_w) fprintf(pp, " 12=%d", op->stride_h); }
  1281. fprintf_param_value(" 3=%d", pad_left)
  1282. { if (op->pad_top != op->pad_left) fprintf(pp, " 13=%d", op->pad_top); }
  1283. { if (op->pad_right != op->pad_left) fprintf(pp, " 14=%d", op->pad_right); }
  1284. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 15=%d", op->pad_bottom); }
  1285. fprintf_param_value(" 4=%d", global_pooling)
  1286. fprintf_param_value(" 5=%d", pad_mode)
  1287. }
  1288. else if (layer->type == "Power")
  1289. {
  1290. ncnn::Power* op = (ncnn::Power*)layer;
  1291. ncnn::Power* op_default = (ncnn::Power*)layer_default;
  1292. fprintf_param_value(" 0=%f", power)
  1293. fprintf_param_value(" 1=%f", scale)
  1294. fprintf_param_value(" 2=%f", shift)
  1295. }
  1296. else if (layer->type == "PReLU")
  1297. {
  1298. ncnn::PReLU* op = (ncnn::PReLU*)layer;
  1299. ncnn::PReLU* op_default = (ncnn::PReLU*)layer_default;
  1300. fprintf_param_value(" 0=%d", num_slope)
  1301. fwrite_weight_data(op->slope_data, bp);
  1302. }
  1303. else if (layer->type == "PriorBox")
  1304. {
  1305. ncnn::PriorBox* op = (ncnn::PriorBox*)layer;
  1306. ncnn::PriorBox* op_default = (ncnn::PriorBox*)layer_default;
  1307. { if (!op->min_sizes.empty()) fprintf_param_int_array(0, op->min_sizes, pp); }
  1308. { if (!op->max_sizes.empty()) fprintf_param_int_array(1, op->max_sizes, pp); }
  1309. { if (!op->aspect_ratios.empty()) fprintf_param_int_array(2, op->aspect_ratios, pp); }
  1310. fprintf_param_value(" 3=%f", variances[0])
  1311. fprintf_param_value(" 4=%f", variances[1])
  1312. fprintf_param_value(" 5=%f", variances[2])
  1313. fprintf_param_value(" 6=%f", variances[3])
  1314. fprintf_param_value(" 7=%d", flip)
  1315. fprintf_param_value(" 8=%d", clip)
  1316. fprintf_param_value(" 9=%d", image_width)
  1317. fprintf_param_value(" 10=%d", image_height)
  1318. fprintf_param_value(" 11=%f", step_width)
  1319. fprintf_param_value(" 12=%f", step_height)
  1320. fprintf_param_value(" 13=%f", offset)
  1321. }
  1322. else if (layer->type == "Proposal")
  1323. {
  1324. ncnn::Proposal* op = (ncnn::Proposal*)layer;
  1325. ncnn::Proposal* op_default = (ncnn::Proposal*)layer_default;
  1326. fprintf_param_value(" 0=%d", feat_stride)
  1327. fprintf_param_value(" 1=%d", base_size)
  1328. fprintf_param_value(" 2=%d", pre_nms_topN)
  1329. fprintf_param_value(" 3=%d", after_nms_topN)
  1330. fprintf_param_value(" 4=%f", nms_thresh)
  1331. fprintf_param_value(" 5=%d", min_size)
  1332. }
  1333. else if (layer->type == "PSROIPooling")
  1334. {
  1335. ncnn::PSROIPooling* op = (ncnn::PSROIPooling*)layer;
  1336. ncnn::PSROIPooling* op_default = (ncnn::PSROIPooling*)layer_default;
  1337. fprintf_param_value(" 0=%d", pooled_width)
  1338. fprintf_param_value(" 1=%d", pooled_height)
  1339. fprintf_param_value(" 2=%f", spatial_scale)
  1340. fprintf_param_value(" 3=%d", output_dim)
  1341. }
  1342. else if (layer->type == "Quantize")
  1343. {
  1344. ncnn::Quantize* op = (ncnn::Quantize*)layer;
  1345. ncnn::Quantize* op_default = (ncnn::Quantize*)layer_default;
  1346. fprintf_param_value(" 0=%f", scale)
  1347. }
  1348. else if (layer->type == "Reduction")
  1349. {
  1350. ncnn::Reduction* op = (ncnn::Reduction*)layer;
  1351. ncnn::Reduction* op_default = (ncnn::Reduction*)layer_default;
  1352. fprintf_param_value(" 0=%d", operation)
  1353. fprintf_param_value(" 1=%d", dim)
  1354. fprintf_param_value(" 2=%f", coeff)
  1355. }
  1356. else if (layer->type == "ReLU")
  1357. {
  1358. ncnn::ReLU* op = (ncnn::ReLU*)layer;
  1359. ncnn::ReLU* op_default = (ncnn::ReLU*)layer_default;
  1360. fprintf_param_value(" 0=%f", slope)
  1361. }
  1362. else if (layer->type == "Reorg")
  1363. {
  1364. ncnn::Reorg* op = (ncnn::Reorg*)layer;
  1365. ncnn::Reorg* op_default = (ncnn::Reorg*)layer_default;
  1366. fprintf_param_value(" 0=%d", stride)
  1367. }
  1368. else if (layer->type == "Requantize")
  1369. {
  1370. ncnn::Requantize* op = (ncnn::Requantize*)layer;
  1371. ncnn::Requantize* op_default = (ncnn::Requantize*)layer_default;
  1372. fprintf_param_value(" 0=%f", scale_in)
  1373. fprintf_param_value(" 1=%f", scale_out)
  1374. fprintf_param_value(" 2=%d", bias_term)
  1375. fprintf_param_value(" 3=%d", bias_data_size)
  1376. fprintf_param_value(" 4=%d", fusion_relu)
  1377. }
  1378. else if (layer->type == "Reshape")
  1379. {
  1380. ncnn::Reshape* op = (ncnn::Reshape*)layer;
  1381. ncnn::Reshape* op_default = (ncnn::Reshape*)layer_default;
  1382. fprintf_param_value(" 0=%d", w)
  1383. fprintf_param_value(" 1=%d", h)
  1384. fprintf_param_value(" 2=%d", c)
  1385. fprintf_param_value(" 3=%d", permute)
  1386. }
  1387. else if (layer->type == "ROIAlign")
  1388. {
  1389. ncnn::ROIAlign* op = (ncnn::ROIAlign*)layer;
  1390. ncnn::ROIAlign* op_default = (ncnn::ROIAlign*)layer_default;
  1391. fprintf_param_value(" 0=%d", pooled_width)
  1392. fprintf_param_value(" 1=%d", pooled_height)
  1393. fprintf_param_value(" 2=%f", spatial_scale)
  1394. }
  1395. else if (layer->type == "ROIPooling")
  1396. {
  1397. ncnn::ROIPooling* op = (ncnn::ROIPooling*)layer;
  1398. ncnn::ROIPooling* op_default = (ncnn::ROIPooling*)layer_default;
  1399. fprintf_param_value(" 0=%d", pooled_width)
  1400. fprintf_param_value(" 1=%d", pooled_height)
  1401. fprintf_param_value(" 2=%f", spatial_scale)
  1402. }
  1403. else if (layer->type == "Scale")
  1404. {
  1405. ncnn::Scale* op = (ncnn::Scale*)layer;
  1406. ncnn::Scale* op_default = (ncnn::Scale*)layer_default;
  1407. fprintf_param_value(" 0=%d", scale_data_size)
  1408. fprintf_param_value(" 1=%d", bias_term)
  1409. fwrite_weight_data(op->scale_data, bp);
  1410. fwrite_weight_data(op->bias_data, bp);
  1411. }
  1412. else if (layer->type == "ShuffleChannel")
  1413. {
  1414. ncnn::ShuffleChannel* op = (ncnn::ShuffleChannel*)layer;
  1415. ncnn::ShuffleChannel* op_default = (ncnn::ShuffleChannel*)layer_default;
  1416. fprintf_param_value(" 0=%d", group)
  1417. }
  1418. else if (layer->type == "Slice")
  1419. {
  1420. ncnn::Slice* op = (ncnn::Slice*)layer;
  1421. ncnn::Slice* op_default = (ncnn::Slice*)layer_default;
  1422. { if (!op->slices.empty()) fprintf_param_int_array(0, op->slices, pp); }
  1423. fprintf_param_value(" 1=%d", axis)
  1424. }
  1425. else if (layer->type == "Softmax")
  1426. {
  1427. ncnn::Softmax* op = (ncnn::Softmax*)layer;
  1428. ncnn::Softmax* op_default = (ncnn::Softmax*)layer_default;
  1429. fprintf_param_value(" 0=%d", axis)
  1430. // HACK
  1431. if (op->axis != 0)
  1432. {
  1433. int fixbug0 = 1;
  1434. fprintf(pp, " 1=%d", fixbug0);
  1435. }
  1436. }
  1437. else if (layer->type == "Threshold")
  1438. {
  1439. ncnn::Threshold* op = (ncnn::Threshold*)layer;
  1440. ncnn::Threshold* op_default = (ncnn::Threshold*)layer_default;
  1441. fprintf_param_value(" 0=%f", threshold)
  1442. }
  1443. else if (layer->type == "UnaryOp")
  1444. {
  1445. ncnn::UnaryOp* op = (ncnn::UnaryOp*)layer;
  1446. ncnn::UnaryOp* op_default = (ncnn::UnaryOp*)layer_default;
  1447. fprintf_param_value(" 0=%d", op_type)
  1448. }
  1449. else if (layer->type == "YoloDetectionOutput")
  1450. {
  1451. ncnn::YoloDetectionOutput* op = (ncnn::YoloDetectionOutput*)layer;
  1452. ncnn::YoloDetectionOutput* op_default = (ncnn::YoloDetectionOutput*)layer_default;
  1453. fprintf_param_value(" 0=%d", num_class)
  1454. fprintf_param_value(" 1=%d", num_box)
  1455. fprintf_param_value(" 2=%f", confidence_threshold)
  1456. fprintf_param_value(" 3=%f", nms_threshold)
  1457. { if (!op->biases.empty()) fprintf_param_int_array(4, op->biases, pp); }
  1458. }
  1459. else if (layer->type == "Yolov3DetectionOutput")
  1460. {
  1461. ncnn::Yolov3DetectionOutput* op = (ncnn::Yolov3DetectionOutput*)layer;
  1462. ncnn::Yolov3DetectionOutput* op_default = (ncnn::Yolov3DetectionOutput*)layer_default;
  1463. fprintf_param_value(" 0=%d", num_class)
  1464. fprintf_param_value(" 1=%d", num_box)
  1465. fprintf_param_value(" 2=%f", confidence_threshold)
  1466. fprintf_param_value(" 3=%f", nms_threshold)
  1467. { if (!op->biases.empty()) fprintf_param_int_array(4, op->biases, pp); }
  1468. { if (!op->mask.empty()) fprintf_param_int_array(5, op->mask, pp); }
  1469. { if (!op->anchors_scale.empty()) fprintf_param_int_array(6, op->anchors_scale, pp); }
  1470. }
  1471. #undef fprintf_param_value
  1472. fprintf(pp, "\n");
  1473. delete layer_default;
  1474. }
  1475. fclose(pp);
  1476. fclose(bp);
  1477. return 0;
  1478. }
  1479. int main(int argc, char** argv)
  1480. {
  1481. if (argc != 6)
  1482. {
  1483. fprintf(stderr, "usage: %s [inparam] [inbin] [outparam] [outbin] [flag]\n", argv[0]);
  1484. return -1;
  1485. }
  1486. const char* inparam = argv[1];
  1487. const char* inbin = argv[2];
  1488. const char* outparam = argv[3];
  1489. const char* outbin = argv[4];
  1490. int flag = atoi(argv[5]);
  1491. NetOptimize optimizer;
  1492. if (flag == 65536)
  1493. {
  1494. optimizer.storage_type = 1;
  1495. }
  1496. else
  1497. {
  1498. optimizer.storage_type = 0;
  1499. }
  1500. optimizer.load_param(inparam);
  1501. optimizer.load_model(inbin);
  1502. optimizer.fuse_batchnorm_scale();
  1503. optimizer.fuse_convolution_batchnorm();
  1504. optimizer.fuse_convolutiondepthwise_batchnorm();
  1505. optimizer.fuse_deconvolution_batchnorm();
  1506. optimizer.fuse_deconvolutiondepthwise_batchnorm();
  1507. optimizer.fuse_innerproduct_batchnorm();
  1508. optimizer.fuse_innerproduct_dropout();
  1509. optimizer.fuse_convolution_activation();
  1510. optimizer.fuse_convolutiondepthwise_activation();
  1511. optimizer.fuse_deconvolution_activation();
  1512. optimizer.fuse_deconvolutiondepthwise_activation();
  1513. optimizer.fuse_innerproduct_activation();
  1514. optimizer.eliminate_dropout();
  1515. optimizer.eliminate_flatten_after_global_pooling();
  1516. optimizer.replace_convolution_with_innerproduct_after_global_pooling();
  1517. optimizer.save(outparam, outbin);
  1518. return 0;
  1519. }