You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

ncnnoptimize.cpp 83 kB

6 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
6 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
6 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <algorithm>
  15. #include <set>
  16. #include <vector>
  17. // ncnn public header
  18. #include "net.h"
  19. #include "layer.h"
  20. // ncnn private header
  21. #include "layer/batchnorm.h"
  22. #include "layer/bias.h"
  23. #include "layer/binaryop.h"
  24. #include "layer/clip.h"
  25. #include "layer/concat.h"
  26. #include "layer/convolution.h"
  27. #include "layer/convolutiondepthwise.h"
  28. #include "layer/crop.h"
  29. #include "layer/deconvolution.h"
  30. #include "layer/deconvolutiondepthwise.h"
  31. #include "layer/detectionoutput.h"
  32. #include "layer/dropout.h"
  33. #include "layer/eltwise.h"
  34. #include "layer/elu.h"
  35. #include "layer/exp.h"
  36. #include "layer/expanddims.h"
  37. #include "layer/flatten.h"
  38. #include "layer/hardsigmoid.h"
  39. #include "layer/hardswish.h"
  40. #include "layer/innerproduct.h"
  41. #include "layer/input.h"
  42. #include "layer/instancenorm.h"
  43. #include "layer/interp.h"
  44. #include "layer/log.h"
  45. #include "layer/lrn.h"
  46. #include "layer/memorydata.h"
  47. #include "layer/mvn.h"
  48. #include "layer/normalize.h"
  49. #include "layer/padding.h"
  50. #include "layer/permute.h"
  51. #include "layer/pooling.h"
  52. #include "layer/power.h"
  53. #include "layer/prelu.h"
  54. #include "layer/priorbox.h"
  55. #include "layer/proposal.h"
  56. #include "layer/psroipooling.h"
  57. #include "layer/quantize.h"
  58. #include "layer/reduction.h"
  59. #include "layer/relu.h"
  60. #include "layer/reorg.h"
  61. #include "layer/requantize.h"
  62. #include "layer/reshape.h"
  63. #include "layer/roialign.h"
  64. #include "layer/roipooling.h"
  65. #include "layer/scale.h"
  66. #include "layer/slice.h"
  67. #include "layer/shufflechannel.h"
  68. #include "layer/softmax.h"
  69. #include "layer/squeeze.h"
  70. #include "layer/threshold.h"
  71. #include "layer/unaryop.h"
  72. #include "layer/yolodetectionoutput.h"
  73. #include "layer/yolov3detectionoutput.h"
  74. #if defined(__aarch64__) && defined(LINUX)
  75. #include <locale>
  76. #include <chrono>
  77. #include <random>
  78. #include <limits>
  79. #include <cassert>
  80. #define TEXT_GREEN "\033[32m"
  81. #define TEXT_YELLOW "\033[33m"
  82. #define TEXT_RED "\033[31m"
  83. #define CLR "\033[0m"
  84. #endif // defined(__aarch64__) && defined(LINUX)
  85. // always return empty weights
  86. class ModelBinFromEmpty : public ncnn::ModelBin
  87. {
  88. public:
  89. virtual ncnn::Mat load(int w, int /*type*/) const { return ncnn::Mat(w); }
  90. };
  91. class NetOptimize : public ncnn::Net
  92. {
  93. public:
  94. int load_model()
  95. {
  96. // load file
  97. int ret = 0;
  98. ModelBinFromEmpty mb;
  99. for (size_t i=0; i<layers.size(); i++)
  100. {
  101. ncnn::Layer* layer = layers[i];
  102. int lret = layer->load_model(mb);
  103. if (lret != 0)
  104. {
  105. fprintf(stderr, "layer load_model %d failed\n", (int)i);
  106. ret = -1;
  107. break;
  108. }
  109. int cret = layer->create_pipeline(opt);
  110. if (cret != 0)
  111. {
  112. fprintf(stderr, "layer create_pipeline %d failed\n", (int)i);
  113. ret = -1;
  114. break;
  115. }
  116. }
  117. #if NCNN_VULKAN
  118. if (opt.use_vulkan_compute)
  119. {
  120. upload_model();
  121. create_pipeline();
  122. }
  123. #endif // NCNN_VULKAN
  124. fuse_network();
  125. return ret;
  126. }
  127. public:
  128. // 0=fp32 1=fp16
  129. int storage_type;
  130. public:
  131. int fuse_batchnorm_scale();
  132. int fuse_convolution_batchnorm();
  133. int fuse_convolutiondepthwise_batchnorm();
  134. int fuse_deconvolution_batchnorm();
  135. int fuse_deconvolutiondepthwise_batchnorm();
  136. int fuse_innerproduct_batchnorm();
  137. int fuse_innerproduct_dropout();
  138. int fuse_convolution_activation();
  139. int fuse_convolutiondepthwise_activation();
  140. int fuse_deconvolution_activation();
  141. int fuse_deconvolutiondepthwise_activation();
  142. int fuse_innerproduct_activation();
  143. int eliminate_dropout();
  144. int eliminate_noop();
  145. int eliminate_flatten_after_global_pooling();
  146. int eliminate_reshape_after_global_pooling();
  147. int eliminate_flatten_after_innerproduct();
  148. int eliminate_reshape_before_binaryop();
  149. int replace_convolution_with_innerproduct_after_global_pooling();
  150. int replace_convolution_with_innerproduct_after_innerproduct();
  151. public:
  152. int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp);
  153. int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp);
  154. int fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp);
  155. int fwrite_weight_data(const ncnn::Mat& data, FILE* bp);
  156. int save(const char* parampath, const char* binpath);
  157. #if defined(__aarch64__) && defined(LINUX)
  158. void gauss_random(ncnn::Mat &m);
  159. void find_fastest_fp32_conv(const char* name, int w, int h, int c);
  160. int support_fp32_conv_type(const ncnn::Convolution* op, const ncnn::Mat& mat, const int type);
  161. #endif
  162. };
  163. #if defined(__aarch64__) && defined(LINUX)
  164. void NetOptimize::gauss_random(ncnn::Mat &m)
  165. {
  166. std::random_device rd;
  167. std::mt19937 gen(rd());
  168. std::normal_distribution<float> d(1.0f, 1.0f);
  169. int size = m.total();
  170. for (int i = 0; i < size; ++i)
  171. {
  172. m[i] = d(gen);
  173. }
  174. }
  175. void NetOptimize::find_fastest_fp32_conv(const char* dataname, int w, int h, int c)
  176. {
  177. ncnn::PoolAllocator allocator;
  178. allocator.clear();
  179. ncnn::Option opt;
  180. // embeded system generally use single thread
  181. opt.num_threads = 1;
  182. const int layer_count = layers.size();
  183. ncnn::Extractor ex = create_extractor();
  184. ncnn::Mat input(w, h, c);
  185. if (ex.input(dataname, input) < 0)
  186. {
  187. fprintf(stderr, "set input failed, check dataname.\n");
  188. return;
  189. }
  190. const char* IMPL_NAME[6] = {"baseline", "winograd", "pointwise", "im2col", "direct", "conv3x3s2"};
  191. for (int i = 0; i < layer_count; ++i)
  192. {
  193. ncnn::Layer* layer = layers[i];
  194. if (layer->type == "Convolution")
  195. {
  196. ncnn::Convolution* op = (ncnn::Convolution*)layer;
  197. ncnn::Mat bottom_blob;
  198. ncnn::Mat top_blob;
  199. ex.extract(layer->bottoms[0], bottom_blob);
  200. ex.extract(layer->tops[0], top_blob);
  201. if (bottom_blob.empty() || top_blob.empty())
  202. {
  203. continue;
  204. }
  205. ncnn::Mat weight_blob(op->kernel_w, op->kernel_h, bottom_blob.c * top_blob.c);
  206. fprintf(stdout, TEXT_GREEN "Input [w h nc]: %d %d %d\n" CLR, bottom_blob.w, bottom_blob.h, bottom_blob.c);
  207. fprintf(stdout, TEXT_GREEN "Kernel [w h nc]: %d %d %d\n" CLR, op->kernel_w, op->kernel_h, bottom_blob.c * top_blob.c);
  208. fprintf(stdout, TEXT_GREEN "Output [w h nc]: %d %d %d\n" CLR, top_blob.w, top_blob.h, top_blob.c);
  209. // randomize input and kernel
  210. gauss_random(bottom_blob);
  211. // try every implementation
  212. double min_cost = std::numeric_limits<double>::max();
  213. int best_type = 0;
  214. // how much conv implementation type ncnn has ?
  215. for (int type = 1; type <= 5; ++type)
  216. {
  217. int support = support_fp32_conv_type(op, bottom_blob, type);
  218. if (support < 1)
  219. {
  220. // implementation type mismatch convolution configuration, skip
  221. continue;
  222. }
  223. op->impl_type = type;
  224. auto start = std::chrono::high_resolution_clock::now();
  225. const int NREPEATS = 20;
  226. op->create_pipeline(opt);
  227. for (int repeat = 0; repeat < NREPEATS; ++repeat)
  228. {
  229. op->forward(top_blob, bottom_blob, opt);
  230. }
  231. op->destroy_pipeline(opt);
  232. auto stop = std::chrono::high_resolution_clock::now();
  233. double cur_cost = std::chrono::duration<double, std::micro>(stop-start).count() / NREPEATS;
  234. fprintf(stdout, TEXT_GREEN "%s cost %0.3lfms \n" CLR, IMPL_NAME[type], cur_cost/1000);
  235. if (cur_cost < min_cost)
  236. {
  237. min_cost = cur_cost;
  238. best_type = type;
  239. }
  240. }
  241. op->impl_type = best_type;
  242. fprintf(stdout, TEXT_YELLOW "%d: %s use %s \n\n" CLR, i, layer->name.c_str(), IMPL_NAME[op->impl_type]);
  243. }
  244. }
  245. }
  246. int NetOptimize::support_fp32_conv_type(const ncnn::Convolution* op, const ncnn::Mat& bottom, const int type)
  247. {
  248. // not baseline, then k_h == k_w and s_h == s_w
  249. // no dilation conv shall be allowed
  250. if (op->kernel_w != op->kernel_h ||
  251. op->stride_w != op->stride_h ||
  252. op->dilation_w != op->dilation_h ||
  253. op->dilation_h != 1)
  254. {
  255. return -1;
  256. }
  257. // (kernel, stride) in {(1, 1), (1, 2), (2, 1), (3, 1), (3, 2), (4, 4), (5, 1), (5, 2), (7, 1), (7, 2)}
  258. const int support_table[7][4] =
  259. {
  260. {1, 1, 0, 0},
  261. {1, 0, 0, 0},
  262. {1, 1, 0, 0},
  263. {0, 0, 0, 1},
  264. {1, 1, 0, 0},
  265. {0, 0, 0, 0},
  266. {1, 1, 0, 0}
  267. };
  268. // kernel_size x stride
  269. const int kernel = op->kernel_h,
  270. stride = op->stride_h;
  271. // if match prequisation
  272. switch(type)
  273. {
  274. case 1:
  275. // winograd
  276. if (kernel != 3 || stride != 1){
  277. return -1;
  278. }
  279. break;
  280. case 2:
  281. // pointwise
  282. // input_h == 1, input_w == 1, dilation == 1, stride == 1
  283. if (bottom.h != 1 || bottom.w != 1 || stride != 1)
  284. {
  285. return -1;
  286. }
  287. break;
  288. case 3:
  289. // im2col
  290. break;
  291. case 4:
  292. // direct conv
  293. if (support_table[kernel-1][stride-1] == 0)
  294. {
  295. return -1;
  296. }
  297. break;
  298. case 5:
  299. // conv3x3s2
  300. // kernel == 3 and stride == 2
  301. if (kernel != 3 || stride != 2)
  302. {
  303. return -1;
  304. }
  305. break;
  306. default:
  307. fprintf(stderr, TEXT_RED "unrecognize convolution impl type: %d" CLR, type);
  308. break;
  309. }
  310. return 1;
  311. }
  312. #endif // defined(__aarch64__) && defined(LINUX)
  313. int NetOptimize::fuse_batchnorm_scale()
  314. {
  315. const int layer_count = layers.size();
  316. for (int i=0; i<layer_count; i++)
  317. {
  318. if (layers[i]->type != "BatchNorm")
  319. continue;
  320. // BatchNorm - Scale
  321. int top_blob_index = layers[i]->tops[0];
  322. int j = i + 1;
  323. for (; j<layer_count; j++)
  324. {
  325. if (layers[j]->type != "Scale")
  326. continue;
  327. if (layers[j]->bottoms.size() != 1)
  328. continue;
  329. if (layers[j]->bottoms[0] == top_blob_index)
  330. break;
  331. }
  332. if (j == layer_count)
  333. continue;
  334. // fuse BatchNorm - Scale to BatchNorm
  335. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
  336. ncnn::Scale* scale = (ncnn::Scale*)layers[j];
  337. fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());
  338. {
  339. // v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
  340. // = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)
  341. int channels = batchnorm->channels;
  342. float* slope = batchnorm->slope_data;
  343. float* bias = batchnorm->bias_data;
  344. for (int q=0; q<channels; q++)
  345. {
  346. slope[q] = slope[q] * scale->scale_data[q];
  347. if (scale->bias_term)
  348. bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
  349. else
  350. bias[q] = bias[q] * scale->scale_data[q];
  351. }
  352. }
  353. int top_blob_index_final = scale->tops[0];
  354. batchnorm->tops[0] = top_blob_index_final;
  355. blobs[top_blob_index_final].producer = i;
  356. scale->type = "ncnnfused";
  357. }
  358. return 0;
  359. }
  360. int NetOptimize::fuse_convolution_batchnorm()
  361. {
  362. const int layer_count = layers.size();
  363. for (int i=0; i<layer_count; i++)
  364. {
  365. if (layers[i]->type != "Convolution")
  366. continue;
  367. // Convolution - BatchNorm
  368. int top_blob_index = layers[i]->tops[0];
  369. int j = i + 1;
  370. for (; j<layer_count; j++)
  371. {
  372. if (layers[j]->type != "BatchNorm")
  373. continue;
  374. if (layers[j]->bottoms.size() != 1)
  375. continue;
  376. if (layers[j]->bottoms[0] == top_blob_index)
  377. break;
  378. }
  379. if (j == layer_count)
  380. continue;
  381. // fuse Convolution - BatchNorm to Convolution
  382. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  383. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  384. fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());
  385. {
  386. int channels = batchnorm->channels;
  387. float eps = batchnorm->eps;
  388. // a = bias - slope * mean / sqrt(var + eps)
  389. // b = slope / sqrt(var + eps)
  390. // value = value * b + a
  391. std::vector<float> a(channels);
  392. std::vector<float> b(channels);
  393. for (int i=0; i<channels; i++)
  394. {
  395. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  396. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  397. b[i] = batchnorm->slope_data[i] / sqrt_var;
  398. }
  399. if (convolution->bias_term == 0)
  400. {
  401. // init bias as zero
  402. convolution->bias_term = 1;
  403. convolution->bias_data = ncnn::Mat(channels);
  404. convolution->bias_data.fill(0.f);
  405. }
  406. const int weight_per_outch = convolution->weight_data_size / channels;
  407. float* weight = convolution->weight_data;
  408. float* bias = convolution->bias_data;
  409. for (int i=0; i<channels; i++)
  410. {
  411. float* conv_weight_outch = weight + weight_per_outch * i;
  412. for (int j=0; j<weight_per_outch; j++)
  413. {
  414. conv_weight_outch[j] *= b[i];
  415. }
  416. bias[i] += a[i];
  417. }
  418. }
  419. int top_blob_index_final = batchnorm->tops[0];
  420. convolution->tops[0] = top_blob_index_final;
  421. blobs[top_blob_index_final].producer = i;
  422. batchnorm->type = "ncnnfused";
  423. }
  424. return 0;
  425. }
  426. int NetOptimize::fuse_convolutiondepthwise_batchnorm()
  427. {
  428. const int layer_count = layers.size();
  429. for (int i=0; i<layer_count; i++)
  430. {
  431. if (layers[i]->type != "ConvolutionDepthWise")
  432. continue;
  433. // ConvolutionDepthWise - BatchNorm
  434. int top_blob_index = layers[i]->tops[0];
  435. int j = i + 1;
  436. for (; j<layer_count; j++)
  437. {
  438. if (layers[j]->type != "BatchNorm")
  439. continue;
  440. if (layers[j]->bottoms.size() != 1)
  441. continue;
  442. if (layers[j]->bottoms[0] == top_blob_index)
  443. break;
  444. }
  445. if (j == layer_count)
  446. continue;
  447. // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
  448. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  449. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  450. fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  451. {
  452. int channels = batchnorm->channels;
  453. float eps = batchnorm->eps;
  454. // a = bias - slope * mean / sqrt(var + eps)
  455. // b = slope / sqrt(var + eps)
  456. // value = value * b + a
  457. std::vector<float> a(channels);
  458. std::vector<float> b(channels);
  459. for (int i=0; i<channels; i++)
  460. {
  461. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  462. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  463. b[i] = batchnorm->slope_data[i] / sqrt_var;
  464. }
  465. if (convolutiondepthwise->bias_term == 0)
  466. {
  467. // init bias as zero
  468. convolutiondepthwise->bias_term = 1;
  469. convolutiondepthwise->bias_data = ncnn::Mat(channels);
  470. convolutiondepthwise->bias_data.fill(0.f);
  471. }
  472. const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;
  473. float* weight = convolutiondepthwise->weight_data;
  474. float* bias = convolutiondepthwise->bias_data;
  475. for (int i=0; i<channels; i++)
  476. {
  477. float* conv_weight_outch = weight + weight_per_outch * i;
  478. for (int j=0; j<weight_per_outch; j++)
  479. {
  480. conv_weight_outch[j] *= b[i];
  481. }
  482. bias[i] += a[i];
  483. }
  484. }
  485. int top_blob_index_final = batchnorm->tops[0];
  486. convolutiondepthwise->tops[0] = top_blob_index_final;
  487. blobs[top_blob_index_final].producer = i;
  488. batchnorm->type = "ncnnfused";
  489. }
  490. return 0;
  491. }
  492. int NetOptimize::fuse_deconvolution_batchnorm()
  493. {
  494. const int layer_count = layers.size();
  495. for (int i=0; i<layer_count; i++)
  496. {
  497. if (layers[i]->type != "Deconvolution")
  498. continue;
  499. // Deconvolution - BatchNorm
  500. int top_blob_index = layers[i]->tops[0];
  501. int j = i + 1;
  502. for (; j<layer_count; j++)
  503. {
  504. if (layers[j]->type != "BatchNorm")
  505. continue;
  506. if (layers[j]->bottoms.size() != 1)
  507. continue;
  508. if (layers[j]->bottoms[0] == top_blob_index)
  509. break;
  510. }
  511. if (j == layer_count)
  512. continue;
  513. // fuse Deconvolution - BatchNorm to Deconvolution
  514. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  515. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  516. fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());
  517. {
  518. int channels = batchnorm->channels;
  519. float eps = batchnorm->eps;
  520. // a = bias - slope * mean / sqrt(var + eps)
  521. // b = slope / sqrt(var + eps)
  522. // value = value * b + a
  523. std::vector<float> a(channels);
  524. std::vector<float> b(channels);
  525. for (int i=0; i<channels; i++)
  526. {
  527. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  528. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  529. b[i] = batchnorm->slope_data[i] / sqrt_var;
  530. }
  531. if (deconvolution->bias_term == 0)
  532. {
  533. // init bias as zero
  534. deconvolution->bias_term = 1;
  535. deconvolution->bias_data = ncnn::Mat(channels);
  536. deconvolution->bias_data.fill(0.f);
  537. }
  538. const int weight_per_outch = deconvolution->weight_data_size / channels;
  539. float* weight = deconvolution->weight_data;
  540. float* bias = deconvolution->bias_data;
  541. for (int i=0; i<channels; i++)
  542. {
  543. float* conv_weight_outch = weight + weight_per_outch * i;
  544. for (int j=0; j<weight_per_outch; j++)
  545. {
  546. conv_weight_outch[j] *= b[i];
  547. }
  548. bias[i] += a[i];
  549. }
  550. }
  551. int top_blob_index_final = batchnorm->tops[0];
  552. deconvolution->tops[0] = top_blob_index_final;
  553. blobs[top_blob_index_final].producer = i;
  554. batchnorm->type = "ncnnfused";
  555. }
  556. return 0;
  557. }
  558. int NetOptimize::fuse_deconvolutiondepthwise_batchnorm()
  559. {
  560. const int layer_count = layers.size();
  561. for (int i=0; i<layer_count; i++)
  562. {
  563. if (layers[i]->type != "DeconvolutionDepthWise")
  564. continue;
  565. // DeconvolutionDepthWise - BatchNorm
  566. int top_blob_index = layers[i]->tops[0];
  567. int j = i + 1;
  568. for (; j<layer_count; j++)
  569. {
  570. if (layers[j]->type != "BatchNorm")
  571. continue;
  572. if (layers[j]->bottoms.size() != 1)
  573. continue;
  574. if (layers[j]->bottoms[0] == top_blob_index)
  575. break;
  576. }
  577. if (j == layer_count)
  578. continue;
  579. // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
  580. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  581. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  582. fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());
  583. {
  584. int channels = batchnorm->channels;
  585. float eps = batchnorm->eps;
  586. // a = bias - slope * mean / sqrt(var + eps)
  587. // b = slope / sqrt(var + eps)
  588. // value = value * b + a
  589. std::vector<float> a(channels);
  590. std::vector<float> b(channels);
  591. for (int i=0; i<channels; i++)
  592. {
  593. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  594. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  595. b[i] = batchnorm->slope_data[i] / sqrt_var;
  596. }
  597. if (deconvolutiondepthwise->bias_term == 0)
  598. {
  599. // init bias as zero
  600. deconvolutiondepthwise->bias_term = 1;
  601. deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
  602. deconvolutiondepthwise->bias_data.fill(0.f);
  603. }
  604. const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;
  605. float* weight = deconvolutiondepthwise->weight_data;
  606. float* bias = deconvolutiondepthwise->bias_data;
  607. for (int i=0; i<channels; i++)
  608. {
  609. float* conv_weight_outch = weight + weight_per_outch * i;
  610. for (int j=0; j<weight_per_outch; j++)
  611. {
  612. conv_weight_outch[j] *= b[i];
  613. }
  614. bias[i] += a[i];
  615. }
  616. }
  617. int top_blob_index_final = batchnorm->tops[0];
  618. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  619. blobs[top_blob_index_final].producer = i;
  620. batchnorm->type = "ncnnfused";
  621. }
  622. return 0;
  623. }
  624. int NetOptimize::fuse_innerproduct_batchnorm()
  625. {
  626. const int layer_count = layers.size();
  627. for (int i=0; i<layer_count; i++)
  628. {
  629. if (layers[i]->type != "InnerProduct")
  630. continue;
  631. // InnerProduct - BatchNorm
  632. int top_blob_index = layers[i]->tops[0];
  633. int j = i + 1;
  634. for (; j<layer_count; j++)
  635. {
  636. if (layers[j]->type != "BatchNorm")
  637. continue;
  638. if (layers[j]->bottoms.size() != 1)
  639. continue;
  640. if (layers[j]->bottoms[0] == top_blob_index)
  641. break;
  642. }
  643. if (j == layer_count)
  644. continue;
  645. // fuse InnerProduct - BatchNorm to InnerProduct
  646. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  647. ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];
  648. fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());
  649. {
  650. int channels = batchnorm->channels;
  651. float eps = batchnorm->eps;
  652. // a = bias - slope * mean / sqrt(var + eps)
  653. // b = slope / sqrt(var + eps)
  654. // value = value * b + a
  655. std::vector<float> a(channels);
  656. std::vector<float> b(channels);
  657. for (int i=0; i<channels; i++)
  658. {
  659. float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
  660. a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
  661. b[i] = batchnorm->slope_data[i] / sqrt_var;
  662. }
  663. if (innerproduct->bias_term == 0)
  664. {
  665. // init bias as zero
  666. innerproduct->bias_term = 1;
  667. innerproduct->bias_data = ncnn::Mat(channels);
  668. innerproduct->bias_data.fill(0.f);
  669. }
  670. const int weight_per_outch = innerproduct->weight_data_size / channels;
  671. float* weight = innerproduct->weight_data;
  672. float* bias = innerproduct->bias_data;
  673. for (int i=0; i<channels; i++)
  674. {
  675. float* conv_weight_outch = weight + weight_per_outch * i;
  676. for (int j=0; j<weight_per_outch; j++)
  677. {
  678. conv_weight_outch[j] *= b[i];
  679. }
  680. bias[i] += a[i];
  681. }
  682. }
  683. int top_blob_index_final = batchnorm->tops[0];
  684. innerproduct->tops[0] = top_blob_index_final;
  685. blobs[top_blob_index_final].producer = i;
  686. batchnorm->type = "ncnnfused";
  687. }
  688. return 0;
  689. }
  690. int NetOptimize::fuse_innerproduct_dropout()
  691. {
  692. const int layer_count = layers.size();
  693. for (int i=0; i<layer_count; i++)
  694. {
  695. if (layers[i]->type != "InnerProduct")
  696. continue;
  697. // InnerProduct - Dropout
  698. int top_blob_index = layers[i]->tops[0];
  699. int j = i + 1;
  700. for (; j<layer_count; j++)
  701. {
  702. if (layers[j]->type != "Dropout")
  703. continue;
  704. if (layers[j]->bottoms.size() != 1)
  705. continue;
  706. if (layers[j]->bottoms[0] == top_blob_index)
  707. break;
  708. }
  709. if (j == layer_count)
  710. continue;
  711. // fuse InnerProduct - Dropout to InnerProduct
  712. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  713. ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];
  714. fprintf(stderr, "fuse_innerproduct_dropout %s %s\n", innerproduct->name.c_str(), dropout->name.c_str());
  715. float scale = dropout->scale;
  716. if (scale != 1.f)
  717. {
  718. const int num_output = innerproduct->num_output;
  719. const int weight_per_outch = innerproduct->weight_data_size / num_output;
  720. float* weight = innerproduct->weight_data;
  721. for (int i=0; i<num_output; i++)
  722. {
  723. float* conv_weight_outch = weight + weight_per_outch * i;
  724. for (int j=0; j<weight_per_outch; j++)
  725. {
  726. conv_weight_outch[j] *= scale;
  727. }
  728. }
  729. if (innerproduct->bias_term)
  730. {
  731. float* bias = innerproduct->bias_data;
  732. for (int i=0; i<num_output; i++)
  733. {
  734. bias[i] *= scale;
  735. }
  736. }
  737. }
  738. int top_blob_index_final = dropout->tops[0];
  739. innerproduct->tops[0] = top_blob_index_final;
  740. blobs[top_blob_index_final].producer = i;
  741. dropout->type = "ncnnfused";
  742. }
  743. return 0;
  744. }
  745. int NetOptimize::fuse_convolution_activation()
  746. {
  747. const int layer_count = layers.size();
  748. for (int i=0; i<layer_count; i++)
  749. {
  750. if (layers[i]->type != "Convolution")
  751. continue;
  752. // Convolution - Activation
  753. int top_blob_index = layers[i]->tops[0];
  754. int j = i + 1;
  755. for (; j<layer_count; j++)
  756. {
  757. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
  758. continue;
  759. if (layers[j]->bottoms.size() != 1)
  760. continue;
  761. if (layers[j]->bottoms[0] == top_blob_index)
  762. break;
  763. }
  764. if (j == layer_count)
  765. continue;
  766. // fuse Convolution - Activation to Convolution
  767. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
  768. ncnn::Layer* activation = layers[j];
  769. fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());
  770. if (activation->type == "ReLU")
  771. {
  772. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  773. if (relu->slope == 0.f)
  774. {
  775. convolution->activation_type = 1;
  776. }
  777. else
  778. {
  779. convolution->activation_type = 2;
  780. convolution->activation_params = ncnn::Mat(1);
  781. convolution->activation_params[0] = relu->slope;
  782. }
  783. }
  784. else if (activation->type == "Clip")
  785. {
  786. ncnn::Clip* clip = (ncnn::Clip*)activation;
  787. convolution->activation_type = 3;
  788. convolution->activation_params = ncnn::Mat(2);
  789. convolution->activation_params[0] = clip->min;
  790. convolution->activation_params[1] = clip->max;
  791. }
  792. else if (activation->type == "Sigmoid")
  793. {
  794. convolution->activation_type = 4;
  795. }
  796. int top_blob_index_final = activation->tops[0];
  797. convolution->tops[0] = top_blob_index_final;
  798. blobs[top_blob_index_final].producer = i;
  799. activation->type = "ncnnfused";
  800. }
  801. return 0;
  802. }
  803. int NetOptimize::fuse_convolutiondepthwise_activation()
  804. {
  805. const int layer_count = layers.size();
  806. for (int i=0; i<layer_count; i++)
  807. {
  808. if (layers[i]->type != "ConvolutionDepthWise")
  809. continue;
  810. // ConvolutionDepthWise - Activation
  811. int top_blob_index = layers[i]->tops[0];
  812. int j = i + 1;
  813. for (; j<layer_count; j++)
  814. {
  815. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
  816. continue;
  817. if (layers[j]->bottoms.size() != 1)
  818. continue;
  819. if (layers[j]->bottoms[0] == top_blob_index)
  820. break;
  821. }
  822. if (j == layer_count)
  823. continue;
  824. // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
  825. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
  826. ncnn::Layer* activation = layers[j];
  827. fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());
  828. if (activation->type == "ReLU")
  829. {
  830. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  831. if (relu->slope == 0.f)
  832. {
  833. convolutiondepthwise->activation_type = 1;
  834. }
  835. else
  836. {
  837. convolutiondepthwise->activation_type = 2;
  838. convolutiondepthwise->activation_params = ncnn::Mat(1);
  839. convolutiondepthwise->activation_params[0] = relu->slope;
  840. }
  841. }
  842. else if (activation->type == "Clip")
  843. {
  844. ncnn::Clip* clip = (ncnn::Clip*)activation;
  845. convolutiondepthwise->activation_type = 3;
  846. convolutiondepthwise->activation_params = ncnn::Mat(2);
  847. convolutiondepthwise->activation_params[0] = clip->min;
  848. convolutiondepthwise->activation_params[1] = clip->max;
  849. }
  850. else if (activation->type == "Sigmoid")
  851. {
  852. convolutiondepthwise->activation_type = 4;
  853. }
  854. int top_blob_index_final = activation->tops[0];
  855. convolutiondepthwise->tops[0] = top_blob_index_final;
  856. blobs[top_blob_index_final].producer = i;
  857. activation->type = "ncnnfused";
  858. }
  859. return 0;
  860. }
  861. int NetOptimize::fuse_deconvolution_activation()
  862. {
  863. const int layer_count = layers.size();
  864. for (int i=0; i<layer_count; i++)
  865. {
  866. if (layers[i]->type != "Deconvolution")
  867. continue;
  868. // Deconvolution - Activation
  869. int top_blob_index = layers[i]->tops[0];
  870. int j = i + 1;
  871. for (; j<layer_count; j++)
  872. {
  873. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
  874. continue;
  875. if (layers[j]->bottoms.size() != 1)
  876. continue;
  877. if (layers[j]->bottoms[0] == top_blob_index)
  878. break;
  879. }
  880. if (j == layer_count)
  881. continue;
  882. // fuse Deconvolution - Activation to Deconvolution
  883. ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
  884. ncnn::Layer* activation = layers[j];
  885. fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());
  886. if (activation->type == "ReLU")
  887. {
  888. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  889. if (relu->slope == 0.f)
  890. {
  891. deconvolution->activation_type = 1;
  892. }
  893. else
  894. {
  895. deconvolution->activation_type = 2;
  896. deconvolution->activation_params = ncnn::Mat(1);
  897. deconvolution->activation_params[0] = relu->slope;
  898. }
  899. }
  900. else if (activation->type == "Clip")
  901. {
  902. ncnn::Clip* clip = (ncnn::Clip*)activation;
  903. deconvolution->activation_type = 3;
  904. deconvolution->activation_params = ncnn::Mat(2);
  905. deconvolution->activation_params[0] = clip->min;
  906. deconvolution->activation_params[1] = clip->max;
  907. }
  908. else if (activation->type == "Sigmoid")
  909. {
  910. deconvolution->activation_type = 4;
  911. }
  912. int top_blob_index_final = activation->tops[0];
  913. deconvolution->tops[0] = top_blob_index_final;
  914. blobs[top_blob_index_final].producer = i;
  915. activation->type = "ncnnfused";
  916. }
  917. return 0;
  918. }
  919. int NetOptimize::fuse_deconvolutiondepthwise_activation()
  920. {
  921. const int layer_count = layers.size();
  922. for (int i=0; i<layer_count; i++)
  923. {
  924. if (layers[i]->type != "DeconvolutionDepthWise")
  925. continue;
  926. // DeconvolutionDepthWise - Activation
  927. int top_blob_index = layers[i]->tops[0];
  928. int j = i + 1;
  929. for (; j<layer_count; j++)
  930. {
  931. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
  932. continue;
  933. if (layers[j]->bottoms.size() != 1)
  934. continue;
  935. if (layers[j]->bottoms[0] == top_blob_index)
  936. break;
  937. }
  938. if (j == layer_count)
  939. continue;
  940. // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
  941. ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
  942. ncnn::Layer* activation = layers[j];
  943. fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());
  944. if (activation->type == "ReLU")
  945. {
  946. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  947. if (relu->slope == 0.f)
  948. {
  949. deconvolutiondepthwise->activation_type = 1;
  950. }
  951. else
  952. {
  953. deconvolutiondepthwise->activation_type = 2;
  954. deconvolutiondepthwise->activation_params = ncnn::Mat(1);
  955. deconvolutiondepthwise->activation_params[0] = relu->slope;
  956. }
  957. }
  958. else if (activation->type == "Clip")
  959. {
  960. ncnn::Clip* clip = (ncnn::Clip*)activation;
  961. deconvolutiondepthwise->activation_type = 3;
  962. deconvolutiondepthwise->activation_params = ncnn::Mat(2);
  963. deconvolutiondepthwise->activation_params[0] = clip->min;
  964. deconvolutiondepthwise->activation_params[1] = clip->max;
  965. }
  966. else if (activation->type == "Sigmoid")
  967. {
  968. deconvolutiondepthwise->activation_type = 4;
  969. }
  970. int top_blob_index_final = activation->tops[0];
  971. deconvolutiondepthwise->tops[0] = top_blob_index_final;
  972. blobs[top_blob_index_final].producer = i;
  973. activation->type = "ncnnfused";
  974. }
  975. return 0;
  976. }
  977. int NetOptimize::fuse_innerproduct_activation()
  978. {
  979. const int layer_count = layers.size();
  980. for (int i=0; i<layer_count; i++)
  981. {
  982. if (layers[i]->type != "InnerProduct")
  983. continue;
  984. // InnerProduct - Activation
  985. int top_blob_index = layers[i]->tops[0];
  986. int j = i + 1;
  987. for (; j<layer_count; j++)
  988. {
  989. if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
  990. continue;
  991. if (layers[j]->bottoms.size() != 1)
  992. continue;
  993. if (layers[j]->bottoms[0] == top_blob_index)
  994. break;
  995. }
  996. if (j == layer_count)
  997. continue;
  998. // fuse InnerProduct - Activation to InnerProduct
  999. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  1000. ncnn::Layer* activation = layers[j];
  1001. fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());
  1002. if (activation->type == "ReLU")
  1003. {
  1004. ncnn::ReLU* relu = (ncnn::ReLU*)activation;
  1005. if (relu->slope == 0.f)
  1006. {
  1007. innerproduct->activation_type = 1;
  1008. }
  1009. else
  1010. {
  1011. innerproduct->activation_type = 2;
  1012. innerproduct->activation_params = ncnn::Mat(1);
  1013. innerproduct->activation_params[0] = relu->slope;
  1014. }
  1015. }
  1016. else if (activation->type == "Clip")
  1017. {
  1018. ncnn::Clip* clip = (ncnn::Clip*)activation;
  1019. innerproduct->activation_type = 3;
  1020. innerproduct->activation_params = ncnn::Mat(2);
  1021. innerproduct->activation_params[0] = clip->min;
  1022. innerproduct->activation_params[1] = clip->max;
  1023. }
  1024. else if (activation->type == "Sigmoid")
  1025. {
  1026. innerproduct->activation_type = 4;
  1027. }
  1028. int top_blob_index_final = activation->tops[0];
  1029. innerproduct->tops[0] = top_blob_index_final;
  1030. blobs[top_blob_index_final].producer = i;
  1031. activation->type = "ncnnfused";
  1032. }
  1033. return 0;
  1034. }
  1035. int NetOptimize::eliminate_dropout()
  1036. {
  1037. const int layer_count = layers.size();
  1038. for (int i=0; i<layer_count; i++)
  1039. {
  1040. if (layers[i]->type != "Dropout")
  1041. continue;
  1042. ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
  1043. if (dropout->scale != 1.f)
  1044. continue;
  1045. // Any - Dropout
  1046. int bottom_blob_index = layers[i]->bottoms[0];
  1047. int j = i - 1;
  1048. for (; j>=0; j--)
  1049. {
  1050. if (layers[j]->type == "ncnnfused")
  1051. continue;
  1052. if (layers[j]->tops.size() != 1)
  1053. continue;
  1054. if (layers[j]->tops[0] == bottom_blob_index)
  1055. break;
  1056. }
  1057. if (j == -1)
  1058. continue;
  1059. ncnn::Layer* any = layers[j];
  1060. fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());
  1061. int top_blob_index_final = dropout->tops[0];
  1062. any->tops[0] = top_blob_index_final;
  1063. blobs[top_blob_index_final].producer = j;
  1064. dropout->type = "ncnnfused";
  1065. }
  1066. return 0;
  1067. }
  1068. int NetOptimize::eliminate_noop()
  1069. {
  1070. const int layer_count = layers.size();
  1071. for (int i=0; i<layer_count; i++)
  1072. {
  1073. if (layers[i]->type != "Noop")
  1074. continue;
  1075. ncnn::Layer* noop = layers[i];
  1076. // Any - Noop
  1077. int bottom_blob_index = layers[i]->bottoms[0];
  1078. int j = i - 1;
  1079. for (; j>=0; j--)
  1080. {
  1081. if (layers[j]->type == "ncnnfused")
  1082. continue;
  1083. if (layers[j]->tops.size() != 1)
  1084. continue;
  1085. if (layers[j]->tops[0] == bottom_blob_index)
  1086. break;
  1087. }
  1088. if (j == -1)
  1089. continue;
  1090. ncnn::Layer* any = layers[j];
  1091. fprintf(stderr, "eliminate_noop %s %s\n", any->name.c_str(), noop->name.c_str());
  1092. int top_blob_index_final = noop->tops[0];
  1093. any->tops[0] = top_blob_index_final;
  1094. blobs[top_blob_index_final].producer = j;
  1095. noop->type = "ncnnfused";
  1096. }
  1097. return 0;
  1098. }
  1099. int NetOptimize::eliminate_reshape_after_global_pooling()
  1100. {
  1101. const int layer_count = layers.size();
  1102. for (int i=0; i<layer_count; i++)
  1103. {
  1104. if (layers[i]->type != "Pooling")
  1105. continue;
  1106. ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
  1107. if (pooling->global_pooling == 0)
  1108. continue;
  1109. // Pooling - Reshape
  1110. int top_blob_index = layers[i]->tops[0];
  1111. int j = i + 1;
  1112. for (; j<layer_count; j++)
  1113. {
  1114. if (layers[j]->type != "Reshape")
  1115. continue;
  1116. if (layers[j]->bottoms.size() != 1)
  1117. continue;
  1118. if (layers[j]->bottoms[0] == top_blob_index)
  1119. break;
  1120. }
  1121. if (j == layer_count)
  1122. continue;
  1123. ncnn::Reshape* reshape = (ncnn::Reshape*)layers[j];
  1124. if (reshape->h != -233 || reshape->c != -233 || reshape->permute != 0)
  1125. continue;
  1126. fprintf(stderr, "eliminate_reshape_after_global_pooling %s %s\n", pooling->name.c_str(), reshape->name.c_str());
  1127. int top_blob_index_final = reshape->tops[0];
  1128. pooling->tops[0] = top_blob_index_final;
  1129. blobs[top_blob_index_final].producer = i;
  1130. reshape->type = "ncnnfused";
  1131. }
  1132. return 0;
  1133. }
  1134. int NetOptimize::eliminate_flatten_after_global_pooling()
  1135. {
  1136. const int layer_count = layers.size();
  1137. for (int i=0; i<layer_count; i++)
  1138. {
  1139. if (layers[i]->type != "Pooling")
  1140. continue;
  1141. ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
  1142. if (pooling->global_pooling == 0)
  1143. continue;
  1144. // Pooling - Flatten
  1145. int top_blob_index = layers[i]->tops[0];
  1146. int j = i + 1;
  1147. for (; j<layer_count; j++)
  1148. {
  1149. if (layers[j]->type != "Flatten")
  1150. continue;
  1151. if (layers[j]->bottoms.size() != 1)
  1152. continue;
  1153. if (layers[j]->bottoms[0] == top_blob_index)
  1154. break;
  1155. }
  1156. if (j == layer_count)
  1157. continue;
  1158. ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];
  1159. fprintf(stderr, "eliminate_flatten_after_global_pooling %s %s\n", pooling->name.c_str(), flatten->name.c_str());
  1160. int top_blob_index_final = flatten->tops[0];
  1161. pooling->tops[0] = top_blob_index_final;
  1162. blobs[top_blob_index_final].producer = i;
  1163. flatten->type = "ncnnfused";
  1164. }
  1165. return 0;
  1166. }
  1167. int NetOptimize::eliminate_flatten_after_innerproduct()
  1168. {
  1169. const int layer_count = layers.size();
  1170. for (int i=0; i<layer_count; i++)
  1171. {
  1172. if (layers[i]->type != "InnerProduct")
  1173. continue;
  1174. // InnerProduct - Flatten
  1175. int top_blob_index = layers[i]->tops[0];
  1176. int j = i + 1;
  1177. for (; j<layer_count; j++)
  1178. {
  1179. if (layers[j]->type != "Flatten")
  1180. continue;
  1181. if (layers[j]->bottoms.size() != 1)
  1182. continue;
  1183. if (layers[j]->bottoms[0] == top_blob_index)
  1184. break;
  1185. }
  1186. if (j == layer_count)
  1187. continue;
  1188. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  1189. ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];
  1190. fprintf(stderr, "eliminate_flatten_after_innerproduct %s %s\n", innerproduct->name.c_str(), flatten->name.c_str());
  1191. int top_blob_index_final = flatten->tops[0];
  1192. innerproduct->tops[0] = top_blob_index_final;
  1193. blobs[top_blob_index_final].producer = i;
  1194. flatten->type = "ncnnfused";
  1195. }
  1196. return 0;
  1197. }
  1198. int NetOptimize::eliminate_reshape_before_binaryop()
  1199. {
  1200. const int layer_count = layers.size();
  1201. for (int i=0; i<layer_count; i++)
  1202. {
  1203. if (layers[i]->type != "Reshape")
  1204. continue;
  1205. ncnn::Reshape* reshape = (ncnn::Reshape*)layers[i];
  1206. if (reshape->w != 1 || reshape->h != 1 || reshape->permute != 0)
  1207. continue;
  1208. // Reshape - BinaryOp
  1209. int top_blob_index = layers[i]->tops[0];
  1210. int j = i + 1;
  1211. for (; j<layer_count; j++)
  1212. {
  1213. if (layers[j]->type != "BinaryOp")
  1214. continue;
  1215. if (layers[j]->bottoms.size() != 2)
  1216. continue;
  1217. if (layers[j]->bottoms[0] == top_blob_index || layers[j]->bottoms[1] == top_blob_index)
  1218. break;
  1219. }
  1220. if (j == layer_count)
  1221. continue;
  1222. ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];
  1223. fprintf(stderr, "eliminate_reshape_before_binaryop %s %s\n", reshape->name.c_str(), binaryop->name.c_str());
  1224. int bottom_blob_index_final = reshape->bottoms[0];
  1225. if (layers[j]->bottoms[0] == top_blob_index)
  1226. binaryop->bottoms[0] = bottom_blob_index_final;
  1227. if (layers[j]->bottoms[1] == top_blob_index)
  1228. binaryop->bottoms[1] = bottom_blob_index_final;
  1229. blobs[bottom_blob_index_final].consumers.erase(std::find(blobs[bottom_blob_index_final].consumers.begin(), blobs[bottom_blob_index_final].consumers.end(), i));
  1230. blobs[bottom_blob_index_final].consumers.push_back(j);
  1231. reshape->type = "ncnnfused";
  1232. }
  1233. return 0;
  1234. }
  1235. int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()
  1236. {
  1237. const int layer_count = layers.size();
  1238. for (int i=0; i<layer_count; i++)
  1239. {
  1240. if (layers[i]->type != "Pooling")
  1241. continue;
  1242. ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
  1243. if (pooling->global_pooling == 0)
  1244. continue;
  1245. // Pooling - Convolution
  1246. int top_blob_index = layers[i]->tops[0];
  1247. int j = i + 1;
  1248. for (; j<layer_count; j++)
  1249. {
  1250. if (layers[j]->type != "Convolution")
  1251. continue;
  1252. if (layers[j]->bottoms.size() != 1)
  1253. continue;
  1254. if (layers[j]->bottoms[0] == top_blob_index)
  1255. break;
  1256. }
  1257. if (j == layer_count)
  1258. continue;
  1259. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];
  1260. fprintf(stderr, "replace_convolution_with_innerproduct_after_global_pooling %s %s\n", pooling->name.c_str(), convolution->name.c_str());
  1261. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");
  1262. innerproduct->type = "InnerProduct";
  1263. innerproduct->name = convolution->name;
  1264. innerproduct->bottoms = convolution->bottoms;
  1265. innerproduct->tops = convolution->tops;
  1266. ncnn::ParamDict pd;
  1267. innerproduct->load_param(pd);
  1268. innerproduct->num_output = convolution->num_output;
  1269. innerproduct->bias_term = convolution->bias_term;
  1270. innerproduct->weight_data_size = convolution->weight_data_size;
  1271. innerproduct->weight_data = convolution->weight_data;
  1272. innerproduct->bias_data = convolution->bias_data;
  1273. innerproduct->activation_type = convolution->activation_type;
  1274. innerproduct->activation_params = convolution->activation_params;
  1275. layers[j] = innerproduct;
  1276. delete convolution;
  1277. }
  1278. return 0;
  1279. }
  1280. int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct()
  1281. {
  1282. const int layer_count = layers.size();
  1283. for (;;)
  1284. {
  1285. bool replaced = false;
  1286. for (int i=0; i<layer_count; i++)
  1287. {
  1288. if (layers[i]->type != "InnerProduct")
  1289. continue;
  1290. // InnerProduct - Convolution
  1291. int top_blob_index = layers[i]->tops[0];
  1292. int j = i + 1;
  1293. for (; j<layer_count; j++)
  1294. {
  1295. if (layers[j]->type != "Convolution")
  1296. continue;
  1297. if (layers[j]->bottoms.size() != 1)
  1298. continue;
  1299. if (layers[j]->bottoms[0] == top_blob_index)
  1300. break;
  1301. }
  1302. if (j == layer_count)
  1303. continue;
  1304. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
  1305. ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];
  1306. fprintf(stderr, "replace_convolution_with_innerproduct_after_innerproduct %s %s\n", innerproduct->name.c_str(), convolution->name.c_str());
  1307. ncnn::InnerProduct* innerproduct2 = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");
  1308. innerproduct2->type = "InnerProduct";
  1309. innerproduct2->name = convolution->name;
  1310. innerproduct2->bottoms = convolution->bottoms;
  1311. innerproduct2->tops = convolution->tops;
  1312. ncnn::ParamDict pd;
  1313. innerproduct2->load_param(pd);
  1314. innerproduct2->num_output = convolution->num_output;
  1315. innerproduct2->bias_term = convolution->bias_term;
  1316. innerproduct2->weight_data_size = convolution->weight_data_size;
  1317. innerproduct2->weight_data = convolution->weight_data;
  1318. innerproduct2->bias_data = convolution->bias_data;
  1319. innerproduct2->activation_type = convolution->activation_type;
  1320. innerproduct2->activation_params = convolution->activation_params;
  1321. layers[j] = innerproduct2;
  1322. delete convolution;
  1323. replaced = true;
  1324. }
  1325. if (!replaced)
  1326. break;
  1327. }
  1328. return 0;
  1329. }
  1330. int NetOptimize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp)
  1331. {
  1332. const int count = m.w;
  1333. const int* ptr = m;
  1334. fprintf(pp, " -%d=%d", 23300 + id, count);
  1335. for (int i=0; i<count; i++)
  1336. {
  1337. fprintf(pp, ",%d", ptr[i]);
  1338. }
  1339. return 0;
  1340. }
  1341. int NetOptimize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
  1342. {
  1343. const int count = m.w;
  1344. const float* ptr = m;
  1345. fprintf(pp, " -%d=%d", 23300 + id, count);
  1346. for (int i=0; i<count; i++)
  1347. {
  1348. fprintf(pp, ",%e", ptr[i]);
  1349. }
  1350. return 0;
  1351. }
  // Round sz up to the next multiple of n. The bit-mask arithmetic only yields
  // a multiple of n when n is a power of two.
  static inline size_t alignSize(size_t sz, int n)
  {
      const size_t low_bits = static_cast<size_t>(n) - 1;
      return (sz + low_bits) & ~low_bits;
  }
  1356. int NetOptimize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp)
  1357. {
  1358. int p0 = ftell(bp);
  1359. ncnn::Mat data_flattened = data.reshape(data.w * data.h * data.c);
  1360. if (storage_type == 1 && tag == 0)
  1361. {
  1362. tag = 0x01306B47; // fp16 magic
  1363. fwrite(&tag, sizeof(int), 1, bp);
  1364. ncnn::Mat data_flattened_fp16;
  1365. ncnn::cast_float32_to_float16(data_flattened, data_flattened_fp16);
  1366. fwrite(data_flattened_fp16.data, data_flattened_fp16.elemsize, data_flattened_fp16.w, bp);
  1367. }
  1368. else
  1369. {
  1370. fwrite(&tag, sizeof(int), 1, bp);
  1371. fwrite(data_flattened.data, data_flattened.elemsize, data_flattened.w, bp);
  1372. }
  1373. // padding to 32bit align
  1374. int nwrite = ftell(bp) - p0;
  1375. int nalign = alignSize(nwrite, 4);
  1376. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1377. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1378. return 0;
  1379. }
  1380. int NetOptimize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp)
  1381. {
  1382. int p0 = ftell(bp);
  1383. ncnn::Mat data_flattened = data.reshape(data.w * data.h * data.c);
  1384. fwrite(data_flattened.data, data_flattened.elemsize, data_flattened.w, bp);
  1385. // padding to 32bit align
  1386. int nwrite = ftell(bp) - p0;
  1387. int nalign = alignSize(nwrite, 4);
  1388. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1389. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1390. return 0;
  1391. }
  1392. int NetOptimize::save(const char* parampath, const char* binpath)
  1393. {
  1394. FILE* pp = fopen(parampath, "wb");
  1395. FILE* bp = fopen(binpath, "wb");
  1396. fprintf(pp, "7767517\n");
  1397. const int layer_count = layers.size();
  1398. int layer_count_fused = 0;
  1399. std::set<std::string> blob_names;
  1400. for (int i=0; i<layer_count; i++)
  1401. {
  1402. const ncnn::Layer* layer = layers[i];
  1403. if (layer->type == "ncnnfused")
  1404. continue;
  1405. layer_count_fused++;
  1406. int bottom_count = layer->bottoms.size();
  1407. for (int j=0; j<bottom_count; j++)
  1408. {
  1409. int bottom_blob_index = layer->bottoms[j];
  1410. blob_names.insert(blobs[bottom_blob_index].name);
  1411. }
  1412. int top_count = layer->tops.size();
  1413. for (int j=0; j<top_count; j++)
  1414. {
  1415. int top_blob_index = layer->tops[j];
  1416. blob_names.insert(blobs[top_blob_index].name);
  1417. }
  1418. }
  1419. int blob_count_fused = blob_names.size();
  1420. fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused);
  1421. for (int i=0; i<layer_count; i++)
  1422. {
  1423. const ncnn::Layer* layer = layers[i];
  1424. if (layer->type == "ncnnfused")
  1425. continue;
  1426. int bottom_count = layer->bottoms.size();
  1427. int top_count = layer->tops.size();
  1428. fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count);
  1429. for (int j=0; j<bottom_count; j++)
  1430. {
  1431. int bottom_blob_index = layer->bottoms[j];
  1432. fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str());
  1433. }
  1434. for (int j=0; j<top_count; j++)
  1435. {
  1436. int top_blob_index = layer->tops[j];
  1437. fprintf(pp, " %s", blobs[top_blob_index].name.c_str());
  1438. }
  1439. ncnn::Layer* layer_default = ncnn::create_layer(layer->typeindex);
  1440. ncnn::ParamDict pd;
  1441. layer_default->load_param(pd);
  1442. #define fprintf_param_value(format, phase) \
  1443. { if (op->phase != op_default->phase) fprintf(pp, format, op->phase); }
  1444. if (layer->type == "BatchNorm")
  1445. {
  1446. ncnn::BatchNorm* op = (ncnn::BatchNorm*)layer;
  1447. ncnn::BatchNorm* op_default = (ncnn::BatchNorm*)layer_default;
  1448. fprintf_param_value(" 0=%d", channels)
  1449. fprintf_param_value(" 1=%e", eps)
  1450. fwrite_weight_data(op->slope_data, bp);
  1451. fwrite_weight_data(op->mean_data, bp);
  1452. fwrite_weight_data(op->var_data, bp);
  1453. fwrite_weight_data(op->bias_data, bp);
  1454. }
  1455. else if (layer->type == "Bias")
  1456. {
  1457. ncnn::Bias* op = (ncnn::Bias*)layer;
  1458. ncnn::Bias* op_default = (ncnn::Bias*)layer_default;
  1459. fprintf_param_value(" 0=%d", bias_data_size)
  1460. fwrite_weight_data(op->bias_data, bp);
  1461. }
  1462. else if (layer->type == "BinaryOp")
  1463. {
  1464. ncnn::BinaryOp* op = (ncnn::BinaryOp*)layer;
  1465. ncnn::BinaryOp* op_default = (ncnn::BinaryOp*)layer_default;
  1466. fprintf_param_value(" 0=%d", op_type)
  1467. fprintf_param_value(" 1=%d", with_scalar)
  1468. fprintf_param_value(" 2=%e", b)
  1469. }
  1470. else if (layer->type == "Clip")
  1471. {
  1472. ncnn::Clip* op = (ncnn::Clip*)layer;
  1473. ncnn::Clip* op_default = (ncnn::Clip*)layer_default;
  1474. fprintf_param_value(" 0=%e", min)
  1475. fprintf_param_value(" 1=%e", max)
  1476. }
  1477. else if (layer->type == "Concat")
  1478. {
  1479. ncnn::Concat* op = (ncnn::Concat*)layer;
  1480. ncnn::Concat* op_default = (ncnn::Concat*)layer_default;
  1481. fprintf_param_value(" 0=%d", axis)
  1482. }
  1483. else if (layer->type == "Convolution")
  1484. {
  1485. ncnn::Convolution* op = (ncnn::Convolution*)layer;
  1486. ncnn::Convolution* op_default = (ncnn::Convolution*)layer_default;
  1487. fprintf_param_value(" 0=%d", num_output)
  1488. fprintf_param_value(" 1=%d", kernel_w)
  1489. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1490. fprintf_param_value(" 2=%d", dilation_w)
  1491. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1492. fprintf_param_value(" 3=%d", stride_w)
  1493. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1494. fprintf_param_value(" 4=%d", pad_left)
  1495. { if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); }
  1496. { if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); }
  1497. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); }
  1498. fprintf_param_value(" 18=%e", pad_value)
  1499. fprintf_param_value(" 5=%d", bias_term)
  1500. fprintf_param_value(" 6=%d", weight_data_size)
  1501. fprintf_param_value(" 8=%d", int8_scale_term)
  1502. fprintf_param_value(" 9=%d", activation_type)
  1503. { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); }
  1504. fprintf_param_value(" 17=%d", impl_type)
  1505. fwrite_weight_tag_data(0, op->weight_data, bp);
  1506. fwrite_weight_data(op->bias_data, bp);
  1507. }
  1508. else if (layer->type == "ConvolutionDepthWise")
  1509. {
  1510. ncnn::ConvolutionDepthWise* op = (ncnn::ConvolutionDepthWise*)layer;
  1511. ncnn::ConvolutionDepthWise* op_default = (ncnn::ConvolutionDepthWise*)layer_default;
  1512. fprintf_param_value(" 0=%d", num_output)
  1513. fprintf_param_value(" 1=%d", kernel_w)
  1514. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1515. fprintf_param_value(" 2=%d", dilation_w)
  1516. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1517. fprintf_param_value(" 3=%d", stride_w)
  1518. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1519. fprintf_param_value(" 4=%d", pad_left)
  1520. { if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); }
  1521. { if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); }
  1522. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); }
  1523. fprintf_param_value(" 18=%e", pad_value)
  1524. fprintf_param_value(" 5=%d", bias_term)
  1525. fprintf_param_value(" 6=%d", weight_data_size)
  1526. fprintf_param_value(" 7=%d", group)
  1527. fprintf_param_value(" 8=%d", int8_scale_term)
  1528. fprintf_param_value(" 9=%d", activation_type)
  1529. { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); }
  1530. fwrite_weight_tag_data(0, op->weight_data, bp);
  1531. fwrite_weight_data(op->bias_data, bp);
  1532. }
  1533. else if (layer->type == "Crop")
  1534. {
  1535. ncnn::Crop* op = (ncnn::Crop*)layer;
  1536. ncnn::Crop* op_default = (ncnn::Crop*)layer_default;
  1537. fprintf_param_value(" 0=%d", woffset)
  1538. fprintf_param_value(" 1=%d", hoffset)
  1539. fprintf_param_value(" 2=%d", coffset)
  1540. fprintf_param_value(" 3=%d", outw)
  1541. fprintf_param_value(" 4=%d", outh)
  1542. fprintf_param_value(" 5=%d", outc)
  1543. fprintf_param_value(" 6=%d", woffset2)
  1544. fprintf_param_value(" 7=%d", hoffset2)
  1545. fprintf_param_value(" 8=%d", coffset2)
  1546. { if (!op->starts.empty()) fprintf_param_int_array(9, op->starts, pp); }
  1547. { if (!op->ends.empty()) fprintf_param_int_array(10, op->ends, pp); }
  1548. { if (!op->axes.empty()) fprintf_param_int_array(11, op->axes, pp); }
  1549. }
  1550. else if (layer->type == "Deconvolution")
  1551. {
  1552. ncnn::Deconvolution* op = (ncnn::Deconvolution*)layer;
  1553. ncnn::Deconvolution* op_default = (ncnn::Deconvolution*)layer_default;
  1554. fprintf_param_value(" 0=%d", num_output)
  1555. fprintf_param_value(" 1=%d", kernel_w)
  1556. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1557. fprintf_param_value(" 2=%d", dilation_w)
  1558. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1559. fprintf_param_value(" 3=%d", stride_w)
  1560. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1561. fprintf_param_value(" 4=%d", pad_left)
  1562. { if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); }
  1563. { if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); }
  1564. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); }
  1565. fprintf_param_value(" 18=%d", output_pad_right)
  1566. { if (op->output_pad_bottom != op->output_pad_right) fprintf(pp, " 19=%d", op->output_pad_bottom); }
  1567. fprintf_param_value(" 20=%d", output_w)
  1568. { if (op->output_h != op->output_w) fprintf(pp, " 21=%d", op->output_h); }
  1569. fprintf_param_value(" 5=%d", bias_term)
  1570. fprintf_param_value(" 6=%d", weight_data_size)
  1571. fprintf_param_value(" 9=%d", activation_type)
  1572. { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); }
  1573. fwrite_weight_tag_data(0, op->weight_data, bp);
  1574. fwrite_weight_data(op->bias_data, bp);
  1575. }
  1576. else if (layer->type == "DeconvolutionDepthWise")
  1577. {
  1578. ncnn::DeconvolutionDepthWise* op = (ncnn::DeconvolutionDepthWise*)layer;
  1579. ncnn::DeconvolutionDepthWise* op_default = (ncnn::DeconvolutionDepthWise*)layer_default;
  1580. fprintf_param_value(" 0=%d", num_output)
  1581. fprintf_param_value(" 1=%d", kernel_w)
  1582. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1583. fprintf_param_value(" 2=%d", dilation_w)
  1584. { if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); }
  1585. fprintf_param_value(" 3=%d", stride_w)
  1586. { if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); }
  1587. fprintf_param_value(" 4=%d", pad_left)
  1588. { if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); }
  1589. { if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); }
  1590. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); }
  1591. fprintf_param_value(" 18=%d", output_pad_right)
  1592. { if (op->output_pad_bottom != op->output_pad_right) fprintf(pp, " 19=%d", op->output_pad_bottom); }
  1593. fprintf_param_value(" 20=%d", output_w)
  1594. { if (op->output_h != op->output_w) fprintf(pp, " 21=%d", op->output_h); }
  1595. fprintf_param_value(" 5=%d", bias_term)
  1596. fprintf_param_value(" 6=%d", weight_data_size)
  1597. fprintf_param_value(" 7=%d", group)
  1598. fprintf_param_value(" 9=%d", activation_type)
  1599. { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); }
  1600. fwrite_weight_tag_data(0, op->weight_data, bp);
  1601. fwrite_weight_data(op->bias_data, bp);
  1602. }
  1603. else if (layer->type == "DetectionOutput")
  1604. {
  1605. ncnn::DetectionOutput* op = (ncnn::DetectionOutput*)layer;
  1606. ncnn::DetectionOutput* op_default = (ncnn::DetectionOutput*)layer_default;
  1607. fprintf_param_value(" 0=%d", num_class)
  1608. fprintf_param_value(" 1=%e", nms_threshold)
  1609. fprintf_param_value(" 2=%d", nms_top_k)
  1610. fprintf_param_value(" 3=%d", keep_top_k)
  1611. fprintf_param_value(" 4=%e", confidence_threshold)
  1612. fprintf_param_value(" 5=%e", variances[0])
  1613. fprintf_param_value(" 6=%e", variances[1])
  1614. fprintf_param_value(" 7=%e", variances[2])
  1615. fprintf_param_value(" 8=%e", variances[3])
  1616. }
  1617. else if (layer->type == "Dropout")
  1618. {
  1619. ncnn::Dropout* op = (ncnn::Dropout*)layer;
  1620. ncnn::Dropout* op_default = (ncnn::Dropout*)layer_default;
  1621. fprintf_param_value(" 0=%e", scale)
  1622. }
  1623. else if (layer->type == "Eltwise")
  1624. {
  1625. ncnn::Eltwise* op = (ncnn::Eltwise*)layer;
  1626. ncnn::Eltwise* op_default = (ncnn::Eltwise*)layer_default;
  1627. fprintf_param_value(" 0=%d", op_type)
  1628. { if (!op->coeffs.empty()) fprintf_param_float_array(1, op->coeffs, pp); }
  1629. }
  1630. else if (layer->type == "ELU")
  1631. {
  1632. ncnn::ELU* op = (ncnn::ELU*)layer;
  1633. ncnn::ELU* op_default = (ncnn::ELU*)layer_default;
  1634. fprintf_param_value(" 0=%e", alpha)
  1635. }
  1636. else if (layer->type == "Exp")
  1637. {
  1638. ncnn::Exp* op = (ncnn::Exp*)layer;
  1639. ncnn::Exp* op_default = (ncnn::Exp*)layer_default;
  1640. fprintf_param_value(" 0=%e", base)
  1641. fprintf_param_value(" 1=%e", scale)
  1642. fprintf_param_value(" 2=%e", shift)
  1643. }
  1644. else if (layer->type == "ExpandDims")
  1645. {
  1646. ncnn::ExpandDims* op = (ncnn::ExpandDims*)layer;
  1647. ncnn::ExpandDims* op_default = (ncnn::ExpandDims*)layer_default;
  1648. fprintf_param_value(" 0=%d", expand_w)
  1649. fprintf_param_value(" 1=%d", expand_h)
  1650. fprintf_param_value(" 2=%d", expand_c)
  1651. { if (!op->axes.empty()) fprintf_param_int_array(0, op->axes, pp); }
  1652. }
  1653. else if (layer->type == "HardSigmoid")
  1654. {
  1655. ncnn::HardSigmoid* op = (ncnn::HardSigmoid*)layer;
  1656. ncnn::HardSigmoid* op_default = (ncnn::HardSigmoid*)layer_default;
  1657. fprintf_param_value(" 0=%e", alpha)
  1658. fprintf_param_value(" 1=%e", beta)
  1659. }
  1660. else if (layer->type == "HardSwish")
  1661. {
  1662. ncnn::HardSwish* op = (ncnn::HardSwish*)layer;
  1663. ncnn::HardSwish* op_default = (ncnn::HardSwish*)layer_default;
  1664. fprintf_param_value(" 0=%e", alpha)
  1665. fprintf_param_value(" 1=%e", beta)
  1666. }
  1667. else if (layer->type == "InnerProduct")
  1668. {
  1669. ncnn::InnerProduct* op = (ncnn::InnerProduct*)layer;
  1670. ncnn::InnerProduct* op_default = (ncnn::InnerProduct*)layer_default;
  1671. fprintf_param_value(" 0=%d", num_output)
  1672. fprintf_param_value(" 1=%d", bias_term)
  1673. fprintf_param_value(" 2=%d", weight_data_size)
  1674. fprintf_param_value(" 8=%d", int8_scale_term)
  1675. fprintf_param_value(" 9=%d", activation_type)
  1676. { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); }
  1677. fwrite_weight_tag_data(0, op->weight_data, bp);
  1678. fwrite_weight_data(op->bias_data, bp);
  1679. }
  1680. else if (layer->type == "Input")
  1681. {
  1682. ncnn::Input* op = (ncnn::Input*)layer;
  1683. ncnn::Input* op_default = (ncnn::Input*)layer_default;
  1684. fprintf_param_value(" 0=%d", w)
  1685. fprintf_param_value(" 1=%d", h)
  1686. fprintf_param_value(" 2=%d", c)
  1687. }
  1688. else if (layer->type == "InstanceNorm")
  1689. {
  1690. ncnn::InstanceNorm* op = (ncnn::InstanceNorm*)layer;
  1691. ncnn::InstanceNorm* op_default = (ncnn::InstanceNorm*)layer_default;
  1692. fprintf_param_value(" 0=%d", channels)
  1693. fprintf_param_value(" 1=%e", eps)
  1694. fwrite_weight_data(op->gamma_data, bp);
  1695. fwrite_weight_data(op->beta_data, bp);
  1696. }
  1697. else if (layer->type == "Interp")
  1698. {
  1699. ncnn::Interp* op = (ncnn::Interp*)layer;
  1700. ncnn::Interp* op_default = (ncnn::Interp*)layer_default;
  1701. fprintf_param_value(" 0=%d", resize_type)
  1702. fprintf_param_value(" 1=%e", height_scale)
  1703. fprintf_param_value(" 2=%e", width_scale)
  1704. fprintf_param_value(" 3=%d", output_height)
  1705. fprintf_param_value(" 4=%d", output_width)
  1706. }
  1707. else if (layer->type == "Log")
  1708. {
  1709. ncnn::Log* op = (ncnn::Log*)layer;
  1710. ncnn::Log* op_default = (ncnn::Log*)layer_default;
  1711. fprintf_param_value(" 0=%e", base)
  1712. fprintf_param_value(" 1=%e", scale)
  1713. fprintf_param_value(" 2=%e", shift)
  1714. }
  1715. else if (layer->type == "LRN")
  1716. {
  1717. ncnn::LRN* op = (ncnn::LRN*)layer;
  1718. ncnn::LRN* op_default = (ncnn::LRN*)layer_default;
  1719. fprintf_param_value(" 0=%d", region_type)
  1720. fprintf_param_value(" 1=%d", local_size)
  1721. fprintf_param_value(" 2=%e", alpha)
  1722. fprintf_param_value(" 3=%e", beta)
  1723. fprintf_param_value(" 4=%e", bias)
  1724. }
  1725. else if (layer->type == "MemoryData")
  1726. {
  1727. ncnn::MemoryData* op = (ncnn::MemoryData*)layer;
  1728. ncnn::MemoryData* op_default = (ncnn::MemoryData*)layer_default;
  1729. fprintf_param_value(" 0=%d", w)
  1730. fprintf_param_value(" 1=%d", h)
  1731. fprintf_param_value(" 2=%d", c)
  1732. fwrite_weight_data(op->data, bp);
  1733. }
  1734. else if (layer->type == "MVN")
  1735. {
  1736. ncnn::MVN* op = (ncnn::MVN*)layer;
  1737. ncnn::MVN* op_default = (ncnn::MVN*)layer_default;
  1738. fprintf_param_value(" 0=%d", normalize_variance)
  1739. fprintf_param_value(" 1=%d", across_channels)
  1740. fprintf_param_value(" 2=%e", eps)
  1741. }
  1742. else if (layer->type == "Normalize")
  1743. {
  1744. ncnn::Normalize* op = (ncnn::Normalize*)layer;
  1745. ncnn::Normalize* op_default = (ncnn::Normalize*)layer_default;
  1746. fprintf_param_value(" 0=%d", across_spatial)
  1747. fprintf_param_value(" 1=%d", channel_shared)
  1748. fprintf_param_value(" 2=%e", eps)
  1749. fprintf_param_value(" 3=%d", scale_data_size)
  1750. fprintf_param_value(" 4=%d", across_channel)
  1751. fwrite_weight_data(op->scale_data, bp);
  1752. }
  1753. else if (layer->type == "Padding")
  1754. {
  1755. ncnn::Padding* op = (ncnn::Padding*)layer;
  1756. ncnn::Padding* op_default = (ncnn::Padding*)layer_default;
  1757. fprintf_param_value(" 0=%d", top)
  1758. fprintf_param_value(" 1=%d", bottom)
  1759. fprintf_param_value(" 2=%d", left)
  1760. fprintf_param_value(" 3=%d", right)
  1761. fprintf_param_value(" 4=%d", type)
  1762. fprintf_param_value(" 5=%e", value)
  1763. }
  1764. else if (layer->type == "Permute")
  1765. {
  1766. ncnn::Permute* op = (ncnn::Permute*)layer;
  1767. ncnn::Permute* op_default = (ncnn::Permute*)layer_default;
  1768. fprintf_param_value(" 0=%d", order_type)
  1769. }
  1770. else if (layer->type == "Pooling")
  1771. {
  1772. ncnn::Pooling* op = (ncnn::Pooling*)layer;
  1773. ncnn::Pooling* op_default = (ncnn::Pooling*)layer_default;
  1774. fprintf_param_value(" 0=%d", pooling_type)
  1775. fprintf_param_value(" 1=%d", kernel_w)
  1776. { if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); }
  1777. fprintf_param_value(" 2=%d", stride_w)
  1778. { if (op->stride_h != op->stride_w) fprintf(pp, " 12=%d", op->stride_h); }
  1779. fprintf_param_value(" 3=%d", pad_left)
  1780. { if (op->pad_top != op->pad_left) fprintf(pp, " 13=%d", op->pad_top); }
  1781. { if (op->pad_right != op->pad_left) fprintf(pp, " 14=%d", op->pad_right); }
  1782. { if (op->pad_bottom != op->pad_top) fprintf(pp, " 15=%d", op->pad_bottom); }
  1783. fprintf_param_value(" 4=%d", global_pooling)
  1784. fprintf_param_value(" 5=%d", pad_mode)
  1785. }
  1786. else if (layer->type == "Power")
  1787. {
  1788. ncnn::Power* op = (ncnn::Power*)layer;
  1789. ncnn::Power* op_default = (ncnn::Power*)layer_default;
  1790. fprintf_param_value(" 0=%e", power)
  1791. fprintf_param_value(" 1=%e", scale)
  1792. fprintf_param_value(" 2=%e", shift)
  1793. }
  1794. else if (layer->type == "PReLU")
  1795. {
  1796. ncnn::PReLU* op = (ncnn::PReLU*)layer;
  1797. ncnn::PReLU* op_default = (ncnn::PReLU*)layer_default;
  1798. fprintf_param_value(" 0=%d", num_slope)
  1799. fwrite_weight_data(op->slope_data, bp);
  1800. }
  1801. else if (layer->type == "PriorBox")
  1802. {
  1803. ncnn::PriorBox* op = (ncnn::PriorBox*)layer;
  1804. ncnn::PriorBox* op_default = (ncnn::PriorBox*)layer_default;
  1805. { if (!op->min_sizes.empty()) fprintf_param_float_array(0, op->min_sizes, pp); }
  1806. { if (!op->max_sizes.empty()) fprintf_param_float_array(1, op->max_sizes, pp); }
  1807. { if (!op->aspect_ratios.empty()) fprintf_param_float_array(2, op->aspect_ratios, pp); }
  1808. fprintf_param_value(" 3=%e", variances[0])
  1809. fprintf_param_value(" 4=%e", variances[1])
  1810. fprintf_param_value(" 5=%e", variances[2])
  1811. fprintf_param_value(" 6=%e", variances[3])
  1812. fprintf_param_value(" 7=%d", flip)
  1813. fprintf_param_value(" 8=%d", clip)
  1814. fprintf_param_value(" 9=%d", image_width)
  1815. fprintf_param_value(" 10=%d", image_height)
  1816. fprintf_param_value(" 11=%e", step_width)
  1817. fprintf_param_value(" 12=%e", step_height)
  1818. fprintf_param_value(" 13=%e", offset)
  1819. }
  1820. else if (layer->type == "Proposal")
  1821. {
  1822. ncnn::Proposal* op = (ncnn::Proposal*)layer;
  1823. ncnn::Proposal* op_default = (ncnn::Proposal*)layer_default;
  1824. fprintf_param_value(" 0=%d", feat_stride)
  1825. fprintf_param_value(" 1=%d", base_size)
  1826. fprintf_param_value(" 2=%d", pre_nms_topN)
  1827. fprintf_param_value(" 3=%d", after_nms_topN)
  1828. fprintf_param_value(" 4=%e", nms_thresh)
  1829. fprintf_param_value(" 5=%d", min_size)
  1830. }
  1831. else if (layer->type == "PSROIPooling")
  1832. {
  1833. ncnn::PSROIPooling* op = (ncnn::PSROIPooling*)layer;
  1834. ncnn::PSROIPooling* op_default = (ncnn::PSROIPooling*)layer_default;
  1835. fprintf_param_value(" 0=%d", pooled_width)
  1836. fprintf_param_value(" 1=%d", pooled_height)
  1837. fprintf_param_value(" 2=%e", spatial_scale)
  1838. fprintf_param_value(" 3=%d", output_dim)
  1839. }
  1840. else if (layer->type == "Quantize")
  1841. {
  1842. ncnn::Quantize* op = (ncnn::Quantize*)layer;
  1843. ncnn::Quantize* op_default = (ncnn::Quantize*)layer_default;
  1844. fprintf_param_value(" 0=%e", scale)
  1845. }
  1846. else if (layer->type == "Reduction")
  1847. {
  1848. ncnn::Reduction* op = (ncnn::Reduction*)layer;
  1849. ncnn::Reduction* op_default = (ncnn::Reduction*)layer_default;
  1850. fprintf_param_value(" 0=%d", operation)
  1851. fprintf_param_value(" 1=%d", reduce_all)
  1852. fprintf_param_value(" 2=%e", coeff)
  1853. }
  1854. else if (layer->type == "ReLU")
  1855. {
  1856. ncnn::ReLU* op = (ncnn::ReLU*)layer;
  1857. ncnn::ReLU* op_default = (ncnn::ReLU*)layer_default;
  1858. fprintf_param_value(" 0=%e", slope)
  1859. }
  1860. else if (layer->type == "Reorg")
  1861. {
  1862. ncnn::Reorg* op = (ncnn::Reorg*)layer;
  1863. ncnn::Reorg* op_default = (ncnn::Reorg*)layer_default;
  1864. fprintf_param_value(" 0=%d", stride)
  1865. }
  1866. else if (layer->type == "Requantize")
  1867. {
  1868. ncnn::Requantize* op = (ncnn::Requantize*)layer;
  1869. ncnn::Requantize* op_default = (ncnn::Requantize*)layer_default;
  1870. fprintf_param_value(" 0=%e", scale_in)
  1871. fprintf_param_value(" 1=%e", scale_out)
  1872. fprintf_param_value(" 2=%d", bias_term)
  1873. fprintf_param_value(" 3=%d", bias_data_size)
  1874. fprintf_param_value(" 4=%d", fusion_relu)
  1875. }
  1876. else if (layer->type == "Reshape")
  1877. {
  1878. ncnn::Reshape* op = (ncnn::Reshape*)layer;
  1879. ncnn::Reshape* op_default = (ncnn::Reshape*)layer_default;
  1880. fprintf_param_value(" 0=%d", w)
  1881. fprintf_param_value(" 1=%d", h)
  1882. fprintf_param_value(" 2=%d", c)
  1883. fprintf_param_value(" 3=%d", permute)
  1884. }
  1885. else if (layer->type == "ROIAlign")
  1886. {
  1887. ncnn::ROIAlign* op = (ncnn::ROIAlign*)layer;
  1888. ncnn::ROIAlign* op_default = (ncnn::ROIAlign*)layer_default;
  1889. fprintf_param_value(" 0=%d", pooled_width)
  1890. fprintf_param_value(" 1=%d", pooled_height)
  1891. fprintf_param_value(" 2=%e", spatial_scale)
  1892. }
  1893. else if (layer->type == "ROIPooling")
  1894. {
  1895. ncnn::ROIPooling* op = (ncnn::ROIPooling*)layer;
  1896. ncnn::ROIPooling* op_default = (ncnn::ROIPooling*)layer_default;
  1897. fprintf_param_value(" 0=%d", pooled_width)
  1898. fprintf_param_value(" 1=%d", pooled_height)
  1899. fprintf_param_value(" 2=%e", spatial_scale)
  1900. }
  1901. else if (layer->type == "Scale")
  1902. {
  1903. ncnn::Scale* op = (ncnn::Scale*)layer;
  1904. ncnn::Scale* op_default = (ncnn::Scale*)layer_default;
  1905. fprintf_param_value(" 0=%d", scale_data_size)
  1906. fprintf_param_value(" 1=%d", bias_term)
  1907. fwrite_weight_data(op->scale_data, bp);
  1908. fwrite_weight_data(op->bias_data, bp);
  1909. }
  1910. else if (layer->type == "ShuffleChannel")
  1911. {
  1912. ncnn::ShuffleChannel* op = (ncnn::ShuffleChannel*)layer;
  1913. ncnn::ShuffleChannel* op_default = (ncnn::ShuffleChannel*)layer_default;
  1914. fprintf_param_value(" 0=%d", group)
  1915. }
  1916. else if (layer->type == "Slice")
  1917. {
  1918. ncnn::Slice* op = (ncnn::Slice*)layer;
  1919. ncnn::Slice* op_default = (ncnn::Slice*)layer_default;
  1920. { if (!op->slices.empty()) fprintf_param_int_array(0, op->slices, pp); }
  1921. fprintf_param_value(" 1=%d", axis)
  1922. }
  1923. else if (layer->type == "Softmax")
  1924. {
  1925. ncnn::Softmax* op = (ncnn::Softmax*)layer;
  1926. ncnn::Softmax* op_default = (ncnn::Softmax*)layer_default;
  1927. fprintf_param_value(" 0=%d", axis)
  1928. // HACK
  1929. if (op->axis != 0)
  1930. {
  1931. int fixbug0 = 1;
  1932. fprintf(pp, " 1=%d", fixbug0);
  1933. }
  1934. }
  1935. else if (layer->type == "Squeeze")
  1936. {
  1937. ncnn::Squeeze* op = (ncnn::Squeeze*)layer;
  1938. ncnn::Squeeze* op_default = (ncnn::Squeeze*)layer_default;
  1939. fprintf_param_value(" 0=%d", squeeze_w)
  1940. fprintf_param_value(" 1=%d", squeeze_h)
  1941. fprintf_param_value(" 2=%d", squeeze_c)
  1942. { if (!op->axes.empty()) fprintf_param_int_array(0, op->axes, pp); }
  1943. }
  1944. else if (layer->type == "Threshold")
  1945. {
  1946. ncnn::Threshold* op = (ncnn::Threshold*)layer;
  1947. ncnn::Threshold* op_default = (ncnn::Threshold*)layer_default;
  1948. fprintf_param_value(" 0=%e", threshold)
  1949. }
  1950. else if (layer->type == "UnaryOp")
  1951. {
  1952. ncnn::UnaryOp* op = (ncnn::UnaryOp*)layer;
  1953. ncnn::UnaryOp* op_default = (ncnn::UnaryOp*)layer_default;
  1954. fprintf_param_value(" 0=%d", op_type)
  1955. }
  1956. else if (layer->type == "YoloDetectionOutput")
  1957. {
  1958. ncnn::YoloDetectionOutput* op = (ncnn::YoloDetectionOutput*)layer;
  1959. ncnn::YoloDetectionOutput* op_default = (ncnn::YoloDetectionOutput*)layer_default;
  1960. fprintf_param_value(" 0=%d", num_class)
  1961. fprintf_param_value(" 1=%d", num_box)
  1962. fprintf_param_value(" 2=%e", confidence_threshold)
  1963. fprintf_param_value(" 3=%e", nms_threshold)
  1964. { if (!op->biases.empty()) fprintf_param_float_array(4, op->biases, pp); }
  1965. }
  1966. else if (layer->type == "Yolov3DetectionOutput")
  1967. {
  1968. ncnn::Yolov3DetectionOutput* op = (ncnn::Yolov3DetectionOutput*)layer;
  1969. ncnn::Yolov3DetectionOutput* op_default = (ncnn::Yolov3DetectionOutput*)layer_default;
  1970. fprintf_param_value(" 0=%d", num_class)
  1971. fprintf_param_value(" 1=%d", num_box)
  1972. fprintf_param_value(" 2=%e", confidence_threshold)
  1973. fprintf_param_value(" 3=%e", nms_threshold)
  1974. { if (!op->biases.empty()) fprintf_param_float_array(4, op->biases, pp); }
  1975. { if (!op->mask.empty()) fprintf_param_int_array(5, op->mask, pp); }
  1976. { if (!op->anchors_scale.empty()) fprintf_param_float_array(6, op->anchors_scale, pp); }
  1977. }
  1978. #undef fprintf_param_value
  1979. fprintf(pp, "\n");
  1980. delete layer_default;
  1981. }
  1982. fclose(pp);
  1983. fclose(bp);
  1984. return 0;
  1985. }
  1986. int main(int argc, char** argv)
  1987. {
  1988. #if defined(__aarch64__) && defined(LINUX)
  1989. if (argc != 10)
  1990. {
  1991. fprintf(stderr, "usage: %s [inparam] [inbin] [outparam] [outbin] [flag] [dataname] [w] [h] [c]\n", argv[0]);
  1992. return -1;
  1993. }
  1994. const char* dataname = argv[6];
  1995. int inw = atoi(argv[7]);
  1996. int inh = atoi(argv[8]);
  1997. int inc = atoi(argv[9]);
  1998. #else
  1999. if (argc != 6)
  2000. {
  2001. fprintf(stderr, "usage: %s [inparam] [inbin] [outparam] [outbin] [flag]\n", argv[0]);
  2002. return -1;
  2003. }
  2004. #endif // defined(__aarch64__) && defined(LINUX)
  2005. const char* inparam = argv[1];
  2006. const char* inbin = argv[2];
  2007. const char* outparam = argv[3];
  2008. const char* outbin = argv[4];
  2009. int flag = atoi(argv[5]);
  2010. NetOptimize optimizer;
  2011. if (flag == 65536)
  2012. {
  2013. optimizer.storage_type = 1;
  2014. }
  2015. else
  2016. {
  2017. optimizer.storage_type = 0;
  2018. }
  2019. optimizer.load_param(inparam);
  2020. if (strcmp(inbin, "null") == 0)
  2021. optimizer.load_model();
  2022. else
  2023. optimizer.ncnn::Net::load_model(inbin);
  2024. #if defined(__aarch64__) && defined(LINUX)
  2025. optimizer.find_fastest_fp32_conv(dataname, inw, inh, inc);
  2026. #endif // defined(__aarch64__) && defined(LINUX)
  2027. optimizer.fuse_batchnorm_scale();
  2028. optimizer.fuse_convolution_batchnorm();
  2029. optimizer.fuse_convolutiondepthwise_batchnorm();
  2030. optimizer.fuse_deconvolution_batchnorm();
  2031. optimizer.fuse_deconvolutiondepthwise_batchnorm();
  2032. optimizer.fuse_innerproduct_batchnorm();
  2033. optimizer.fuse_innerproduct_dropout();
  2034. optimizer.fuse_convolution_activation();
  2035. optimizer.fuse_convolutiondepthwise_activation();
  2036. optimizer.fuse_deconvolution_activation();
  2037. optimizer.fuse_deconvolutiondepthwise_activation();
  2038. optimizer.fuse_innerproduct_activation();
  2039. optimizer.eliminate_dropout();
  2040. optimizer.eliminate_noop();
  2041. optimizer.eliminate_flatten_after_global_pooling();
  2042. optimizer.eliminate_reshape_after_global_pooling();
  2043. optimizer.eliminate_reshape_before_binaryop();
  2044. optimizer.replace_convolution_with_innerproduct_after_global_pooling();
  2045. optimizer.replace_convolution_with_innerproduct_after_innerproduct();
  2046. optimizer.eliminate_flatten_after_innerproduct();
  2047. optimizer.save(outparam, outbin);
  2048. return 0;
  2049. }