You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

caffe2ncnn.cpp 65 kB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifdef _MSC_VER
  15. #define _CRT_SECURE_NO_DEPRECATE
  16. #endif
  17. #include "caffe.pb.h"
  18. #include <algorithm>
  19. #include <fstream>
  20. #include <google/protobuf/io/coded_stream.h>
  21. #include <google/protobuf/io/zero_copy_stream_impl.h>
  22. #include <google/protobuf/message.h>
  23. #include <google/protobuf/text_format.h>
  24. #include <limits.h>
  25. #include <limits>
  26. #include <map>
  27. #include <math.h>
  28. #include <set>
  29. #include <stdio.h>
  // Round sz up to the next multiple of n.
  // The masking trick only yields a true multiple when n is a power of two.
  static inline size_t alignSize(size_t sz, int n)
  {
      // all bits below the alignment boundary
      const size_t low_bits = static_cast<size_t>(n) - 1;
      return (sz + low_bits) & ~low_bits;
  }
  // Convert a single-precision float to the bit pattern of a half-precision float.
  // The mantissa is truncated rather than rounded. Float zeros and denormals become
  // signed zero, magnitudes too large for half become signed infinity, and every NaN
  // collapses to a single NaN pattern (mantissa 0x200).
  static unsigned short float2half(float value)
  {
      // view the float as its raw 32-bit pattern
      union
      {
          unsigned int u;
          float f;
      } bits;
      bits.f = value;

      // float32 fields, 1 : 8 : 23
      const unsigned short sign_bit = (bits.u & 0x80000000) >> 31;
      const unsigned short exp32 = (bits.u & 0x7F800000) >> 23;
      const unsigned int mantissa = bits.u & 0x7FFFFF;

      // float16 fields, 1 : 5 : 10
      const unsigned short signed_zero = sign_bit << 15;
      const unsigned short signed_inf = (sign_bit << 15) | (0x1F << 10);

      if (exp32 == 0)
      {
          // zero or float denormal, always underflows in half precision
          return signed_zero;
      }

      if (exp32 == 0xFF)
      {
          // infinity when the mantissa is empty, NaN otherwise
          return mantissa ? (signed_inf | 0x200) : signed_inf;
      }

      // normalized input: move the exponent bias from 127 to 15
      const short exp16 = exp32 + (-127 + 15);

      if (exp16 >= 31)
      {
          // overflow, return infinity
          return signed_inf;
      }

      if (exp16 > 0)
      {
          // fits as a normalized half; keep the top 10 mantissa bits
          return (sign_bit << 15) | (exp16 << 10) | (mantissa >> 13);
      }

      if (exp16 >= -10)
      {
          // denormal half: restore the implicit leading one, then shift it into the 10-bit field
          const unsigned short denormal = (mantissa | 0x800000) >> (14 - exp16);
          return signed_zero | denormal;
      }

      // too small even for a denormal half
      return signed_zero;
  }
  // Round to the nearest integer (halves move away from zero) and
  // saturate the result to the symmetric range [-127, 127].
  static signed char float2int8(float value)
  {
      // push the value half a step away from zero so truncation rounds it
      const float shifted = (value >= 0.f) ? value + 0.5f : value - 0.5f;

      if (shifted > 127)
          return 127;

      if (shifted < -127)
          return -127;

      // the conversion truncates toward zero
      return static_cast<signed char>(shifted);
  }
  // Store one parsed table entry under its key.
  // Keys containing "_param_" (the XYZ_param_N pattern) go to the weight table,
  // every other key goes to the blob table.
  static void store_int8scale_entry(const std::string& key, const std::vector<float>& scales, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
  {
      if (key.find("_param_") != std::string::npos)
      {
          weight_int8scale_table[key] = scales;
      }
      else
      {
          blob_int8scale_table[key] = scales;
      }
  }

  // Read a whitespace-separated int8 scale table of the form
  //   key scale scale ... key scale ...
  // Both output maps are cleared first. Each key is followed by zero or more scales.
  // Tokens are limited to 255 characters by the scan format.
  //
  // A token counts as a scale only when the whole token parses as a number.
  // A token with just a numeric prefix, such as "2x_branch", starts a new key;
  // previously such a token was taken as the scale 2 and its own scales were
  // appended to the preceding key.
  //
  // Returns false, after logging to stderr, when the file cannot be opened.
  static bool read_int8scale_table(const char* filepath, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
  {
      blob_int8scale_table.clear();
      weight_int8scale_table.clear();

      FILE* fp = fopen(filepath, "rb");
      if (!fp)
      {
          fprintf(stderr, "fopen %s failed\n", filepath);
          return false;
      }

      bool have_key = false;
      std::string keystr;
      std::vector<float> scales;

      char token[256];
      while (fscanf(fp, "%255s", token) == 1)
      {
          if (have_key)
          {
              float scale = 1.f;
              int consumed = 0;
              // %n reports how many characters %f used; a scale must use the whole token
              if (sscanf(token, "%f%n", &scale, &consumed) == 1 && token[consumed] == '\0')
              {
                  scales.push_back(scale);
                  continue;
              }

              // not a number: the current entry is complete
              store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);
              scales.clear();
          }

          // the token opens a new entry
          keystr = token;
          have_key = true;
      }

      // flush the last entry, which has no following key to terminate it
      if (have_key)
      {
          store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);
      }

      fclose(fp);
      return true;
  }
  173. static int quantize_weight(float* data, size_t data_length, std::vector<unsigned short>& float16_weights)
  174. {
  175. float16_weights.resize(data_length);
  176. for (size_t i = 0; i < data_length; i++)
  177. {
  178. float f = data[i];
  179. unsigned short fp16 = float2half(f);
  180. float16_weights[i] = fp16;
  181. }
  182. // magic tag for half-precision floating point
  183. return 0x01306B47;
  184. }
  185. static int quantize_weight(float* data, size_t data_length, std::vector<float> scales, std::vector<signed char>& int8_weights)
  186. {
  187. int8_weights.resize(data_length);
  188. const int length_per_group = static_cast<int>(data_length / scales.size());
  189. for (size_t i = 0; i < data_length; i++)
  190. {
  191. float f = data[i];
  192. signed char int8 = float2int8(f * scales[i / length_per_group]);
  193. int8_weights[i] = int8;
  194. }
  195. // magic tag for int8
  196. return 0x000D4B38;
  197. }
  // Quantize data_length floats onto a uniform table of quantize_level values.
  //
  // quantize_table receives quantize_level entries min + i * stride, where
  // stride = (max - min) / quantize_level, so the highest entry is max - stride.
  // quantize_index receives, for every input value, the index of the nearer of
  // the two surrounding table entries, limited to the last entry.
  // Both output vectors are reset before being filled.
  // Indices are stored as unsigned char, so levels above 256 do not fit.
  //
  // Returns false when there is nothing to quantize or when data_length is
  // smaller than quantize_level; true otherwise.
  static bool quantize_weight(float* data, size_t data_length, int quantize_level, std::vector<float>& quantize_table, std::vector<unsigned char>& quantize_index)
  {
      assert(quantize_level != 0);
      assert(data != NULL);
      assert(data_length > 0);

      // the asserts vanish under NDEBUG; keep the same preconditions at runtime
      if (data == NULL || data_length == 0 || quantize_level == 0)
      {
          return false;
      }

      if (data_length < static_cast<size_t>(quantize_level))
      {
          fprintf(stderr, "No need quantize,because: data_length < quantize_level");
          return false;
      }

      quantize_table.clear();
      quantize_index.clear();
      quantize_table.reserve(quantize_level);
      quantize_index.reserve(data_length);

      // 1. Find min and max value
      // Seed both from the first element. numeric_limits<float>::min() is the
      // smallest positive float, so seeding max with it is wrong for all-negative data.
      float max_value = data[0];
      float min_value = data[0];
      for (size_t i = 1; i < data_length; ++i)
      {
          if (max_value < data[i]) max_value = data[i];
          if (min_value > data[i]) min_value = data[i];
      }

      const float strides = (max_value - min_value) / quantize_level;

      // 2. Generate quantize table
      for (int i = 0; i < quantize_level; ++i)
      {
          quantize_table.push_back(min_value + i * strides);
      }

      // 3. Align data to the quantized value
      const int last_index = quantize_level - 1;
      for (size_t i = 0; i < data_length; ++i)
      {
          int table_index = 0;

          // a zero stride means every value is identical; dividing by it is
          // 0/0, so all values map to entry 0 instead
          if (strides > 0.f)
          {
              table_index = std::min(int((data[i] - min_value) / strides), last_index);

              const float low_value = quantize_table[table_index];
              const float high_value = low_value + strides;

              // step up when the upper neighbour is at least as close; the index is
              // advanced directly rather than recomputed through another float
              // division, which could round back down a slot
              const bool nearer_low = data[i] - low_value < high_value - data[i];
              if (!nearer_low && table_index < last_index)
              {
                  ++table_index;
              }
          }

          quantize_index.push_back(static_cast<unsigned char>(table_index));
      }

      return true;
  }
  239. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  240. {
  241. std::ifstream fs(filepath, std::ifstream::in);
  242. if (!fs.is_open())
  243. {
  244. fprintf(stderr, "open failed %s\n", filepath);
  245. return false;
  246. }
  247. google::protobuf::io::IstreamInputStream input(&fs);
  248. bool success = google::protobuf::TextFormat::Parse(&input, message);
  249. fs.close();
  250. return success;
  251. }
  252. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  253. {
  254. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  255. if (!fs.is_open())
  256. {
  257. fprintf(stderr, "open failed %s\n", filepath);
  258. return false;
  259. }
  260. google::protobuf::io::IstreamInputStream input(&fs);
  261. google::protobuf::io::CodedInputStream codedstr(&input);
  262. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  263. bool success = message->ParseFromCodedStream(&codedstr);
  264. fs.close();
  265. return success;
  266. }
  267. int main(int argc, char** argv)
  268. {
  269. if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
  270. {
  271. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
  272. return -1;
  273. }
  274. const char* caffeproto = argv[1];
  275. const char* caffemodel = argv[2];
  276. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  277. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  278. const char* quantize_param = argc >= 6 ? argv[5] : "0";
  279. const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
  280. int quantize_level = atoi(quantize_param);
  281. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536)
  282. {
  283. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  284. return -1;
  285. }
  286. caffe::NetParameter proto;
  287. caffe::NetParameter net;
  288. // load
  289. bool s0 = read_proto_from_text(caffeproto, &proto);
  290. if (!s0)
  291. {
  292. fprintf(stderr, "read_proto_from_text failed\n");
  293. return -1;
  294. }
  295. bool s1 = read_proto_from_binary(caffemodel, &net);
  296. if (!s1)
  297. {
  298. fprintf(stderr, "read_proto_from_binary failed\n");
  299. return -1;
  300. }
  301. std::map<std::string, std::vector<float> > blob_int8scale_table;
  302. std::map<std::string, std::vector<float> > weight_int8scale_table;
  303. if (int8scale_table_path)
  304. {
  305. bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
  306. if (!s2)
  307. {
  308. fprintf(stderr, "read_int8scale_table failed\n");
  309. return -1;
  310. }
  311. }
  312. FILE* pp = fopen(ncnn_prototxt, "wb");
  313. FILE* bp = fopen(ncnn_modelbin, "wb");
  314. // magic
  315. fprintf(pp, "7767517\n");
  316. // rename mapping for identical bottom top style
  317. std::map<std::string, std::string> blob_name_decorated;
  318. // bottom blob reference
  319. std::map<std::string, int> bottom_reference;
  320. // global definition line
  321. // [layer count] [blob count]
  322. int layer_count = proto.layer_size();
  323. std::set<std::string> blob_names;
  324. for (int i = 0; i < layer_count; i++)
  325. {
  326. const caffe::LayerParameter& layer = proto.layer(i);
  327. for (int j = 0; j < layer.bottom_size(); j++)
  328. {
  329. std::string blob_name = layer.bottom(j);
  330. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  331. {
  332. blob_name = blob_name_decorated[blob_name];
  333. }
  334. blob_names.insert(blob_name);
  335. if (bottom_reference.find(blob_name) == bottom_reference.end())
  336. {
  337. bottom_reference[blob_name] = 1;
  338. }
  339. else
  340. {
  341. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  342. }
  343. }
  344. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  345. {
  346. std::string blob_name = layer.top(0) + "_" + layer.name();
  347. blob_name_decorated[layer.top(0)] = blob_name;
  348. blob_names.insert(blob_name);
  349. }
  350. else
  351. {
  352. for (int j = 0; j < layer.top_size(); j++)
  353. {
  354. std::string blob_name = layer.top(j);
  355. blob_names.insert(blob_name);
  356. }
  357. }
  358. }
  359. // remove bottom_reference entry with reference equals to one
  360. int splitncnn_blob_count = 0;
  361. std::map<std::string, int>::iterator it = bottom_reference.begin();
  362. while (it != bottom_reference.end())
  363. {
  364. if (it->second == 1)
  365. {
  366. bottom_reference.erase(it++);
  367. }
  368. else
  369. {
  370. splitncnn_blob_count += it->second;
  371. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  372. ++it;
  373. }
  374. }
  375. fprintf(pp, "%d %d\n", int(layer_count + bottom_reference.size()), int(blob_names.size() + splitncnn_blob_count));
  376. // populate
  377. blob_name_decorated.clear();
  378. int internal_split = 0;
  379. for (int i = 0; i < layer_count; i++)
  380. {
  381. const caffe::LayerParameter& layer = proto.layer(i);
  382. // layer definition line, repeated
  383. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  384. if (layer.type() == "BN")
  385. {
  386. fprintf(pp, "%-16s", "Scale");
  387. }
  388. else if (layer.type() == "Convolution")
  389. {
  390. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  391. if (convolution_param.group() != 1)
  392. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  393. else
  394. fprintf(pp, "%-16s", "Convolution");
  395. }
  396. else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  397. {
  398. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  399. }
  400. else if (layer.type() == "Deconvolution")
  401. {
  402. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  403. if (convolution_param.group() != 1)
  404. fprintf(pp, "%-16s", "DeconvolutionDepthWise");
  405. else
  406. fprintf(pp, "%-16s", "Deconvolution");
  407. }
  408. else if (layer.type() == "MemoryData")
  409. {
  410. fprintf(pp, "%-16s", "Input");
  411. }
  412. else if (layer.type() == "Python")
  413. {
  414. const caffe::PythonParameter& python_param = layer.python_param();
  415. std::string python_layer_name = python_param.layer();
  416. if (python_layer_name == "ProposalLayer")
  417. fprintf(pp, "%-16s", "Proposal");
  418. else
  419. fprintf(pp, "%-16s", python_layer_name.c_str());
  420. }
  421. else if (layer.type() == "ReLU6")
  422. {
  423. fprintf(pp, "%-16s", "Clip");
  424. }
  425. else if (layer.type() == "Silence")
  426. {
  427. fprintf(pp, "%-16s", "Noop");
  428. }
  429. else
  430. {
  431. fprintf(pp, "%-16s", layer.type().c_str());
  432. }
  433. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  434. for (int j = 0; j < layer.bottom_size(); j++)
  435. {
  436. std::string blob_name = layer.bottom(j);
  437. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  438. {
  439. blob_name = blob_name_decorated[layer.bottom(j)];
  440. }
  441. if (bottom_reference.find(blob_name) != bottom_reference.end())
  442. {
  443. int refidx = bottom_reference[blob_name] - 1;
  444. bottom_reference[blob_name] = refidx;
  445. char splitsuffix[256];
  446. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  447. blob_name = blob_name + splitsuffix;
  448. }
  449. fprintf(pp, " %s", blob_name.c_str());
  450. }
  451. // decorated
  452. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  453. {
  454. std::string blob_name = layer.top(0) + "_" + layer.name();
  455. blob_name_decorated[layer.top(0)] = blob_name;
  456. fprintf(pp, " %s", blob_name.c_str());
  457. }
  458. else
  459. {
  460. for (int j = 0; j < layer.top_size(); j++)
  461. {
  462. std::string blob_name = layer.top(j);
  463. fprintf(pp, " %s", blob_name.c_str());
  464. }
  465. }
  466. // find blob binary by layer name
  467. int netidx;
  468. for (netidx = 0; netidx < net.layer_size(); netidx++)
  469. {
  470. if (net.layer(netidx).name() == layer.name())
  471. {
  472. break;
  473. }
  474. }
  475. // layer specific params
  476. if (layer.type() == "BatchNorm")
  477. {
  478. const caffe::LayerParameter& binlayer = net.layer(netidx);
  479. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  480. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  481. fprintf(pp, " 0=%d", (int)mean_blob.data_size());
  482. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  483. float eps = batch_norm_param.eps();
  484. std::vector<float> ones(mean_blob.data_size(), 1.f);
  485. fwrite(ones.data(), sizeof(float), ones.size(), bp); // slope
  486. if (binlayer.blobs_size() < 3)
  487. {
  488. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  489. float tmp;
  490. for (int j = 0; j < var_blob.data_size(); j++)
  491. {
  492. tmp = var_blob.data().data()[j] + eps;
  493. fwrite(&tmp, sizeof(float), 1, bp);
  494. }
  495. }
  496. else
  497. {
  498. float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
  499. // premultiply scale_factor to mean and variance
  500. float tmp;
  501. for (int j = 0; j < mean_blob.data_size(); j++)
  502. {
  503. tmp = mean_blob.data().data()[j] * scale_factor;
  504. fwrite(&tmp, sizeof(float), 1, bp);
  505. }
  506. for (int j = 0; j < var_blob.data_size(); j++)
  507. {
  508. tmp = var_blob.data().data()[j] * scale_factor + eps;
  509. fwrite(&tmp, sizeof(float), 1, bp);
  510. }
  511. }
  512. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  513. fwrite(zeros.data(), sizeof(float), zeros.size(), bp); // bias
  514. }
  515. else if (layer.type() == "BN")
  516. {
  517. const caffe::LayerParameter& binlayer = net.layer(netidx);
  518. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  519. const caffe::BlobProto& shift_blob = binlayer.blobs(1);
  520. fprintf(pp, " 0=%d", (int)scale_blob.data_size());
  521. fprintf(pp, " 1=1");
  522. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  523. fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
  524. }
  525. else if (layer.type() == "Concat")
  526. {
  527. const caffe::ConcatParameter& concat_param = layer.concat_param();
  528. int axis = concat_param.axis() - 1;
  529. fprintf(pp, " 0=%d", axis);
  530. }
  531. else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  532. {
  533. const caffe::LayerParameter& binlayer = net.layer(netidx);
  534. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  535. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  536. fprintf(pp, " 0=%d", convolution_param.num_output());
  537. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  538. {
  539. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  540. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  541. }
  542. else
  543. {
  544. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  545. }
  546. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  547. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  548. {
  549. fprintf(pp, " 3=%d", convolution_param.stride_w());
  550. fprintf(pp, " 13=%d", convolution_param.stride_h());
  551. }
  552. else
  553. {
  554. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  555. }
  556. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  557. {
  558. fprintf(pp, " 4=%d", convolution_param.pad_w());
  559. fprintf(pp, " 14=%d", convolution_param.pad_h());
  560. }
  561. else
  562. {
  563. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  564. }
  565. fprintf(pp, " 5=%d", convolution_param.bias_term());
  566. fprintf(pp, " 6=%d", weight_blob.data_size());
  567. int num_group = 1;
  568. if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  569. {
  570. num_group = convolution_param.num_output();
  571. }
  572. else
  573. {
  574. num_group = convolution_param.group();
  575. }
  576. if (num_group != 1)
  577. {
  578. fprintf(pp, " 7=%d", num_group);
  579. }
  580. bool int8_scale_term = false;
  581. std::vector<float> weight_int8scale;
  582. std::vector<float> blob_int8scale;
  583. if (int8scale_table_path)
  584. {
  585. char key[256];
  586. sprintf(key, "%s_param_0", layer.name().c_str());
  587. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  588. {
  589. weight_int8scale = weight_int8scale_table[std::string(key)];
  590. }
  591. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  592. {
  593. blob_int8scale = blob_int8scale_table[layer.name()];
  594. }
  595. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  596. if (int8_scale_term)
  597. {
  598. if ((int)weight_int8scale.size() == num_group)
  599. {
  600. fprintf(pp, " 8=1");
  601. }
  602. else
  603. {
  604. fprintf(pp, " 8=2");
  605. }
  606. }
  607. }
  608. for (int j = 0; j < binlayer.blobs_size(); j++)
  609. {
  610. int quantize_tag = 0;
  611. const caffe::BlobProto& blob = binlayer.blobs(j);
  612. std::vector<float> quantize_table;
  613. std::vector<unsigned char> quantize_index;
  614. std::vector<unsigned short> float16_weights;
  615. std::vector<signed char> int8_weights;
  616. // we will not quantize the bias values
  617. if (j == 0)
  618. {
  619. if (int8_scale_term)
  620. {
  621. if (quantize_level == 0)
  622. {
  623. quantize_tag = 0x0002C056;
  624. }
  625. else if (quantize_level == 256)
  626. {
  627. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  628. }
  629. }
  630. else if (quantize_level == 256)
  631. {
  632. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  633. }
  634. else if (quantize_level == 65536)
  635. {
  636. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  637. }
  638. // write quantize tag first
  639. fwrite(&quantize_tag, sizeof(int), 1, bp);
  640. if (quantize_tag)
  641. {
  642. int p0 = ftell(bp);
  643. if (int8_scale_term)
  644. {
  645. if (quantize_level == 0)
  646. {
  647. // write original data and int8scale
  648. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  649. }
  650. else if (quantize_level == 256)
  651. {
  652. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  653. }
  654. }
  655. else if (quantize_level == 256)
  656. {
  657. // write quantize table and index
  658. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  659. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  660. }
  661. else if (quantize_level == 65536)
  662. {
  663. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  664. }
  665. // padding to 32bit align
  666. int nwrite = ftell(bp) - p0;
  667. int nalign = int(alignSize(nwrite, 4));
  668. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  669. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  670. }
  671. else
  672. {
  673. // write original data
  674. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  675. }
  676. }
  677. else
  678. {
  679. // write original data
  680. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  681. }
  682. }
  683. if (int8_scale_term)
  684. {
  685. // write int8_scale data
  686. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  687. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  688. }
  689. }
  690. else if (layer.type() == "Crop")
  691. {
  692. const caffe::CropParameter& crop_param = layer.crop_param();
  693. int num_offset = crop_param.offset_size();
  694. if (num_offset == 1)
  695. {
  696. int offset = crop_param.offset(0);
  697. int axis = crop_param.axis() - 1;
  698. if (axis == 0)
  699. {
  700. fprintf(pp, " 0=%d", offset);
  701. fprintf(pp, " 1=%d", offset);
  702. fprintf(pp, " 2=%d", offset);
  703. }
  704. else if (axis == 1)
  705. {
  706. fprintf(pp, " 0=%d", offset);
  707. fprintf(pp, " 1=%d", offset);
  708. }
  709. else if (axis == 2)
  710. {
  711. fprintf(pp, " 0=%d", offset);
  712. }
  713. }
  714. else if (num_offset == 2)
  715. {
  716. int woffset = crop_param.offset(1);
  717. int hoffset = crop_param.offset(0);
  718. fprintf(pp, " 0=%d", woffset);
  719. fprintf(pp, " 1=%d", hoffset);
  720. }
  721. else if (num_offset == 3)
  722. {
  723. int woffset = crop_param.offset(2);
  724. int hoffset = crop_param.offset(1);
  725. int coffset = crop_param.offset(0);
  726. fprintf(pp, " 0=%d", woffset);
  727. fprintf(pp, " 1=%d", hoffset);
  728. fprintf(pp, " 2=%d", coffset);
  729. }
  730. }
  731. else if (layer.type() == "Deconvolution")
  732. {
  733. const caffe::LayerParameter& binlayer = net.layer(netidx);
  734. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  735. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  736. fprintf(pp, " 0=%d", convolution_param.num_output());
  737. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  738. {
  739. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  740. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  741. }
  742. else
  743. {
  744. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  745. }
  746. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  747. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  748. {
  749. fprintf(pp, " 3=%d", convolution_param.stride_w());
  750. fprintf(pp, " 13=%d", convolution_param.stride_h());
  751. }
  752. else
  753. {
  754. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  755. }
  756. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  757. {
  758. fprintf(pp, " 4=%d", convolution_param.pad_w());
  759. fprintf(pp, " 14=%d", convolution_param.pad_h());
  760. }
  761. else
  762. {
  763. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  764. }
  765. fprintf(pp, " 5=%d", convolution_param.bias_term());
  766. fprintf(pp, " 6=%d", weight_blob.data_size());
  767. int group = convolution_param.group();
  768. if (group != 1)
  769. {
  770. fprintf(pp, " 7=%d", group);
  771. }
  772. int quantized_weight = 0;
  773. fwrite(&quantized_weight, sizeof(int), 1, bp);
  774. int maxk = 0;
  775. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  776. {
  777. maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
  778. }
  779. else
  780. {
  781. maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
  782. }
  783. for (int g = 0; g < group; g++)
  784. {
  785. // reorder weight from inch-outch to outch-inch
  786. int num_output = convolution_param.num_output() / group;
  787. int num_input = weight_blob.data_size() / maxk / num_output / group;
  788. const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
  789. for (int k = 0; k < num_output; k++)
  790. {
  791. for (int j = 0; j < num_input; j++)
  792. {
  793. fwrite(weight_data_ptr + (j * num_output + k) * maxk, sizeof(float), maxk, bp);
  794. }
  795. }
  796. }
  797. for (int j = 1; j < binlayer.blobs_size(); j++)
  798. {
  799. const caffe::BlobProto& blob = binlayer.blobs(j);
  800. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  801. }
  802. }
  803. else if (layer.type() == "DetectionOutput")
  804. {
  805. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  806. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  807. fprintf(pp, " 0=%d", detection_output_param.num_classes());
  808. fprintf(pp, " 1=%e", nms_param.nms_threshold());
  809. fprintf(pp, " 2=%d", nms_param.top_k());
  810. fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
  811. fprintf(pp, " 4=%e", detection_output_param.confidence_threshold());
  812. }
  813. else if (layer.type() == "Dropout")
  814. {
  815. const caffe::DropoutParameter& dropout_param = layer.dropout_param();
  816. if (dropout_param.has_scale_train() && !dropout_param.scale_train())
  817. {
  818. float scale = 1.f - dropout_param.dropout_ratio();
  819. fprintf(pp, " 0=%e", scale);
  820. }
  821. }
  822. else if (layer.type() == "Eltwise")
  823. {
  824. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  825. int coeff_size = eltwise_param.coeff_size();
  826. fprintf(pp, " 0=%d", (int)eltwise_param.operation());
  827. fprintf(pp, " -23301=%d", coeff_size);
  828. for (int j = 0; j < coeff_size; j++)
  829. {
  830. fprintf(pp, ",%e", eltwise_param.coeff(j));
  831. }
  832. }
  833. else if (layer.type() == "ELU")
  834. {
  835. const caffe::ELUParameter& elu_param = layer.elu_param();
  836. fprintf(pp, " 0=%e", elu_param.alpha());
  837. }
  838. else if (layer.type() == "Embed")
  839. {
  840. const caffe::LayerParameter& binlayer = net.layer(netidx);
  841. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  842. const caffe::EmbedParameter& embed_param = layer.embed_param();
  843. fprintf(pp, " 0=%d", embed_param.num_output());
  844. fprintf(pp, " 1=%d", embed_param.input_dim());
  845. fprintf(pp, " 2=%d", embed_param.bias_term());
  846. fprintf(pp, " 3=%d", weight_blob.data_size());
  847. for (int j = 0; j < binlayer.blobs_size(); j++)
  848. {
  849. int quantize_tag = 0;
  850. const caffe::BlobProto& blob = binlayer.blobs(j);
  851. std::vector<float> quantize_table;
  852. std::vector<unsigned char> quantize_index;
  853. std::vector<unsigned short> float16_weights;
  854. // we will not quantize the bias values
  855. if (j == 0 && quantize_level != 0)
  856. {
  857. if (quantize_level == 256)
  858. {
  859. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  860. }
  861. else if (quantize_level == 65536)
  862. {
  863. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  864. }
  865. }
  866. // write quantize tag first
  867. if (j == 0)
  868. fwrite(&quantize_tag, sizeof(int), 1, bp);
  869. if (quantize_tag)
  870. {
  871. int p0 = ftell(bp);
  872. if (quantize_level == 256)
  873. {
  874. // write quantize table and index
  875. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  876. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  877. }
  878. else if (quantize_level == 65536)
  879. {
  880. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  881. }
  882. // padding to 32bit align
  883. int nwrite = ftell(bp) - p0;
  884. int nalign = int(alignSize(nwrite, 4));
  885. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  886. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  887. }
  888. else
  889. {
  890. // write original data
  891. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  892. }
  893. }
  894. }
  895. else if (layer.type() == "InnerProduct")
  896. {
  897. const caffe::LayerParameter& binlayer = net.layer(netidx);
  898. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  899. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  900. fprintf(pp, " 0=%d", inner_product_param.num_output());
  901. fprintf(pp, " 1=%d", inner_product_param.bias_term());
  902. fprintf(pp, " 2=%d", weight_blob.data_size());
  903. bool int8_scale_term = false;
  904. std::vector<float> weight_int8scale;
  905. std::vector<float> blob_int8scale;
  906. if (int8scale_table_path)
  907. {
  908. char key[256];
  909. sprintf(key, "%s_param_0", layer.name().c_str());
  910. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  911. {
  912. weight_int8scale = weight_int8scale_table[std::string(key)];
  913. }
  914. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  915. {
  916. blob_int8scale = blob_int8scale_table[layer.name()];
  917. }
  918. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  919. if (int8_scale_term)
  920. {
  921. fprintf(pp, " 8=1");
  922. }
  923. }
  924. for (int j = 0; j < binlayer.blobs_size(); j++)
  925. {
  926. int quantize_tag = 0;
  927. const caffe::BlobProto& blob = binlayer.blobs(j);
  928. std::vector<float> quantize_table;
  929. std::vector<unsigned char> quantize_index;
  930. std::vector<unsigned short> float16_weights;
  931. std::vector<signed char> int8_weights;
  932. // we will not quantize the bias values
  933. if (j == 0)
  934. {
  935. if (int8_scale_term)
  936. {
  937. if (quantize_level == 0)
  938. {
  939. quantize_tag = 0x0002C056;
  940. }
  941. else if (quantize_level == 256)
  942. {
  943. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  944. }
  945. }
  946. else if (quantize_level == 256)
  947. {
  948. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  949. }
  950. else if (quantize_level == 65536)
  951. {
  952. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  953. }
  954. // write quantize tag first
  955. fwrite(&quantize_tag, sizeof(int), 1, bp);
  956. if (quantize_tag)
  957. {
  958. int p0 = ftell(bp);
  959. if (int8_scale_term)
  960. {
  961. if (quantize_level == 0)
  962. {
  963. // write original data and int8scale
  964. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  965. }
  966. else if (quantize_level == 256)
  967. {
  968. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  969. }
  970. }
  971. else if (quantize_level == 256)
  972. {
  973. // write quantize table and index
  974. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  975. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  976. }
  977. else if (quantize_level == 65536)
  978. {
  979. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  980. }
  981. // padding to 32bit align
  982. int nwrite = ftell(bp) - p0;
  983. int nalign = int(alignSize(nwrite, 4));
  984. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  985. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  986. }
  987. else
  988. {
  989. // write original data
  990. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  991. }
  992. }
  993. else
  994. {
  995. // write original data
  996. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  997. }
  998. }
  999. if (int8_scale_term)
  1000. {
  1001. // write int8_scale data
  1002. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  1003. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  1004. }
  1005. }
  1006. else if (layer.type() == "Input")
  1007. {
  1008. const caffe::InputParameter& input_param = layer.input_param();
  1009. const caffe::BlobShape& bs = input_param.shape(0);
  1010. if (bs.dim_size() == 4)
  1011. {
  1012. fprintf(pp, " 0=%zd", size_t(bs.dim(3)));
  1013. fprintf(pp, " 1=%zd", size_t(bs.dim(2)));
  1014. fprintf(pp, " 2=%zd", size_t(bs.dim(1)));
  1015. }
  1016. else if (bs.dim_size() == 3)
  1017. {
  1018. fprintf(pp, " 0=%zd", size_t(bs.dim(2)));
  1019. fprintf(pp, " 1=%zd", size_t(bs.dim(1)));
  1020. fprintf(pp, " 2=-233");
  1021. }
  1022. else if (bs.dim_size() == 2)
  1023. {
  1024. fprintf(pp, " 0=%zd", size_t(bs.dim(1)));
  1025. fprintf(pp, " 1=-233");
  1026. fprintf(pp, " 2=-233");
  1027. }
  1028. }
  1029. else if (layer.type() == "Interp")
  1030. {
  1031. const caffe::InterpParameter& interp_param = layer.interp_param();
  1032. fprintf(pp, " 0=%d", 2);
  1033. fprintf(pp, " 1=%e", (float)interp_param.zoom_factor());
  1034. fprintf(pp, " 2=%e", (float)interp_param.zoom_factor());
  1035. fprintf(pp, " 3=%d", interp_param.height());
  1036. fprintf(pp, " 4=%d", interp_param.width());
  1037. }
  1038. else if (layer.type() == "LRN")
  1039. {
  1040. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  1041. fprintf(pp, " 0=%d", lrn_param.norm_region());
  1042. fprintf(pp, " 1=%d", lrn_param.local_size());
  1043. fprintf(pp, " 2=%e", lrn_param.alpha());
  1044. fprintf(pp, " 3=%e", lrn_param.beta());
  1045. }
  1046. else if (layer.type() == "LSTM")
  1047. {
  1048. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1049. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1050. const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
  1051. fprintf(pp, " 0=%d", recurrent_param.num_output());
  1052. fprintf(pp, " 1=%d", weight_blob.data_size());
  1053. for (int j = 0; j < binlayer.blobs_size(); j++)
  1054. {
  1055. int quantize_tag = 0;
  1056. const caffe::BlobProto& blob = binlayer.blobs(j);
  1057. std::vector<float> quantize_table;
  1058. std::vector<unsigned char> quantize_index;
  1059. std::vector<unsigned short> float16_weights;
  1060. if (quantize_level != 0)
  1061. {
  1062. if (quantize_level == 256)
  1063. {
  1064. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  1065. }
  1066. else if (quantize_level == 65536)
  1067. {
  1068. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  1069. }
  1070. }
  1071. // write quantize tag first
  1072. fwrite(&quantize_tag, sizeof(int), 1, bp);
  1073. if (quantize_tag)
  1074. {
  1075. int p0 = ftell(bp);
  1076. if (quantize_level == 256)
  1077. {
  1078. // write quantize table and index
  1079. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  1080. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  1081. }
  1082. else if (quantize_level == 65536)
  1083. {
  1084. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  1085. }
  1086. // padding to 32bit align
  1087. int nwrite = ftell(bp) - p0;
  1088. int nalign = int(alignSize(nwrite, 4));
  1089. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1090. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1091. }
  1092. else
  1093. {
  1094. // write original data
  1095. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1096. }
  1097. }
  1098. }
  1099. else if (layer.type() == "MemoryData")
  1100. {
  1101. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  1102. fprintf(pp, " 0=%d", memory_data_param.width());
  1103. fprintf(pp, " 1=%d", memory_data_param.height());
  1104. fprintf(pp, " 2=%d", memory_data_param.channels());
  1105. }
  1106. else if (layer.type() == "MVN")
  1107. {
  1108. const caffe::MVNParameter& mvn_param = layer.mvn_param();
  1109. fprintf(pp, " 0=%d", mvn_param.normalize_variance());
  1110. fprintf(pp, " 1=%d", mvn_param.across_channels());
  1111. fprintf(pp, " 2=%e", mvn_param.eps());
  1112. }
  1113. else if (layer.type() == "Normalize")
  1114. {
  1115. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1116. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  1117. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  1118. fprintf(pp, " 0=%d", norm_param.across_spatial());
  1119. fprintf(pp, " 1=%d", norm_param.channel_shared());
  1120. fprintf(pp, " 2=%e", norm_param.eps());
  1121. fprintf(pp, " 3=%d", scale_blob.data_size());
  1122. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  1123. }
  1124. else if (layer.type() == "Permute")
  1125. {
  1126. const caffe::PermuteParameter& permute_param = layer.permute_param();
  1127. int order_size = permute_param.order_size();
  1128. int order_type = 0;
  1129. if (order_size == 0)
  1130. order_type = 0;
  1131. if (order_size == 1)
  1132. {
  1133. int order0 = permute_param.order(0);
  1134. if (order0 == 0)
  1135. order_type = 0;
  1136. // permute with N not supported
  1137. }
  1138. if (order_size == 2)
  1139. {
  1140. int order0 = permute_param.order(0);
  1141. int order1 = permute_param.order(1);
  1142. if (order0 == 0)
  1143. {
  1144. if (order1 == 1) // 0 1 2 3
  1145. order_type = 0;
  1146. else if (order1 == 2) // 0 2 1 3
  1147. order_type = 2;
  1148. else if (order1 == 3) // 0 3 1 2
  1149. order_type = 4;
  1150. }
  1151. // permute with N not supported
  1152. }
  1153. if (order_size == 3 || order_size == 4)
  1154. {
  1155. int order0 = permute_param.order(0);
  1156. int order1 = permute_param.order(1);
  1157. int order2 = permute_param.order(2);
  1158. if (order0 == 0)
  1159. {
  1160. if (order1 == 1)
  1161. {
  1162. if (order2 == 2) // 0 1 2 3
  1163. order_type = 0;
  1164. if (order2 == 3) // 0 1 3 2
  1165. order_type = 1;
  1166. }
  1167. else if (order1 == 2)
  1168. {
  1169. if (order2 == 1) // 0 2 1 3
  1170. order_type = 2;
  1171. if (order2 == 3) // 0 2 3 1
  1172. order_type = 3;
  1173. }
  1174. else if (order1 == 3)
  1175. {
  1176. if (order2 == 1) // 0 3 1 2
  1177. order_type = 4;
  1178. if (order2 == 2) // 0 3 2 1
  1179. order_type = 5;
  1180. }
  1181. }
  1182. // permute with N not supported
  1183. }
  1184. fprintf(pp, " 0=%d", order_type);
  1185. }
  1186. else if (layer.type() == "Pooling")
  1187. {
  1188. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  1189. fprintf(pp, " 0=%d", pooling_param.pool());
  1190. if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
  1191. {
  1192. fprintf(pp, " 1=%d", pooling_param.kernel_w());
  1193. fprintf(pp, " 11=%d", pooling_param.kernel_h());
  1194. }
  1195. else
  1196. {
  1197. fprintf(pp, " 1=%d", pooling_param.kernel_size());
  1198. }
  1199. if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
  1200. {
  1201. fprintf(pp, " 2=%d", pooling_param.stride_w());
  1202. fprintf(pp, " 12=%d", pooling_param.stride_h());
  1203. }
  1204. else
  1205. {
  1206. fprintf(pp, " 2=%d", pooling_param.stride());
  1207. }
  1208. if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
  1209. {
  1210. fprintf(pp, " 3=%d", pooling_param.pad_w());
  1211. fprintf(pp, " 13=%d", pooling_param.pad_h());
  1212. }
  1213. else
  1214. {
  1215. fprintf(pp, " 3=%d", pooling_param.pad());
  1216. }
  1217. fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  1218. }
  1219. else if (layer.type() == "Power")
  1220. {
  1221. const caffe::PowerParameter& power_param = layer.power_param();
  1222. fprintf(pp, " 0=%e", power_param.power());
  1223. fprintf(pp, " 1=%e", power_param.scale());
  1224. fprintf(pp, " 2=%e", power_param.shift());
  1225. }
  1226. else if (layer.type() == "PReLU")
  1227. {
  1228. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1229. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  1230. fprintf(pp, " 0=%d", slope_blob.data_size());
  1231. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  1232. }
  1233. else if (layer.type() == "PriorBox")
  1234. {
  1235. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  1236. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  1237. for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
  1238. {
  1239. float ar = prior_box_param.aspect_ratio(j);
  1240. if (fabs(ar - 1.) < 1e-6)
  1241. {
  1242. num_aspect_ratio--;
  1243. }
  1244. }
  1245. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  1246. if (prior_box_param.variance_size() == 4)
  1247. {
  1248. variances[0] = prior_box_param.variance(0);
  1249. variances[1] = prior_box_param.variance(1);
  1250. variances[2] = prior_box_param.variance(2);
  1251. variances[3] = prior_box_param.variance(3);
  1252. }
  1253. else if (prior_box_param.variance_size() == 1)
  1254. {
  1255. variances[0] = prior_box_param.variance(0);
  1256. variances[1] = prior_box_param.variance(0);
  1257. variances[2] = prior_box_param.variance(0);
  1258. variances[3] = prior_box_param.variance(0);
  1259. }
  1260. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  1261. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  1262. int image_width = -233;
  1263. int image_height = -233;
  1264. if (prior_box_param.has_img_size())
  1265. {
  1266. image_width = prior_box_param.img_size();
  1267. image_height = prior_box_param.img_size();
  1268. }
  1269. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  1270. {
  1271. image_width = prior_box_param.img_w();
  1272. image_height = prior_box_param.img_h();
  1273. }
  1274. float step_width = -233;
  1275. float step_height = -233;
  1276. if (prior_box_param.has_step())
  1277. {
  1278. step_width = prior_box_param.step();
  1279. step_height = prior_box_param.step();
  1280. }
  1281. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  1282. {
  1283. step_width = prior_box_param.step_w();
  1284. step_height = prior_box_param.step_h();
  1285. }
  1286. fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
  1287. for (int j = 0; j < prior_box_param.min_size_size(); j++)
  1288. {
  1289. fprintf(pp, ",%e", prior_box_param.min_size(j));
  1290. }
  1291. fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
  1292. for (int j = 0; j < prior_box_param.max_size_size(); j++)
  1293. {
  1294. fprintf(pp, ",%e", prior_box_param.max_size(j));
  1295. }
  1296. fprintf(pp, " -23302=%d", num_aspect_ratio);
  1297. for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
  1298. {
  1299. float ar = prior_box_param.aspect_ratio(j);
  1300. if (fabs(ar - 1.) < 1e-6)
  1301. {
  1302. continue;
  1303. }
  1304. fprintf(pp, ",%e", ar);
  1305. }
  1306. fprintf(pp, " 3=%e", variances[0]);
  1307. fprintf(pp, " 4=%e", variances[1]);
  1308. fprintf(pp, " 5=%e", variances[2]);
  1309. fprintf(pp, " 6=%e", variances[3]);
  1310. fprintf(pp, " 7=%d", flip);
  1311. fprintf(pp, " 8=%d", clip);
  1312. fprintf(pp, " 9=%d", image_width);
  1313. fprintf(pp, " 10=%d", image_height);
  1314. fprintf(pp, " 11=%e", step_width);
  1315. fprintf(pp, " 12=%e", step_height);
  1316. fprintf(pp, " 13=%e", prior_box_param.offset());
  1317. }
  1318. else if (layer.type() == "PSROIPooling")
  1319. {
  1320. const caffe::PSROIPoolingParameter& psroi_pooling_param = layer.psroi_pooling_param();
  1321. fprintf(pp, " 0=%d", psroi_pooling_param.group_size());
  1322. fprintf(pp, " 1=%d", psroi_pooling_param.group_size());
  1323. fprintf(pp, " 2=%e", psroi_pooling_param.spatial_scale());
  1324. fprintf(pp, " 3=%d", psroi_pooling_param.output_dim());
  1325. }
  1326. else if (layer.type() == "Python")
  1327. {
  1328. const caffe::PythonParameter& python_param = layer.python_param();
  1329. std::string python_layer_name = python_param.layer();
  1330. if (python_layer_name == "ProposalLayer")
  1331. {
  1332. int feat_stride = 16;
  1333. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  1334. int base_size = 16;
  1335. // float ratio;
  1336. // float scale;
  1337. int pre_nms_topN = 6000;
  1338. int after_nms_topN = 300;
  1339. float nms_thresh = 0.7f;
  1340. int min_size = 16;
  1341. fprintf(pp, " 0=%d", feat_stride);
  1342. fprintf(pp, " 1=%d", base_size);
  1343. fprintf(pp, " 2=%d", pre_nms_topN);
  1344. fprintf(pp, " 3=%d", after_nms_topN);
  1345. fprintf(pp, " 4=%e", nms_thresh);
  1346. fprintf(pp, " 5=%d", min_size);
  1347. }
  1348. }
  1349. else if (layer.type() == "ReLU")
  1350. {
  1351. const caffe::ReLUParameter& relu_param = layer.relu_param();
  1352. if (relu_param.has_negative_slope())
  1353. {
  1354. fprintf(pp, " 0=%e", relu_param.negative_slope());
  1355. }
  1356. }
  1357. else if (layer.type() == "ReLU6")
  1358. {
  1359. float min = 0.f;
  1360. float max = 6.f;
  1361. fprintf(pp, " 0=%e", min);
  1362. fprintf(pp, " 1=%e", max);
  1363. }
  1364. else if (layer.type() == "Reorg")
  1365. {
  1366. const caffe::ReorgParameter& reorg_param = layer.reorg_param();
  1367. fprintf(pp, " 0=%d", reorg_param.stride());
  1368. }
  1369. else if (layer.type() == "Reshape")
  1370. {
  1371. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  1372. const caffe::BlobShape& bs = reshape_param.shape();
  1373. if (bs.dim_size() == 1)
  1374. {
  1375. fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(0)));
  1376. }
  1377. else if (bs.dim_size() == 2)
  1378. {
  1379. fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(1)));
  1380. }
  1381. else if (bs.dim_size() == 3)
  1382. {
  1383. fprintf(pp, " 0=%zd 1=%zd 2=-233", size_t(bs.dim(2)), bs.dim(1));
  1384. }
  1385. else // bs.dim_size() == 4
  1386. {
  1387. fprintf(pp, " 0=%zd 1=%zd 2=%zd", size_t(bs.dim(3)), size_t(bs.dim(2)), size_t(bs.dim(1)));
  1388. }
  1389. fprintf(pp, " 3=0"); // permute
  1390. }
  1391. else if (layer.type() == "ROIAlign")
  1392. {
  1393. const caffe::ROIAlignParameter& roi_align_param = layer.roi_align_param();
  1394. fprintf(pp, " 0=%d", roi_align_param.pooled_w());
  1395. fprintf(pp, " 1=%d", roi_align_param.pooled_h());
  1396. fprintf(pp, " 2=%e", roi_align_param.spatial_scale());
  1397. fprintf(pp, " 3=%d", 0);
  1398. fprintf(pp, " 4=%d", false);
  1399. fprintf(pp, " 5=%d", 0);
  1400. }
  1401. else if (layer.type() == "ROIPooling")
  1402. {
  1403. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  1404. fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
  1405. fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
  1406. fprintf(pp, " 2=%e", roi_pooling_param.spatial_scale());
  1407. }
  1408. else if (layer.type() == "Scale")
  1409. {
  1410. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1411. const caffe::ScaleParameter& scale_param = layer.scale_param();
  1412. bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
  1413. if (scale_weight)
  1414. {
  1415. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1416. fprintf(pp, " 0=%d", int(weight_blob.data_size()));
  1417. }
  1418. else
  1419. {
  1420. fprintf(pp, " 0=-233");
  1421. }
  1422. fprintf(pp, " 1=%d", scale_param.bias_term());
  1423. for (int j = 0; j < binlayer.blobs_size(); j++)
  1424. {
  1425. const caffe::BlobProto& blob = binlayer.blobs(j);
  1426. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1427. }
  1428. }
  1429. else if (layer.type() == "ShuffleChannel")
  1430. {
  1431. const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
  1432. fprintf(pp, " 0=%d", shuffle_channel_param.group());
  1433. }
  1434. else if (layer.type() == "Slice")
  1435. {
  1436. const caffe::SliceParameter& slice_param = layer.slice_param();
  1437. if (slice_param.slice_point_size() == 0)
  1438. {
  1439. int num_slice = layer.top_size();
  1440. fprintf(pp, " -23300=%d", num_slice);
  1441. for (int j = 0; j < num_slice; j++)
  1442. {
  1443. fprintf(pp, ",-233");
  1444. }
  1445. }
  1446. else
  1447. {
  1448. int num_slice = slice_param.slice_point_size() + 1;
  1449. fprintf(pp, " -23300=%d", num_slice);
  1450. int prev_offset = 0;
  1451. for (int j = 0; j < slice_param.slice_point_size(); j++)
  1452. {
  1453. int offset = slice_param.slice_point(j);
  1454. fprintf(pp, ",%d", offset - prev_offset);
  1455. prev_offset = offset;
  1456. }
  1457. fprintf(pp, ",-233");
  1458. }
  1459. int axis = 0;
  1460. if (slice_param.has_axis())
  1461. {
  1462. axis = slice_param.axis() - 1;
  1463. }
  1464. else if (slice_param.has_slice_dim())
  1465. {
  1466. axis = slice_param.slice_dim() - 1;
  1467. }
  1468. fprintf(pp, " 1=%d", axis);
  1469. }
  1470. else if (layer.type() == "Softmax")
  1471. {
  1472. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  1473. int dim = softmax_param.axis() - 1;
  1474. fprintf(pp, " 0=%d", dim);
  1475. fprintf(pp, " 1=1");
  1476. }
  1477. else if (layer.type() == "Threshold")
  1478. {
  1479. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  1480. fprintf(pp, " 0=%e", threshold_param.threshold());
  1481. }
  1482. else if (layer.type() == "YoloDetectionOutput")
  1483. {
  1484. const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();
  1485. fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
  1486. fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
  1487. fprintf(pp, " 2=%e", yolo_detection_output_param.confidence_threshold());
  1488. fprintf(pp, " 3=%e", yolo_detection_output_param.nms_threshold());
  1489. int num_bias = yolo_detection_output_param.biases_size();
  1490. fprintf(pp, " -23304=%d", num_bias);
  1491. for (int j = 0; j < num_bias; j++)
  1492. {
  1493. fprintf(pp, ",%e", yolo_detection_output_param.biases(j));
  1494. }
  1495. }
  1496. else if (layer.type() == "Yolov3DetectionOutput")
  1497. {
  1498. const caffe::Yolov3DetectionOutputParameter& yolov3_detection_output_param = layer.yolov3_detection_output_param();
  1499. fprintf(pp, " 0=%d", yolov3_detection_output_param.num_classes());
  1500. fprintf(pp, " 1=%d", yolov3_detection_output_param.num_box());
  1501. fprintf(pp, " 2=%e", yolov3_detection_output_param.confidence_threshold());
  1502. fprintf(pp, " 3=%e", yolov3_detection_output_param.nms_threshold());
  1503. int num_bias = yolov3_detection_output_param.biases_size();
  1504. fprintf(pp, " -23304=%d", num_bias);
  1505. for (int j = 0; j < num_bias; j++)
  1506. {
  1507. fprintf(pp, ",%e", yolov3_detection_output_param.biases(j));
  1508. }
  1509. int num_mask = yolov3_detection_output_param.mask_size();
  1510. fprintf(pp, " -23305=%d", num_mask);
  1511. for (int j = 0; j < num_mask; j++)
  1512. {
  1513. fprintf(pp, ",%e", (float)yolov3_detection_output_param.mask(j));
  1514. }
  1515. int num_anchors = yolov3_detection_output_param.anchors_scale_size();
  1516. fprintf(pp, " -23306=%d", num_anchors);
  1517. for (int j = 0; j < num_anchors; j++)
  1518. {
  1519. fprintf(pp, ",%e", (float)yolov3_detection_output_param.anchors_scale(j));
  1520. }
  1521. fprintf(pp, " 7=%d", yolov3_detection_output_param.mask_group_num());
  1522. }
  1523. fprintf(pp, "\n");
  1524. // add split layer if top reference larger than one
  1525. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  1526. {
  1527. std::string blob_name = blob_name_decorated[layer.top(0)];
  1528. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1529. {
  1530. int refcount = bottom_reference[blob_name];
  1531. if (refcount > 1)
  1532. {
  1533. char splitname[256];
  1534. sprintf(splitname, "splitncnn_%d", internal_split);
  1535. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1536. fprintf(pp, " %s", blob_name.c_str());
  1537. for (int j = 0; j < refcount; j++)
  1538. {
  1539. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1540. }
  1541. fprintf(pp, "\n");
  1542. internal_split++;
  1543. }
  1544. }
  1545. }
  1546. else
  1547. {
  1548. for (int j = 0; j < layer.top_size(); j++)
  1549. {
  1550. std::string blob_name = layer.top(j);
  1551. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1552. {
  1553. int refcount = bottom_reference[blob_name];
  1554. if (refcount > 1)
  1555. {
  1556. char splitname[256];
  1557. sprintf(splitname, "splitncnn_%d", internal_split);
  1558. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1559. fprintf(pp, " %s", blob_name.c_str());
  1560. for (int j = 0; j < refcount; j++)
  1561. {
  1562. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1563. }
  1564. fprintf(pp, "\n");
  1565. internal_split++;
  1566. }
  1567. }
  1568. }
  1569. }
  1570. }
  1571. fclose(pp);
  1572. fclose(bp);
  1573. return 0;
  1574. }