You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

caffe2ncnn.cpp 60 kB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <fstream>
#include <limits>
#include <map>
#include <set>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>

#include "caffe.pb.h"
  27. static inline size_t alignSize(size_t sz, int n)
  28. {
  29. return (sz + n-1) & -n;
  30. }
  31. // convert float to half precision floating point
  32. static unsigned short float2half(float value)
  33. {
  34. // 1 : 8 : 23
  35. union
  36. {
  37. unsigned int u;
  38. float f;
  39. } tmp;
  40. tmp.f = value;
  41. // 1 : 8 : 23
  42. unsigned short sign = (tmp.u & 0x80000000) >> 31;
  43. unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
  44. unsigned int significand = tmp.u & 0x7FFFFF;
  45. // fprintf(stderr, "%d %d %d\n", sign, exponent, significand);
  46. // 1 : 5 : 10
  47. unsigned short fp16;
  48. if (exponent == 0)
  49. {
  50. // zero or denormal, always underflow
  51. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  52. }
  53. else if (exponent == 0xFF)
  54. {
  55. // infinity or NaN
  56. fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
  57. }
  58. else
  59. {
  60. // normalized
  61. short newexp = exponent + (- 127 + 15);
  62. if (newexp >= 31)
  63. {
  64. // overflow, return infinity
  65. fp16 = (sign << 15) | (0x1F << 10) | 0x00;
  66. }
  67. else if (newexp <= 0)
  68. {
  69. // underflow
  70. if (newexp >= -10)
  71. {
  72. // denormal half-precision
  73. unsigned short sig = (significand | 0x800000) >> (14 - newexp);
  74. fp16 = (sign << 15) | (0x00 << 10) | sig;
  75. }
  76. else
  77. {
  78. // underflow
  79. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  80. }
  81. }
  82. else
  83. {
  84. fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
  85. }
  86. }
  87. return fp16;
  88. }
  89. // round to nearest
  90. static signed char float2int8(float value)
  91. {
  92. float tmp;
  93. if (value >= 0.f) tmp = value + 0.5;
  94. else tmp = value - 0.5;
  95. if (tmp > 127)
  96. return 127;
  97. if (tmp < -128)
  98. return -128;
  99. return tmp;
  100. }
  101. static bool read_int8scale_table(const char* filepath, std::map<std::string, float>& blob_int8scale_table, std::map<std::string, float>& weight_int8scale_table)
  102. {
  103. blob_int8scale_table.clear();
  104. weight_int8scale_table.clear();
  105. FILE* fp = fopen(filepath, "rb");
  106. if (!fp)
  107. {
  108. fprintf(stderr, "fopen %s failed\n", filepath);
  109. return false;
  110. }
  111. char line[1024];
  112. while (!feof(fp))
  113. {
  114. char* s = fgets(line, 1024, fp);
  115. if (!s)
  116. break;
  117. char key[256];
  118. float scale = 1.f;
  119. int nscan = sscanf(line, "%255s %f", key, &scale);
  120. if (nscan != 2)
  121. continue;
  122. std::string keystr = key;
  123. // XYZ_param_N pattern
  124. if (strstr(key, "_param_"))
  125. {
  126. weight_int8scale_table[ keystr ] = scale;
  127. }
  128. else
  129. {
  130. blob_int8scale_table[ keystr ] = scale;
  131. }
  132. }
  133. fclose(fp);
  134. return true;
  135. }
  136. static int quantize_weight(float *data, size_t data_length, std::vector<unsigned short>& float16_weights)
  137. {
  138. float16_weights.resize(data_length);
  139. for (size_t i = 0; i < data_length; i++)
  140. {
  141. float f = data[i];
  142. unsigned short fp16 = float2half(f);
  143. float16_weights[i] = fp16;
  144. }
  145. // magic tag for half-precision floating point
  146. return 0x01306B47;
  147. }
  148. static int quantize_weight(float *data, size_t data_length, float scale, std::vector<signed char>& int8_weights)
  149. {
  150. int8_weights.resize(data_length);
  151. for (size_t i = 0; i < data_length; i++)
  152. {
  153. float f = data[i];
  154. signed char int8 = float2int8(f * scale);
  155. int8_weights[i] = int8;
  156. }
  157. // magic tag for int8
  158. return 0x000D4B38;
  159. }
  160. static bool quantize_weight(float *data, size_t data_length, int quantize_level, std::vector<float> &quantize_table, std::vector<unsigned char> &quantize_index) {
  161. assert(quantize_level != 0);
  162. assert(data != NULL);
  163. assert(data_length > 0);
  164. if (data_length < static_cast<size_t>(quantize_level)) {
  165. fprintf(stderr, "No need quantize,because: data_length < quantize_level");
  166. return false;
  167. }
  168. quantize_table.reserve(quantize_level);
  169. quantize_index.reserve(data_length);
  170. // 1. Find min and max value
  171. float max_value = std::numeric_limits<float>::min();
  172. float min_value = std::numeric_limits<float>::max();
  173. for (size_t i = 0; i < data_length; ++i)
  174. {
  175. if (max_value < data[i]) max_value = data[i];
  176. if (min_value > data[i]) min_value = data[i];
  177. }
  178. float strides = (max_value - min_value) / quantize_level;
  179. // 2. Generate quantize table
  180. for (int i = 0; i < quantize_level; ++i)
  181. {
  182. quantize_table.push_back(min_value + i * strides);
  183. }
  184. // 3. Align data to the quantized value
  185. for (size_t i = 0; i < data_length; ++i)
  186. {
  187. size_t table_index = int((data[i] - min_value) / strides);
  188. table_index = std::min<float>(table_index, quantize_level - 1);
  189. float low_value = quantize_table[table_index];
  190. float high_value = low_value + strides;
  191. // find a nearest value between low and high value.
  192. float targetValue = data[i] - low_value < high_value - data[i] ? low_value : high_value;
  193. table_index = int((targetValue - min_value) / strides);
  194. table_index = std::min<float>(table_index, quantize_level - 1);
  195. quantize_index.push_back(table_index);
  196. }
  197. return true;
  198. }
  199. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  200. {
  201. std::ifstream fs(filepath, std::ifstream::in);
  202. if (!fs.is_open())
  203. {
  204. fprintf(stderr, "open failed %s\n", filepath);
  205. return false;
  206. }
  207. google::protobuf::io::IstreamInputStream input(&fs);
  208. bool success = google::protobuf::TextFormat::Parse(&input, message);
  209. fs.close();
  210. return success;
  211. }
  212. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  213. {
  214. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  215. if (!fs.is_open())
  216. {
  217. fprintf(stderr, "open failed %s\n", filepath);
  218. return false;
  219. }
  220. google::protobuf::io::IstreamInputStream input(&fs);
  221. google::protobuf::io::CodedInputStream codedstr(&input);
  222. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  223. bool success = message->ParseFromCodedStream(&codedstr);
  224. fs.close();
  225. return success;
  226. }
  227. int main(int argc, char** argv)
  228. {
  229. if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
  230. {
  231. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
  232. return -1;
  233. }
  234. const char* caffeproto = argv[1];
  235. const char* caffemodel = argv[2];
  236. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  237. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  238. const char* quantize_param = argc >= 6 ? argv[5] : "0";
  239. const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
  240. int quantize_level = atoi(quantize_param);
  241. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536) {
  242. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  243. return -1;
  244. }
  245. caffe::NetParameter proto;
  246. caffe::NetParameter net;
  247. // load
  248. bool s0 = read_proto_from_text(caffeproto, &proto);
  249. if (!s0)
  250. {
  251. fprintf(stderr, "read_proto_from_text failed\n");
  252. return -1;
  253. }
  254. bool s1 = read_proto_from_binary(caffemodel, &net);
  255. if (!s1)
  256. {
  257. fprintf(stderr, "read_proto_from_binary failed\n");
  258. return -1;
  259. }
  260. std::map<std::string, float> blob_int8scale_table;
  261. std::map<std::string, float> weight_int8scale_table;
  262. if (int8scale_table_path)
  263. {
  264. bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
  265. if (!s2)
  266. {
  267. fprintf(stderr, "read_int8scale_table failed\n");
  268. return -1;
  269. }
  270. }
  271. FILE* pp = fopen(ncnn_prototxt, "wb");
  272. FILE* bp = fopen(ncnn_modelbin, "wb");
  273. // magic
  274. fprintf(pp, "7767517\n");
  275. // rename mapping for identical bottom top style
  276. std::map<std::string, std::string> blob_name_decorated;
  277. // bottom blob reference
  278. std::map<std::string, int> bottom_reference;
  279. // global definition line
  280. // [layer count] [blob count]
  281. int layer_count = proto.layer_size();
  282. std::set<std::string> blob_names;
  283. for (int i=0; i<layer_count; i++)
  284. {
  285. const caffe::LayerParameter& layer = proto.layer(i);
  286. for (int j=0; j<layer.bottom_size(); j++)
  287. {
  288. std::string blob_name = layer.bottom(j);
  289. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  290. {
  291. blob_name = blob_name_decorated[blob_name];
  292. }
  293. blob_names.insert(blob_name);
  294. if (bottom_reference.find(blob_name) == bottom_reference.end())
  295. {
  296. bottom_reference[blob_name] = 1;
  297. }
  298. else
  299. {
  300. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  301. }
  302. }
  303. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  304. {
  305. std::string blob_name = layer.top(0) + "_" + layer.name();
  306. blob_name_decorated[layer.top(0)] = blob_name;
  307. blob_names.insert(blob_name);
  308. }
  309. else
  310. {
  311. for (int j=0; j<layer.top_size(); j++)
  312. {
  313. std::string blob_name = layer.top(j);
  314. blob_names.insert(blob_name);
  315. }
  316. }
  317. }
  318. // remove bottom_reference entry with reference equals to one
  319. int splitncnn_blob_count = 0;
  320. std::map<std::string, int>::iterator it = bottom_reference.begin();
  321. while (it != bottom_reference.end())
  322. {
  323. if (it->second == 1)
  324. {
  325. bottom_reference.erase(it++);
  326. }
  327. else
  328. {
  329. splitncnn_blob_count += it->second;
  330. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  331. ++it;
  332. }
  333. }
  334. fprintf(pp, "%lu %lu\n", layer_count + bottom_reference.size(), blob_names.size() + splitncnn_blob_count);
  335. // populate
  336. blob_name_decorated.clear();
  337. int internal_split = 0;
  338. for (int i=0; i<layer_count; i++)
  339. {
  340. const caffe::LayerParameter& layer = proto.layer(i);
  341. // layer definition line, repeated
  342. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  343. if (layer.type() == "BN")
  344. {
  345. fprintf(pp, "%-16s", "Scale");
  346. }
  347. else if (layer.type() == "Convolution")
  348. {
  349. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  350. if (convolution_param.group() != 1)
  351. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  352. else
  353. fprintf(pp, "%-16s", "Convolution");
  354. }
  355. else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  356. {
  357. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  358. }
  359. else if (layer.type() == "Deconvolution")
  360. {
  361. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  362. if (convolution_param.group() != 1)
  363. fprintf(pp, "%-16s", "DeconvolutionDepthWise");
  364. else
  365. fprintf(pp, "%-16s", "Deconvolution");
  366. }
  367. else if (layer.type() == "MemoryData")
  368. {
  369. fprintf(pp, "%-16s", "Input");
  370. }
  371. else if (layer.type() == "Python")
  372. {
  373. const caffe::PythonParameter& python_param = layer.python_param();
  374. std::string python_layer_name = python_param.layer();
  375. if (python_layer_name == "ProposalLayer")
  376. fprintf(pp, "%-16s", "Proposal");
  377. else
  378. fprintf(pp, "%-16s", python_layer_name.c_str());
  379. }
  380. else if (layer.type() == "ReLU6")
  381. {
  382. fprintf(pp, "%-16s", "Clip");
  383. }
  384. else
  385. {
  386. fprintf(pp, "%-16s", layer.type().c_str());
  387. }
  388. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  389. for (int j=0; j<layer.bottom_size(); j++)
  390. {
  391. std::string blob_name = layer.bottom(j);
  392. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  393. {
  394. blob_name = blob_name_decorated[layer.bottom(j)];
  395. }
  396. if (bottom_reference.find(blob_name) != bottom_reference.end())
  397. {
  398. int refidx = bottom_reference[blob_name] - 1;
  399. bottom_reference[blob_name] = refidx;
  400. char splitsuffix[256];
  401. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  402. blob_name = blob_name + splitsuffix;
  403. }
  404. fprintf(pp, " %s", blob_name.c_str());
  405. }
  406. // decorated
  407. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  408. {
  409. std::string blob_name = layer.top(0) + "_" + layer.name();
  410. blob_name_decorated[layer.top(0)] = blob_name;
  411. fprintf(pp, " %s", blob_name.c_str());
  412. }
  413. else
  414. {
  415. for (int j=0; j<layer.top_size(); j++)
  416. {
  417. std::string blob_name = layer.top(j);
  418. fprintf(pp, " %s", blob_name.c_str());
  419. }
  420. }
  421. // find blob binary by layer name
  422. int netidx;
  423. for (netidx=0; netidx<net.layer_size(); netidx++)
  424. {
  425. if (net.layer(netidx).name() == layer.name())
  426. {
  427. break;
  428. }
  429. }
  430. // layer specific params
  431. if (layer.type() == "BatchNorm")
  432. {
  433. const caffe::LayerParameter& binlayer = net.layer(netidx);
  434. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  435. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  436. fprintf(pp, " 0=%d", (int)mean_blob.data_size());
  437. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  438. float eps = batch_norm_param.eps();
  439. std::vector<float> ones(mean_blob.data_size(), 1.f);
  440. fwrite(ones.data(), sizeof(float), ones.size(), bp);// slope
  441. if (binlayer.blobs_size() < 3)
  442. {
  443. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  444. float tmp;
  445. for (int j=0; j<var_blob.data_size(); j++)
  446. {
  447. tmp = var_blob.data().data()[j] + eps;
  448. fwrite(&tmp, sizeof(float), 1, bp);
  449. }
  450. }
  451. else
  452. {
  453. float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
  454. // premultiply scale_factor to mean and variance
  455. float tmp;
  456. for (int j=0; j<mean_blob.data_size(); j++)
  457. {
  458. tmp = mean_blob.data().data()[j] * scale_factor;
  459. fwrite(&tmp, sizeof(float), 1, bp);
  460. }
  461. for (int j=0; j<var_blob.data_size(); j++)
  462. {
  463. tmp = var_blob.data().data()[j] * scale_factor + eps;
  464. fwrite(&tmp, sizeof(float), 1, bp);
  465. }
  466. }
  467. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  468. fwrite(zeros.data(), sizeof(float), zeros.size(), bp);// bias
  469. }
  470. else if (layer.type() == "BN")
  471. {
  472. const caffe::LayerParameter& binlayer = net.layer(netidx);
  473. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  474. const caffe::BlobProto& shift_blob = binlayer.blobs(1);
  475. fprintf(pp, " 0=%d", (int)scale_blob.data_size());
  476. fprintf(pp, " 1=1");
  477. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  478. fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
  479. }
  480. else if (layer.type() == "Concat")
  481. {
  482. const caffe::ConcatParameter& concat_param = layer.concat_param();
  483. int dim = concat_param.axis() - 1;
  484. fprintf(pp, " 0=%d", dim);
  485. }
  486. else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  487. {
  488. const caffe::LayerParameter& binlayer = net.layer(netidx);
  489. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  490. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  491. fprintf(pp, " 0=%d", convolution_param.num_output());
  492. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  493. {
  494. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  495. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  496. }
  497. else
  498. {
  499. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  500. }
  501. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  502. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  503. {
  504. fprintf(pp, " 3=%d", convolution_param.stride_w());
  505. fprintf(pp, " 13=%d", convolution_param.stride_h());
  506. }
  507. else
  508. {
  509. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  510. }
  511. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  512. {
  513. fprintf(pp, " 4=%d", convolution_param.pad_w());
  514. fprintf(pp, " 14=%d", convolution_param.pad_h());
  515. }
  516. else
  517. {
  518. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  519. }
  520. fprintf(pp, " 5=%d", convolution_param.bias_term());
  521. fprintf(pp, " 6=%d", weight_blob.data_size());
  522. int num_group = 1;
  523. if (layer.type() == "ConvolutionDepthwise")
  524. {
  525. num_group = convolution_param.num_output();
  526. }
  527. else
  528. {
  529. num_group = convolution_param.group();
  530. }
  531. if (num_group != 1)
  532. {
  533. fprintf(pp, " 7=%d", num_group);
  534. }
  535. bool has_int8scale = false;
  536. float weight_int8scale = 0.f;
  537. float blob_int8scale = 0.f;
  538. if (int8scale_table_path)
  539. {
  540. char key[256];
  541. sprintf(key, "%s_param_0", layer.name().c_str());
  542. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  543. {
  544. weight_int8scale = weight_int8scale_table[std::string(key)];
  545. }
  546. std::string bottom_blob_name = layer.bottom(0);
  547. if (blob_int8scale_table.find(bottom_blob_name) != blob_int8scale_table.end())
  548. {
  549. blob_int8scale = blob_int8scale_table[bottom_blob_name];
  550. }
  551. has_int8scale = weight_int8scale != 0.f && blob_int8scale != 0.f;
  552. }
  553. if (has_int8scale)
  554. {
  555. if (num_group != 1)
  556. {
  557. fprintf(pp, " -23308=1,%.8e", weight_int8scale);
  558. fprintf(pp, " -23309=1,%.8e", blob_int8scale);
  559. }
  560. else
  561. {
  562. fprintf(pp, " 8=%.8e", weight_int8scale);
  563. fprintf(pp, " 9=%.8e", blob_int8scale);
  564. }
  565. }
  566. for (int j = 0; j < binlayer.blobs_size(); j++)
  567. {
  568. int quantize_tag = 0;
  569. const caffe::BlobProto& blob = binlayer.blobs(j);
  570. std::vector<float> quantize_table;
  571. std::vector<unsigned char> quantize_index;
  572. std::vector<unsigned short> float16_weights;
  573. std::vector<signed char> int8_weights;
  574. // we will not quantize the bias values
  575. if (j == 0)
  576. {
  577. if (has_int8scale)
  578. {
  579. if (quantize_level == 0)
  580. {
  581. quantize_tag = 0x0002C056;
  582. }
  583. else if (quantize_level == 256)
  584. {
  585. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  586. }
  587. }
  588. else if (quantize_level == 256)
  589. {
  590. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  591. }
  592. else if (quantize_level == 65536)
  593. {
  594. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  595. }
  596. // write quantize tag first
  597. fwrite(&quantize_tag, sizeof(int), 1, bp);
  598. if (quantize_tag)
  599. {
  600. int p0 = ftell(bp);
  601. if (has_int8scale)
  602. {
  603. if (quantize_level == 0)
  604. {
  605. // write original data and int8scale
  606. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  607. }
  608. else if (quantize_level == 256)
  609. {
  610. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  611. }
  612. }
  613. else if (quantize_level == 256)
  614. {
  615. // write quantize table and index
  616. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  617. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  618. }
  619. else if (quantize_level == 65536)
  620. {
  621. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  622. }
  623. // padding to 32bit align
  624. int nwrite = ftell(bp) - p0;
  625. int nalign = alignSize(nwrite, 4);
  626. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  627. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  628. }
  629. else
  630. {
  631. // write original data
  632. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  633. }
  634. }
  635. else
  636. {
  637. // write original data
  638. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  639. }
  640. }
  641. }
  642. else if (layer.type() == "Crop")
  643. {
  644. const caffe::CropParameter& crop_param = layer.crop_param();
  645. int num_offset = crop_param.offset_size();
  646. if (num_offset == 2)
  647. {
  648. int woffset = crop_param.offset(1);
  649. int hoffset = crop_param.offset(0);
  650. fprintf(pp, " 0=%d", woffset);
  651. fprintf(pp, " 1=%d", hoffset);
  652. }
  653. else if (num_offset == 3)
  654. {
  655. int woffset = crop_param.offset(2);
  656. int hoffset = crop_param.offset(1);
  657. int coffset = crop_param.offset(0);
  658. fprintf(pp, " 0=%d", woffset);
  659. fprintf(pp, " 1=%d", hoffset);
  660. fprintf(pp, " 2=%d", coffset);
  661. }
  662. }
  663. else if (layer.type() == "Deconvolution")
  664. {
  665. const caffe::LayerParameter& binlayer = net.layer(netidx);
  666. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  667. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  668. fprintf(pp, " 0=%d", convolution_param.num_output());
  669. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  670. {
  671. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  672. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  673. }
  674. else
  675. {
  676. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  677. }
  678. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  679. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  680. {
  681. fprintf(pp, " 3=%d", convolution_param.stride_w());
  682. fprintf(pp, " 13=%d", convolution_param.stride_h());
  683. }
  684. else
  685. {
  686. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  687. }
  688. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  689. {
  690. fprintf(pp, " 4=%d", convolution_param.pad_w());
  691. fprintf(pp, " 14=%d", convolution_param.pad_h());
  692. }
  693. else
  694. {
  695. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  696. }
  697. fprintf(pp, " 5=%d", convolution_param.bias_term());
  698. fprintf(pp, " 6=%d", weight_blob.data_size());
  699. int group = convolution_param.group();
  700. if (group != 1)
  701. {
  702. fprintf(pp, " 7=%d", group);
  703. }
  704. int quantized_weight = 0;
  705. fwrite(&quantized_weight, sizeof(int), 1, bp);
  706. int maxk = 0;
  707. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  708. {
  709. maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
  710. }
  711. else
  712. {
  713. maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
  714. }
  715. for (int g=0; g<group; g++)
  716. {
  717. // reorder weight from inch-outch to outch-inch
  718. int num_output = convolution_param.num_output() / group;
  719. int num_input = weight_blob.data_size() / maxk / num_output / group;
  720. const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
  721. for (int k=0; k<num_output; k++)
  722. {
  723. for (int j=0; j<num_input; j++)
  724. {
  725. fwrite(weight_data_ptr + (j*num_output + k) * maxk, sizeof(float), maxk, bp);
  726. }
  727. }
  728. }
  729. for (int j=1; j<binlayer.blobs_size(); j++)
  730. {
  731. const caffe::BlobProto& blob = binlayer.blobs(j);
  732. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  733. }
  734. }
  735. else if (layer.type() == "DetectionOutput")
  736. {
  737. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  738. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  739. fprintf(pp, " 0=%d", detection_output_param.num_classes());
  740. fprintf(pp, " 1=%f", nms_param.nms_threshold());
  741. fprintf(pp, " 2=%d", nms_param.top_k());
  742. fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
  743. fprintf(pp, " 4=%f", detection_output_param.confidence_threshold());
  744. }
  745. else if (layer.type() == "Dropout")
  746. {
  747. const caffe::DropoutParameter& dropout_param = layer.dropout_param();
  748. if (dropout_param.has_scale_train() && !dropout_param.scale_train())
  749. {
  750. float scale = 1.f - dropout_param.dropout_ratio();
  751. fprintf(pp, " 0=%f", scale);
  752. }
  753. }
  754. else if (layer.type() == "Eltwise")
  755. {
  756. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  757. int coeff_size = eltwise_param.coeff_size();
  758. fprintf(pp, " 0=%d", (int)eltwise_param.operation());
  759. fprintf(pp, " -23301=%d", coeff_size);
  760. for (int j=0; j<coeff_size; j++)
  761. {
  762. fprintf(pp, ",%f", eltwise_param.coeff(j));
  763. }
  764. }
  765. else if (layer.type() == "ELU")
  766. {
  767. const caffe::ELUParameter& elu_param = layer.elu_param();
  768. fprintf(pp, " 0=%f", elu_param.alpha());
  769. }
  770. else if (layer.type() == "Embed")
  771. {
  772. const caffe::LayerParameter& binlayer = net.layer(netidx);
  773. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  774. const caffe::EmbedParameter& embed_param = layer.embed_param();
  775. fprintf(pp, " 0=%d", embed_param.num_output());
  776. fprintf(pp, " 1=%d", embed_param.input_dim());
  777. fprintf(pp, " 2=%d", embed_param.bias_term());
  778. fprintf(pp, " 3=%d", weight_blob.data_size());
  779. for (int j=0; j<binlayer.blobs_size(); j++)
  780. {
  781. int quantize_tag = 0;
  782. const caffe::BlobProto& blob = binlayer.blobs(j);
  783. std::vector<float> quantize_table;
  784. std::vector<unsigned char> quantize_index;
  785. std::vector<unsigned short> float16_weights;
  786. // we will not quantize the bias values
  787. if (j == 0 && quantize_level != 0)
  788. {
  789. if (quantize_level == 256)
  790. {
  791. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  792. }
  793. else if (quantize_level == 65536)
  794. {
  795. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  796. }
  797. }
  798. // write quantize tag first
  799. if (j == 0)
  800. fwrite(&quantize_tag, sizeof(int), 1, bp);
  801. if (quantize_tag)
  802. {
  803. int p0 = ftell(bp);
  804. if (quantize_level == 256)
  805. {
  806. // write quantize table and index
  807. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  808. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  809. }
  810. else if (quantize_level == 65536)
  811. {
  812. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  813. }
  814. // padding to 32bit align
  815. int nwrite = ftell(bp) - p0;
  816. int nalign = alignSize(nwrite, 4);
  817. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  818. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  819. }
  820. else
  821. {
  822. // write original data
  823. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  824. }
  825. }
  826. }
  827. else if (layer.type() == "InnerProduct")
  828. {
  829. const caffe::LayerParameter& binlayer = net.layer(netidx);
  830. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  831. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  832. fprintf(pp, " 0=%d", inner_product_param.num_output());
  833. fprintf(pp, " 1=%d", inner_product_param.bias_term());
  834. fprintf(pp, " 2=%d", weight_blob.data_size());
  835. bool has_int8scale = false;
  836. float weight_int8scale = 0.f;
  837. float blob_int8scale = 0.f;
  838. if (int8scale_table_path)
  839. {
  840. char key[256];
  841. sprintf(key, "%s_param_0", layer.name().c_str());
  842. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  843. {
  844. weight_int8scale = weight_int8scale_table[std::string(key)];
  845. }
  846. std::string bottom_blob_name = layer.bottom(0);
  847. if (blob_int8scale_table.find(bottom_blob_name) != blob_int8scale_table.end())
  848. {
  849. blob_int8scale = blob_int8scale_table[bottom_blob_name];
  850. }
  851. has_int8scale = weight_int8scale != 0.f && blob_int8scale != 0.f;
  852. if (has_int8scale)
  853. {
  854. fprintf(pp, " 8=%.8e", weight_int8scale);
  855. fprintf(pp, " 9=%.8e", blob_int8scale);
  856. }
  857. }
  858. for (int j=0; j<binlayer.blobs_size(); j++)
  859. {
  860. int quantize_tag = 0;
  861. const caffe::BlobProto& blob = binlayer.blobs(j);
  862. std::vector<float> quantize_table;
  863. std::vector<unsigned char> quantize_index;
  864. std::vector<unsigned short> float16_weights;
  865. std::vector<signed char> int8_weights;
  866. // we will not quantize the bias values
  867. if (j == 0)
  868. {
  869. if (has_int8scale)
  870. {
  871. if (quantize_level == 0)
  872. {
  873. quantize_tag = 0x0002C056;
  874. }
  875. else if (quantize_level == 256)
  876. {
  877. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  878. }
  879. }
  880. else if (quantize_level == 256)
  881. {
  882. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  883. }
  884. else if (quantize_level == 65536)
  885. {
  886. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  887. }
  888. // write quantize tag first
  889. fwrite(&quantize_tag, sizeof(int), 1, bp);
  890. if (quantize_tag)
  891. {
  892. int p0 = ftell(bp);
  893. if (has_int8scale)
  894. {
  895. if (quantize_level == 0)
  896. {
  897. // write original data and int8scale
  898. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  899. }
  900. else if (quantize_level == 256)
  901. {
  902. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  903. }
  904. }
  905. else if (quantize_level == 256)
  906. {
  907. // write quantize table and index
  908. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  909. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  910. }
  911. else if (quantize_level == 65536)
  912. {
  913. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  914. }
  915. // padding to 32bit align
  916. int nwrite = ftell(bp) - p0;
  917. int nalign = alignSize(nwrite, 4);
  918. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  919. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  920. }
  921. else
  922. {
  923. // write original data
  924. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  925. }
  926. }
  927. else
  928. {
  929. // write original data
  930. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  931. }
  932. }
  933. }
  934. else if (layer.type() == "Input")
  935. {
  936. const caffe::InputParameter& input_param = layer.input_param();
  937. const caffe::BlobShape& bs = input_param.shape(0);
  938. if (bs.dim_size() == 4)
  939. {
  940. fprintf(pp, " 0=%ld", bs.dim(3));
  941. fprintf(pp, " 1=%ld", bs.dim(2));
  942. fprintf(pp, " 2=%ld", bs.dim(1));
  943. }
  944. else if (bs.dim_size() == 3)
  945. {
  946. fprintf(pp, " 0=%ld", bs.dim(2));
  947. fprintf(pp, " 1=%ld", bs.dim(1));
  948. fprintf(pp, " 2=-233");
  949. }
  950. else if (bs.dim_size() == 2)
  951. {
  952. fprintf(pp, " 0=%ld", bs.dim(1));
  953. fprintf(pp, " 1=-233");
  954. fprintf(pp, " 2=-233");
  955. }
  956. }
  957. else if (layer.type() == "Interp")
  958. {
  959. const caffe::InterpParameter& interp_param = layer.interp_param();
  960. fprintf(pp, " 0=%d", 2);
  961. fprintf(pp, " 1=%f", (float)interp_param.zoom_factor());
  962. fprintf(pp, " 2=%f", (float)interp_param.zoom_factor());
  963. fprintf(pp, " 3=%d", interp_param.height());
  964. fprintf(pp, " 4=%d", interp_param.width());
  965. }
  966. else if (layer.type() == "LRN")
  967. {
  968. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  969. fprintf(pp, " 0=%d", lrn_param.norm_region());
  970. fprintf(pp, " 1=%d", lrn_param.local_size());
  971. fprintf(pp, " 2=%f", lrn_param.alpha());
  972. fprintf(pp, " 3=%f", lrn_param.beta());
  973. }
  974. else if (layer.type() == "LSTM")
  975. {
  976. const caffe::LayerParameter& binlayer = net.layer(netidx);
  977. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  978. const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
  979. fprintf(pp, " 0=%d", recurrent_param.num_output());
  980. fprintf(pp, " 1=%d", weight_blob.data_size());
  981. for (int j=0; j<binlayer.blobs_size(); j++)
  982. {
  983. int quantize_tag = 0;
  984. const caffe::BlobProto& blob = binlayer.blobs(j);
  985. std::vector<float> quantize_table;
  986. std::vector<unsigned char> quantize_index;
  987. std::vector<unsigned short> float16_weights;
  988. if (quantize_level != 0)
  989. {
  990. if (quantize_level == 256)
  991. {
  992. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  993. }
  994. else if (quantize_level == 65536)
  995. {
  996. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  997. }
  998. }
  999. // write quantize tag first
  1000. fwrite(&quantize_tag, sizeof(int), 1, bp);
  1001. if (quantize_tag)
  1002. {
  1003. int p0 = ftell(bp);
  1004. if (quantize_level == 256)
  1005. {
  1006. // write quantize table and index
  1007. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  1008. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  1009. }
  1010. else if (quantize_level == 65536)
  1011. {
  1012. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  1013. }
  1014. // padding to 32bit align
  1015. int nwrite = ftell(bp) - p0;
  1016. int nalign = alignSize(nwrite, 4);
  1017. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1018. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1019. }
  1020. else
  1021. {
  1022. // write original data
  1023. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1024. }
  1025. }
  1026. }
  1027. else if (layer.type() == "MemoryData")
  1028. {
  1029. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  1030. fprintf(pp, " 0=%d", memory_data_param.width());
  1031. fprintf(pp, " 1=%d", memory_data_param.height());
  1032. fprintf(pp, " 2=%d", memory_data_param.channels());
  1033. }
  1034. else if (layer.type() == "MVN")
  1035. {
  1036. const caffe::MVNParameter& mvn_param = layer.mvn_param();
  1037. fprintf(pp, " 0=%d", mvn_param.normalize_variance());
  1038. fprintf(pp, " 1=%d", mvn_param.across_channels());
  1039. fprintf(pp, " 2=%f", mvn_param.eps());
  1040. }
  1041. else if (layer.type() == "Normalize")
  1042. {
  1043. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1044. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  1045. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  1046. fprintf(pp, " 0=%d", norm_param.across_spatial());
  1047. fprintf(pp, " 1=%d", norm_param.channel_shared());
  1048. fprintf(pp, " 2=%f", norm_param.eps());
  1049. fprintf(pp, " 3=%d", scale_blob.data_size());
  1050. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  1051. }
  1052. else if (layer.type() == "Permute")
  1053. {
  1054. const caffe::PermuteParameter& permute_param = layer.permute_param();
  1055. int order_size = permute_param.order_size();
  1056. int order_type = 0;
  1057. if (order_size == 0)
  1058. order_type = 0;
  1059. if (order_size == 1)
  1060. {
  1061. int order0 = permute_param.order(0);
  1062. if (order0 == 0)
  1063. order_type = 0;
  1064. // permute with N not supported
  1065. }
  1066. if (order_size == 2)
  1067. {
  1068. int order0 = permute_param.order(0);
  1069. int order1 = permute_param.order(1);
  1070. if (order0 == 0)
  1071. {
  1072. if (order1 == 1) // 0 1 2 3
  1073. order_type = 0;
  1074. else if (order1 == 2) // 0 2 1 3
  1075. order_type = 2;
  1076. else if (order1 == 3) // 0 3 1 2
  1077. order_type = 4;
  1078. }
  1079. // permute with N not supported
  1080. }
  1081. if (order_size == 3 || order_size == 4)
  1082. {
  1083. int order0 = permute_param.order(0);
  1084. int order1 = permute_param.order(1);
  1085. int order2 = permute_param.order(2);
  1086. if (order0 == 0)
  1087. {
  1088. if (order1 == 1)
  1089. {
  1090. if (order2 == 2) // 0 1 2 3
  1091. order_type = 0;
  1092. if (order2 == 3) // 0 1 3 2
  1093. order_type = 1;
  1094. }
  1095. else if (order1 == 2)
  1096. {
  1097. if (order2 == 1) // 0 2 1 3
  1098. order_type = 2;
  1099. if (order2 == 3) // 0 2 3 1
  1100. order_type = 3;
  1101. }
  1102. else if (order1 == 3)
  1103. {
  1104. if (order2 == 1) // 0 3 1 2
  1105. order_type = 4;
  1106. if (order2 == 2) // 0 3 2 1
  1107. order_type = 5;
  1108. }
  1109. }
  1110. // permute with N not supported
  1111. }
  1112. fprintf(pp, " 0=%d", order_type);
  1113. }
  1114. else if (layer.type() == "Pooling")
  1115. {
  1116. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  1117. fprintf(pp, " 0=%d", pooling_param.pool());
  1118. if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
  1119. {
  1120. fprintf(pp, " 1=%d", pooling_param.kernel_w());
  1121. fprintf(pp, " 11=%d", pooling_param.kernel_h());
  1122. }
  1123. else
  1124. {
  1125. fprintf(pp, " 1=%d", pooling_param.kernel_size());
  1126. }
  1127. if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
  1128. {
  1129. fprintf(pp, " 2=%d", pooling_param.stride_w());
  1130. fprintf(pp, " 12=%d", pooling_param.stride_h());
  1131. }
  1132. else
  1133. {
  1134. fprintf(pp, " 2=%d", pooling_param.stride());
  1135. }
  1136. if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
  1137. {
  1138. fprintf(pp, " 3=%d", pooling_param.pad_w());
  1139. fprintf(pp, " 13=%d", pooling_param.pad_h());
  1140. }
  1141. else
  1142. {
  1143. fprintf(pp, " 3=%d", pooling_param.pad());
  1144. }
  1145. fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  1146. }
  1147. else if (layer.type() == "Power")
  1148. {
  1149. const caffe::PowerParameter& power_param = layer.power_param();
  1150. fprintf(pp, " 0=%f", power_param.power());
  1151. fprintf(pp, " 1=%f", power_param.scale());
  1152. fprintf(pp, " 2=%f", power_param.shift());
  1153. }
  1154. else if (layer.type() == "PReLU")
  1155. {
  1156. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1157. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  1158. fprintf(pp, " 0=%d", slope_blob.data_size());
  1159. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  1160. }
  1161. else if (layer.type() == "PriorBox")
  1162. {
  1163. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  1164. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  1165. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  1166. {
  1167. float ar = prior_box_param.aspect_ratio(j);
  1168. if (fabs(ar - 1.) < 1e-6) {
  1169. num_aspect_ratio--;
  1170. }
  1171. }
  1172. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  1173. if (prior_box_param.variance_size() == 4)
  1174. {
  1175. variances[0] = prior_box_param.variance(0);
  1176. variances[1] = prior_box_param.variance(1);
  1177. variances[2] = prior_box_param.variance(2);
  1178. variances[3] = prior_box_param.variance(3);
  1179. }
  1180. else if (prior_box_param.variance_size() == 1)
  1181. {
  1182. variances[0] = prior_box_param.variance(0);
  1183. variances[1] = prior_box_param.variance(0);
  1184. variances[2] = prior_box_param.variance(0);
  1185. variances[3] = prior_box_param.variance(0);
  1186. }
  1187. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  1188. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  1189. int image_width = -233;
  1190. int image_height = -233;
  1191. if (prior_box_param.has_img_size())
  1192. {
  1193. image_width = prior_box_param.img_size();
  1194. image_height = prior_box_param.img_size();
  1195. }
  1196. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  1197. {
  1198. image_width = prior_box_param.img_w();
  1199. image_height = prior_box_param.img_h();
  1200. }
  1201. float step_width = -233;
  1202. float step_height = -233;
  1203. if (prior_box_param.has_step())
  1204. {
  1205. step_width = prior_box_param.step();
  1206. step_height = prior_box_param.step();
  1207. }
  1208. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  1209. {
  1210. step_width = prior_box_param.step_w();
  1211. step_height = prior_box_param.step_h();
  1212. }
  1213. fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
  1214. for (int j=0; j<prior_box_param.min_size_size(); j++)
  1215. {
  1216. fprintf(pp, ",%f", prior_box_param.min_size(j));
  1217. }
  1218. fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
  1219. for (int j=0; j<prior_box_param.max_size_size(); j++)
  1220. {
  1221. fprintf(pp, ",%f", prior_box_param.max_size(j));
  1222. }
  1223. fprintf(pp, " -23302=%d", num_aspect_ratio);
  1224. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  1225. {
  1226. float ar = prior_box_param.aspect_ratio(j);
  1227. if (fabs(ar - 1.) < 1e-6) {
  1228. continue;
  1229. }
  1230. fprintf(pp, ",%f", ar);
  1231. }
  1232. fprintf(pp, " 3=%f", variances[0]);
  1233. fprintf(pp, " 4=%f", variances[1]);
  1234. fprintf(pp, " 5=%f", variances[2]);
  1235. fprintf(pp, " 6=%f", variances[3]);
  1236. fprintf(pp, " 7=%d", flip);
  1237. fprintf(pp, " 8=%d", clip);
  1238. fprintf(pp, " 9=%d", image_width);
  1239. fprintf(pp, " 10=%d", image_height);
  1240. fprintf(pp, " 11=%f", step_width);
  1241. fprintf(pp, " 12=%f", step_height);
  1242. fprintf(pp, " 13=%f", prior_box_param.offset());
  1243. }
  1244. else if (layer.type() == "Python")
  1245. {
  1246. const caffe::PythonParameter& python_param = layer.python_param();
  1247. std::string python_layer_name = python_param.layer();
  1248. if (python_layer_name == "ProposalLayer")
  1249. {
  1250. int feat_stride = 16;
  1251. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  1252. int base_size = 16;
  1253. // float ratio;
  1254. // float scale;
  1255. int pre_nms_topN = 6000;
  1256. int after_nms_topN = 300;
  1257. float nms_thresh = 0.7;
  1258. int min_size = 16;
  1259. fprintf(pp, " 0=%d", feat_stride);
  1260. fprintf(pp, " 1=%d", base_size);
  1261. fprintf(pp, " 2=%d", pre_nms_topN);
  1262. fprintf(pp, " 3=%d", after_nms_topN);
  1263. fprintf(pp, " 4=%f", nms_thresh);
  1264. fprintf(pp, " 5=%d", min_size);
  1265. }
  1266. }
  1267. else if (layer.type() == "ReLU")
  1268. {
  1269. const caffe::ReLUParameter& relu_param = layer.relu_param();
  1270. if (relu_param.has_negative_slope())
  1271. {
  1272. fprintf(pp, " 0=%f", relu_param.negative_slope());
  1273. }
  1274. }
  1275. else if (layer.type() == "ReLU6")
  1276. {
  1277. float min = 0.f;
  1278. float max = 6.f;
  1279. fprintf(pp, " 0=%f", min);
  1280. fprintf(pp, " 1=%f", max);
  1281. }
  1282. else if (layer.type() == "Reorg")
  1283. {
  1284. const caffe::ReorgParameter& reorg_param = layer.reorg_param();
  1285. fprintf(pp, " 0=%d", reorg_param.stride());
  1286. }
  1287. else if (layer.type() == "Reshape")// -1 1 512
  1288. {
  1289. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  1290. const caffe::BlobShape& bs = reshape_param.shape();
  1291. if (bs.dim_size() == 1)
  1292. {
  1293. fprintf(pp, " 0=%ld 1=-233 2=-233", bs.dim(0));
  1294. }
  1295. else if (bs.dim_size() == 2)
  1296. {
  1297. fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(1), bs.dim(0));
  1298. }
  1299. else if (bs.dim_size() == 3)
  1300. {
  1301. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(2), bs.dim(1), bs.dim(0));
  1302. }
  1303. else // bs.dim_size() == 4
  1304. {
  1305. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(3), bs.dim(2), bs.dim(1));
  1306. }
  1307. fprintf(pp, " 3=0");// permute
  1308. }
  1309. else if (layer.type() == "ROIPooling")
  1310. {
  1311. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  1312. fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
  1313. fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
  1314. fprintf(pp, " 2=%f", roi_pooling_param.spatial_scale());
  1315. }
  1316. else if (layer.type() == "Scale")
  1317. {
  1318. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1319. const caffe::ScaleParameter& scale_param = layer.scale_param();
  1320. bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
  1321. if (scale_weight)
  1322. {
  1323. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1324. fprintf(pp, " 0=%d", (int)weight_blob.data_size());
  1325. }
  1326. else
  1327. {
  1328. fprintf(pp, " 0=-233");
  1329. }
  1330. fprintf(pp, " 1=%d", scale_param.bias_term());
  1331. for (int j=0; j<binlayer.blobs_size(); j++)
  1332. {
  1333. const caffe::BlobProto& blob = binlayer.blobs(j);
  1334. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1335. }
  1336. }
  1337. else if (layer.type() == "ShuffleChannel")
  1338. {
  1339. const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
  1340. fprintf(pp, " 0=%d", shuffle_channel_param.group());
  1341. }
  1342. else if (layer.type() == "Slice")
  1343. {
  1344. const caffe::SliceParameter& slice_param = layer.slice_param();
  1345. if (slice_param.slice_point_size() == 0)
  1346. {
  1347. int num_slice = layer.top_size();
  1348. fprintf(pp, " -23300=%d", num_slice);
  1349. for (int j=0; j<num_slice; j++)
  1350. {
  1351. fprintf(pp, ",-233");
  1352. }
  1353. }
  1354. else
  1355. {
  1356. int num_slice = slice_param.slice_point_size() + 1;
  1357. fprintf(pp, " -23300=%d", num_slice);
  1358. int prev_offset = 0;
  1359. for (int j=0; j<slice_param.slice_point_size(); j++)
  1360. {
  1361. int offset = slice_param.slice_point(j);
  1362. fprintf(pp, ",%d", offset - prev_offset);
  1363. prev_offset = offset;
  1364. }
  1365. fprintf(pp, ",-233");
  1366. }
  1367. int dim = slice_param.axis() - 1;
  1368. fprintf(pp, " 1=%d", dim);
  1369. }
  1370. else if (layer.type() == "Softmax")
  1371. {
  1372. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  1373. int dim = softmax_param.axis() - 1;
  1374. fprintf(pp, " 0=%d", dim);
  1375. }
  1376. else if (layer.type() == "Threshold")
  1377. {
  1378. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  1379. fprintf(pp, " 0=%f", threshold_param.threshold());
  1380. }
  1381. else if (layer.type() == "YoloDetectionOutput")
  1382. {
  1383. const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();
  1384. fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
  1385. fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
  1386. fprintf(pp, " 2=%f", yolo_detection_output_param.confidence_threshold());
  1387. fprintf(pp, " 3=%f", yolo_detection_output_param.nms_threshold());
  1388. int num_bias = yolo_detection_output_param.biases_size();
  1389. fprintf(pp, " -23304=%d", num_bias);
  1390. for (int j=0; j<num_bias; j++)
  1391. {
  1392. fprintf(pp, ",%f", yolo_detection_output_param.biases(j));
  1393. }
  1394. }
  1395. fprintf(pp, "\n");
  1396. // add split layer if top reference larger than one
  1397. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  1398. {
  1399. std::string blob_name = blob_name_decorated[layer.top(0)];
  1400. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1401. {
  1402. int refcount = bottom_reference[blob_name];
  1403. if (refcount > 1)
  1404. {
  1405. char splitname[256];
  1406. sprintf(splitname, "splitncnn_%d", internal_split);
  1407. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1408. fprintf(pp, " %s", blob_name.c_str());
  1409. for (int j=0; j<refcount; j++)
  1410. {
  1411. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1412. }
  1413. fprintf(pp, "\n");
  1414. internal_split++;
  1415. }
  1416. }
  1417. }
  1418. else
  1419. {
  1420. for (int j=0; j<layer.top_size(); j++)
  1421. {
  1422. std::string blob_name = layer.top(j);
  1423. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1424. {
  1425. int refcount = bottom_reference[blob_name];
  1426. if (refcount > 1)
  1427. {
  1428. char splitname[256];
  1429. sprintf(splitname, "splitncnn_%d", internal_split);
  1430. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1431. fprintf(pp, " %s", blob_name.c_str());
  1432. for (int j=0; j<refcount; j++)
  1433. {
  1434. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1435. }
  1436. fprintf(pp, "\n");
  1437. internal_split++;
  1438. }
  1439. }
  1440. }
  1441. }
  1442. }
  1443. fclose(pp);
  1444. fclose(bp);
  1445. return 0;
  1446. }