You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

caffe2ncnn.cpp 41 kB

8 years ago
8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <stdio.h>
  15. #include <limits.h>
  16. #include <math.h>
  17. #include <fstream>
  18. #include <set>
  19. #include <limits>
  20. #include <algorithm>
  21. #include <google/protobuf/io/coded_stream.h>
  22. #include <google/protobuf/io/zero_copy_stream_impl.h>
  23. #include <google/protobuf/text_format.h>
  24. #include <google/protobuf/message.h>
  25. #include "caffe.pb.h"
  // Rounds sz up to the next multiple of n.
  // The mask trick only works when n is a power of two.
  static inline size_t alignSize(size_t sz, int n)
  {
      // -n converted to size_t has all bits set above the alignment bits
      const size_t keep_mask = -n;
      const size_t bumped = sz + n - 1;
      return bumped & keep_mask;
  }
  // Convert a single-precision float to the bit pattern of a half-precision float.
  //
  // Layout in:  1 sign : 8 exponent : 23 mantissa
  // Layout out: 1 sign : 5 exponent : 10 mantissa
  //
  // The mantissa is truncated (no rounding). Float zeros and denormals become
  // signed zero, values too large for half become signed infinity, and any NaN
  // becomes a NaN with mantissa 0x200.
  static unsigned short float2half(float value)
  {
      // view the float's raw bits
      union
      {
          unsigned int u;
          float f;
      } bits;
      bits.f = value;

      // split the 32-bit pattern into its three fields
      const unsigned short sign_bit = (bits.u & 0x80000000) >> 31;
      const unsigned short exp32 = (bits.u & 0x7F800000) >> 23;
      const unsigned int mant32 = bits.u & 0x7FFFFF;

      // sign already moved to its half-precision position
      const unsigned short sign_part = sign_bit << 15;
      // all-ones half exponent, used for infinity and NaN
      const int exp_all_ones = 0x1F << 10;

      // zero or float denormal: always underflows to signed zero
      if (exp32 == 0)
          return sign_part;

      // infinity or NaN
      if (exp32 == 0xFF)
          return sign_part | exp_all_ones | (mant32 ? 0x200 : 0x00);

      // normalized float: rebias the exponent from 127 to 15
      const short exp16 = exp32 + (-127 + 15);

      // too large for half precision, return infinity
      if (exp16 >= 31)
          return sign_part | exp_all_ones;

      // fits as a normalized half
      if (exp16 > 0)
          return sign_part | (exp16 << 10) | (mant32 >> 13);

      // representable as a half denormal: restore the implicit leading one
      // and shift the mantissa into place
      if (exp16 >= -10)
      {
          const unsigned short denorm = (mant32 | 0x800000) >> (14 - exp16);
          return sign_part | denorm;
      }

      // too small even for a half denormal
      return sign_part;
  }
  88. static int quantize_weight(float *data, size_t data_length, std::vector<unsigned short>& float16_weights)
  89. {
  90. float16_weights.resize(data_length);
  91. for (size_t i = 0; i < data_length; i++)
  92. {
  93. float f = data[i];
  94. unsigned short fp16 = float2half(f);
  95. float16_weights[i] = fp16;
  96. }
  97. // magic tag for half-precision floating point
  98. return 0x01306B47;
  99. }
  // Quantizes data onto a uniform lookup table of quantize_level values.
  //
  // The table covers [min, max) of the input in equal steps: entry i is
  // min + i * (max - min) / quantize_level. Each input value is replaced by
  // the index of the nearest table entry, clamped to the last entry.
  //
  // data            input weights, must not be NULL
  // data_length     number of floats in data, must be > 0
  // quantize_level  number of table entries, must not be 0; indices are stored
  //                 as unsigned char, so levels above 256 do not fit
  // quantize_table  output, replaced with the quantize_level table values
  // quantize_index  output, replaced with one table index per input value
  //
  // Returns false (leaving the outputs untouched) when there are fewer values
  // than table entries, true otherwise.
  static bool quantize_weight(float *data, size_t data_length, int quantize_level, std::vector<float> &quantize_table, std::vector<unsigned char> &quantize_index)
  {
      assert(quantize_level != 0);
      assert(data != NULL);
      assert(data_length > 0);

      if (data_length < static_cast<size_t>(quantize_level))
      {
          fprintf(stderr, "No need quantize,because: data_length < quantize_level");
          return false;
      }

      // start from empty outputs so the table lookups below always index
      // the entries generated by this call
      quantize_table.clear();
      quantize_index.clear();
      quantize_table.reserve(quantize_level);
      quantize_index.reserve(data_length);

      // 1. Find min and max value
      // lowest() rather than min(): numeric_limits<float>::min() is the smallest
      // positive float, which would be a wrong maximum for all-negative data
      float max_value = std::numeric_limits<float>::lowest();
      float min_value = std::numeric_limits<float>::max();
      for (size_t i = 0; i < data_length; ++i)
      {
          if (max_value < data[i]) max_value = data[i];
          if (min_value > data[i]) min_value = data[i];
      }

      const float strides = (max_value - min_value) / quantize_level;

      // 2. Generate quantize table
      for (int i = 0; i < quantize_level; ++i)
      {
          quantize_table.push_back(min_value + i * strides);
      }

      // 3. Align data to the quantized value
      const int last_index = quantize_level - 1;
      for (size_t i = 0; i < data_length; ++i)
      {
          // when every value is identical the step is zero and every table
          // entry equals that value, so index 0 is exact; this also avoids
          // dividing by zero
          int table_index = 0;

          if (strides > 0.f)
          {
              // bucket whose lower edge is at or below the value
              table_index = std::min(static_cast<int>((data[i] - min_value) / strides), last_index);

              const float low_value = quantize_table[table_index];
              const float high_value = low_value + strides;

              // move to the upper neighbour unless the lower edge is strictly
              // closer; the index is picked directly instead of being derived
              // by a second float division, which could round down a bucket
              if (!(data[i] - low_value < high_value - data[i]))
              {
                  table_index = std::min(table_index + 1, last_index);
              }
          }

          quantize_index.push_back(static_cast<unsigned char>(table_index));
      }

      return true;
  }
  139. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  140. {
  141. std::ifstream fs(filepath, std::ifstream::in);
  142. if (!fs.is_open())
  143. {
  144. fprintf(stderr, "open failed %s\n", filepath);
  145. return false;
  146. }
  147. google::protobuf::io::IstreamInputStream input(&fs);
  148. bool success = google::protobuf::TextFormat::Parse(&input, message);
  149. fs.close();
  150. return success;
  151. }
  152. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  153. {
  154. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  155. if (!fs.is_open())
  156. {
  157. fprintf(stderr, "open failed %s\n", filepath);
  158. return false;
  159. }
  160. google::protobuf::io::IstreamInputStream input(&fs);
  161. google::protobuf::io::CodedInputStream codedstr(&input);
  162. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  163. bool success = message->ParseFromCodedStream(&codedstr);
  164. fs.close();
  165. return success;
  166. }
  167. int main(int argc, char** argv)
  168. {
  169. if (!(argc == 3 || argc == 5 || argc == 6))
  170. {
  171. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel]\n", argv[0]);
  172. return -1;
  173. }
  174. const char* caffeproto = argv[1];
  175. const char* caffemodel = argv[2];
  176. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  177. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  178. const char* quantize_param = argc == 6 ? argv[5] : "0";
  179. int quantize_level = atoi(quantize_param);
  180. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536) {
  181. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  182. return -1;
  183. }
  184. caffe::NetParameter proto;
  185. caffe::NetParameter net;
  186. // load
  187. bool s0 = read_proto_from_text(caffeproto, &proto);
  188. if (!s0)
  189. {
  190. fprintf(stderr, "read_proto_from_text failed\n");
  191. return -1;
  192. }
  193. bool s1 = read_proto_from_binary(caffemodel, &net);
  194. if (!s1)
  195. {
  196. fprintf(stderr, "read_proto_from_binary failed\n");
  197. return -1;
  198. }
  199. FILE* pp = fopen(ncnn_prototxt, "wb");
  200. FILE* bp = fopen(ncnn_modelbin, "wb");
  201. // magic
  202. fprintf(pp, "7767517\n");
  203. // rename mapping for identical bottom top style
  204. std::map<std::string, std::string> blob_name_decorated;
  205. // bottom blob reference
  206. std::map<std::string, int> bottom_reference;
  207. // global definition line
  208. // [layer count] [blob count]
  209. int layer_count = proto.layer_size();
  210. std::set<std::string> blob_names;
  211. for (int i=0; i<layer_count; i++)
  212. {
  213. const caffe::LayerParameter& layer = proto.layer(i);
  214. for (int j=0; j<layer.bottom_size(); j++)
  215. {
  216. std::string blob_name = layer.bottom(j);
  217. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  218. {
  219. blob_name = blob_name_decorated[blob_name];
  220. }
  221. blob_names.insert(blob_name);
  222. if (bottom_reference.find(blob_name) == bottom_reference.end())
  223. {
  224. bottom_reference[blob_name] = 1;
  225. }
  226. else
  227. {
  228. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  229. }
  230. }
  231. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  232. {
  233. std::string blob_name = layer.top(0) + "_" + layer.name();
  234. blob_name_decorated[layer.top(0)] = blob_name;
  235. blob_names.insert(blob_name);
  236. }
  237. else
  238. {
  239. for (int j=0; j<layer.top_size(); j++)
  240. {
  241. std::string blob_name = layer.top(j);
  242. blob_names.insert(blob_name);
  243. }
  244. }
  245. }
  246. // remove bottom_reference entry with reference equals to one
  247. int splitncnn_blob_count = 0;
  248. std::map<std::string, int>::iterator it = bottom_reference.begin();
  249. while (it != bottom_reference.end())
  250. {
  251. if (it->second == 1)
  252. {
  253. bottom_reference.erase(it++);
  254. }
  255. else
  256. {
  257. splitncnn_blob_count += it->second;
  258. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  259. ++it;
  260. }
  261. }
  262. fprintf(pp, "%lu %lu\n", layer_count + bottom_reference.size(), blob_names.size() + splitncnn_blob_count);
  263. // populate
  264. blob_name_decorated.clear();
  265. int internal_split = 0;
  266. for (int i=0; i<layer_count; i++)
  267. {
  268. const caffe::LayerParameter& layer = proto.layer(i);
  269. // layer definition line, repeated
  270. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  271. if (layer.type() == "Convolution")
  272. {
  273. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  274. if (convolution_param.group() != 1)
  275. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  276. else
  277. fprintf(pp, "%-16s", "Convolution");
  278. }
  279. else if (layer.type() == "ConvolutionDepthwise")
  280. {
  281. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  282. }
  283. else if (layer.type() == "Deconvolution")
  284. {
  285. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  286. if (convolution_param.group() != 1)
  287. fprintf(pp, "%-16s", "DeconvolutionDepthWise");
  288. else
  289. fprintf(pp, "%-16s", "Deconvolution");
  290. }
  291. else if (layer.type() == "Python")
  292. {
  293. const caffe::PythonParameter& python_param = layer.python_param();
  294. std::string python_layer_name = python_param.layer();
  295. if (python_layer_name == "ProposalLayer")
  296. fprintf(pp, "%-16s", "Proposal");
  297. else
  298. fprintf(pp, "%-16s", python_layer_name.c_str());
  299. }
  300. else
  301. {
  302. fprintf(pp, "%-16s", layer.type().c_str());
  303. }
  304. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  305. for (int j=0; j<layer.bottom_size(); j++)
  306. {
  307. std::string blob_name = layer.bottom(j);
  308. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  309. {
  310. blob_name = blob_name_decorated[layer.bottom(j)];
  311. }
  312. if (bottom_reference.find(blob_name) != bottom_reference.end())
  313. {
  314. int refidx = bottom_reference[blob_name] - 1;
  315. bottom_reference[blob_name] = refidx;
  316. char splitsuffix[256];
  317. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  318. blob_name = blob_name + splitsuffix;
  319. }
  320. fprintf(pp, " %s", blob_name.c_str());
  321. }
  322. // decorated
  323. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  324. {
  325. std::string blob_name = layer.top(0) + "_" + layer.name();
  326. blob_name_decorated[layer.top(0)] = blob_name;
  327. fprintf(pp, " %s", blob_name.c_str());
  328. }
  329. else
  330. {
  331. for (int j=0; j<layer.top_size(); j++)
  332. {
  333. std::string blob_name = layer.top(j);
  334. fprintf(pp, " %s", blob_name.c_str());
  335. }
  336. }
  337. // find blob binary by layer name
  338. int netidx;
  339. for (netidx=0; netidx<net.layer_size(); netidx++)
  340. {
  341. if (net.layer(netidx).name() == layer.name())
  342. {
  343. break;
  344. }
  345. }
  346. // layer specific params
  347. if (layer.type() == "BatchNorm")
  348. {
  349. const caffe::LayerParameter& binlayer = net.layer(netidx);
  350. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  351. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  352. fprintf(pp, " 0=%d", (int)mean_blob.data_size());
  353. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  354. float eps = batch_norm_param.eps();
  355. std::vector<float> ones(mean_blob.data_size(), 1.f);
  356. fwrite(ones.data(), sizeof(float), ones.size(), bp);// slope
  357. if (binlayer.blobs_size() < 3)
  358. {
  359. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  360. float tmp;
  361. for (int j=0; j<var_blob.data_size(); j++)
  362. {
  363. tmp = var_blob.data().data()[j] + eps;
  364. fwrite(&tmp, sizeof(float), 1, bp);
  365. }
  366. }
  367. else
  368. {
  369. float scale_factor = 1 / binlayer.blobs(2).data().data()[0];
  370. // premultiply scale_factor to mean and variance
  371. float tmp;
  372. for (int j=0; j<mean_blob.data_size(); j++)
  373. {
  374. tmp = mean_blob.data().data()[j] * scale_factor;
  375. fwrite(&tmp, sizeof(float), 1, bp);
  376. }
  377. for (int j=0; j<var_blob.data_size(); j++)
  378. {
  379. tmp = var_blob.data().data()[j] * scale_factor + eps;
  380. fwrite(&tmp, sizeof(float), 1, bp);
  381. }
  382. }
  383. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  384. fwrite(zeros.data(), sizeof(float), zeros.size(), bp);// bias
  385. }
  386. else if (layer.type() == "Concat")
  387. {
  388. const caffe::ConcatParameter& concat_param = layer.concat_param();
  389. int dim = concat_param.axis() - 1;
  390. fprintf(pp, " 0=%d", dim);
  391. }
  392. else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise")
  393. {
  394. const caffe::LayerParameter& binlayer = net.layer(netidx);
  395. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  396. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  397. fprintf(pp, " 0=%d", convolution_param.num_output());
  398. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  399. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  400. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  401. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  402. fprintf(pp, " 5=%d", convolution_param.bias_term());
  403. fprintf(pp, " 6=%d", weight_blob.data_size());
  404. if (layer.type() == "ConvolutionDepthwise")
  405. {
  406. fprintf(pp, " 7=%d", convolution_param.num_output());
  407. }
  408. else if (convolution_param.group() != 1)
  409. {
  410. fprintf(pp, " 7=%d", convolution_param.group());
  411. }
  412. for (int j = 0; j < binlayer.blobs_size(); j++)
  413. {
  414. int quantize_tag = 0;
  415. const caffe::BlobProto& blob = binlayer.blobs(j);
  416. std::vector<float> quantize_table;
  417. std::vector<unsigned char> quantize_index;
  418. std::vector<unsigned short> float16_weights;
  419. // we will not quantize the bias values
  420. if (j == 0 && quantize_level != 0)
  421. {
  422. if (quantize_level == 256)
  423. {
  424. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  425. }
  426. else if (quantize_level == 65536)
  427. {
  428. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  429. }
  430. }
  431. // write quantize tag first
  432. if (j == 0)
  433. fwrite(&quantize_tag, sizeof(int), 1, bp);
  434. if (quantize_tag)
  435. {
  436. int p0 = ftell(bp);
  437. if (quantize_level == 256)
  438. {
  439. // write quantize table and index
  440. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  441. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  442. }
  443. else if (quantize_level == 65536)
  444. {
  445. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  446. }
  447. // padding to 32bit align
  448. int nwrite = ftell(bp) - p0;
  449. int nalign = alignSize(nwrite, 4);
  450. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  451. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  452. }
  453. else
  454. {
  455. // write original data
  456. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  457. }
  458. }
  459. }
  460. else if (layer.type() == "Crop")
  461. {
  462. const caffe::CropParameter& crop_param = layer.crop_param();
  463. int num_offset = crop_param.offset_size();
  464. int woffset = (num_offset == 2) ? crop_param.offset(0) : 0;
  465. int hoffset = (num_offset == 2) ? crop_param.offset(1) : 0;
  466. fprintf(pp, " 0=%d", woffset);
  467. fprintf(pp, " 1=%d", hoffset);
  468. }
  469. else if (layer.type() == "Deconvolution")
  470. {
  471. const caffe::LayerParameter& binlayer = net.layer(netidx);
  472. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  473. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  474. fprintf(pp, " 0=%d", convolution_param.num_output());
  475. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  476. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  477. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  478. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  479. fprintf(pp, " 5=%d", convolution_param.bias_term());
  480. fprintf(pp, " 6=%d", weight_blob.data_size());
  481. if (convolution_param.group() != 1)
  482. {
  483. fprintf(pp, " 7=%d", convolution_param.group());
  484. }
  485. int quantized_weight = 0;
  486. fwrite(&quantized_weight, sizeof(int), 1, bp);
  487. // reorder weight from inch-outch to outch-inch
  488. int ksize = convolution_param.kernel_size(0);
  489. int num_output = convolution_param.num_output();
  490. int num_input = weight_blob.data_size() / (ksize * ksize) / num_output;
  491. const float* weight_data_ptr = weight_blob.data().data();
  492. for (int k=0; k<num_output; k++)
  493. {
  494. for (int j=0; j<num_input; j++)
  495. {
  496. fwrite(weight_data_ptr + (j*num_output + k) * ksize * ksize, sizeof(float), ksize * ksize, bp);
  497. }
  498. }
  499. for (int j=1; j<binlayer.blobs_size(); j++)
  500. {
  501. const caffe::BlobProto& blob = binlayer.blobs(j);
  502. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  503. }
  504. }
  505. else if (layer.type() == "DetectionOutput")
  506. {
  507. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  508. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  509. fprintf(pp, " 0=%d", detection_output_param.num_classes());
  510. fprintf(pp, " 1=%f", nms_param.nms_threshold());
  511. fprintf(pp, " 2=%d", nms_param.top_k());
  512. fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
  513. fprintf(pp, " 4=%f", detection_output_param.confidence_threshold());
  514. }
  515. else if (layer.type() == "Dropout")
  516. {
  517. const caffe::DropoutParameter& dropout_param = layer.dropout_param();
  518. if (dropout_param.has_scale_train() && !dropout_param.scale_train())
  519. {
  520. float scale = 1.f - dropout_param.dropout_ratio();
  521. fprintf(pp, " 0=%f", scale);
  522. }
  523. }
  524. else if (layer.type() == "Eltwise")
  525. {
  526. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  527. int coeff_size = eltwise_param.coeff_size();
  528. fprintf(pp, " 0=%d", (int)eltwise_param.operation());
  529. fprintf(pp, " -23301=%d", coeff_size);
  530. for (int j=0; j<coeff_size; j++)
  531. {
  532. fprintf(pp, ",%f", eltwise_param.coeff(j));
  533. }
  534. }
  535. else if (layer.type() == "ELU")
  536. {
  537. const caffe::ELUParameter& elu_param = layer.elu_param();
  538. fprintf(pp, " 0=%f", elu_param.alpha());
  539. }
  540. else if (layer.type() == "InnerProduct")
  541. {
  542. const caffe::LayerParameter& binlayer = net.layer(netidx);
  543. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  544. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  545. fprintf(pp, " 0=%d", inner_product_param.num_output());
  546. fprintf(pp, " 1=%d", inner_product_param.bias_term());
  547. fprintf(pp, " 2=%d", weight_blob.data_size());
  548. for (int j=0; j<binlayer.blobs_size(); j++)
  549. {
  550. int quantize_tag = 0;
  551. const caffe::BlobProto& blob = binlayer.blobs(j);
  552. std::vector<float> quantize_table;
  553. std::vector<unsigned char> quantize_index;
  554. std::vector<unsigned short> float16_weights;
  555. // we will not quantize the bias values
  556. if (j == 0 && quantize_level != 0)
  557. {
  558. if (quantize_level == 256)
  559. {
  560. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  561. }
  562. else if (quantize_level == 65536)
  563. {
  564. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  565. }
  566. }
  567. // write quantize tag first
  568. if (j == 0)
  569. fwrite(&quantize_tag, sizeof(int), 1, bp);
  570. if (quantize_tag)
  571. {
  572. int p0 = ftell(bp);
  573. if (quantize_level == 256)
  574. {
  575. // write quantize table and index
  576. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  577. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  578. }
  579. else if (quantize_level == 65536)
  580. {
  581. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  582. }
  583. // padding to 32bit align
  584. int nwrite = ftell(bp) - p0;
  585. int nalign = alignSize(nwrite, 4);
  586. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  587. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  588. }
  589. else
  590. {
  591. // write original data
  592. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  593. }
  594. }
  595. }
  596. else if (layer.type() == "Input")
  597. {
  598. const caffe::InputParameter& input_param = layer.input_param();
  599. const caffe::BlobShape& bs = input_param.shape(0);
  600. for (int j=1; j<std::min((int)bs.dim_size(), 4); j++)
  601. {
  602. fprintf(pp, " %d=%ld", j-1, bs.dim(j));
  603. }
  604. for (int j=bs.dim_size(); j<4; j++)
  605. {
  606. fprintf(pp, " %d=-233", j-1);
  607. }
  608. }
  609. else if (layer.type() == "Interp")
  610. {
  611. const caffe::InterpParameter& interp_param = layer.interp_param();
  612. fprintf(pp, " 0=%d", 2);
  613. fprintf(pp, " 1=%f", (float)interp_param.zoom_factor());
  614. fprintf(pp, " 2=%f", (float)interp_param.zoom_factor());
  615. fprintf(pp, " 3=%d", interp_param.height());
  616. fprintf(pp, " 4=%d", interp_param.width());
  617. }
  618. else if (layer.type() == "LRN")
  619. {
  620. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  621. fprintf(pp, " 0=%d", lrn_param.norm_region());
  622. fprintf(pp, " 1=%d", lrn_param.local_size());
  623. fprintf(pp, " 2=%f", lrn_param.alpha());
  624. fprintf(pp, " 3=%f", lrn_param.beta());
  625. }
  626. else if (layer.type() == "MemoryData")
  627. {
  628. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  629. fprintf(pp, " 0=%d", memory_data_param.width());
  630. fprintf(pp, " 1=%d", memory_data_param.height());
  631. fprintf(pp, " 2=%d", memory_data_param.channels());
  632. }
  633. else if (layer.type() == "Normalize")
  634. {
  635. const caffe::LayerParameter& binlayer = net.layer(netidx);
  636. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  637. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  638. fprintf(pp, " 0=%d", norm_param.across_spatial());
  639. fprintf(pp, " 1=%d", norm_param.channel_shared());
  640. fprintf(pp, " 2=%f", norm_param.eps());
  641. fprintf(pp, " 3=%d", scale_blob.data_size());
  642. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  643. }
  644. else if (layer.type() == "Permute")
  645. {
  646. const caffe::PermuteParameter& permute_param = layer.permute_param();
  647. int order_size = permute_param.order_size();
  648. int order_type = 0;
  649. if (order_size == 0)
  650. order_type = 0;
  651. if (order_size == 1)
  652. {
  653. int order0 = permute_param.order(0);
  654. if (order0 == 0)
  655. order_type = 0;
  656. // permute with N not supported
  657. }
  658. if (order_size == 2)
  659. {
  660. int order0 = permute_param.order(0);
  661. int order1 = permute_param.order(1);
  662. if (order0 == 0)
  663. {
  664. if (order1 == 1) // 0 1 2 3
  665. order_type = 0;
  666. else if (order1 == 2) // 0 2 1 3
  667. order_type = 2;
  668. else if (order1 == 3) // 0 3 1 2
  669. order_type = 4;
  670. }
  671. // permute with N not supported
  672. }
  673. if (order_size == 3 || order_size == 4)
  674. {
  675. int order0 = permute_param.order(0);
  676. int order1 = permute_param.order(1);
  677. int order2 = permute_param.order(2);
  678. if (order0 == 0)
  679. {
  680. if (order1 == 1)
  681. {
  682. if (order2 == 2) // 0 1 2 3
  683. order_type = 0;
  684. if (order2 == 3) // 0 1 3 2
  685. order_type = 1;
  686. }
  687. else if (order1 == 2)
  688. {
  689. if (order2 == 1) // 0 2 1 3
  690. order_type = 2;
  691. if (order2 == 3) // 0 2 3 1
  692. order_type = 3;
  693. }
  694. else if (order1 == 3)
  695. {
  696. if (order2 == 1) // 0 3 1 2
  697. order_type = 4;
  698. if (order2 == 2) // 0 3 2 1
  699. order_type = 5;
  700. }
  701. }
  702. // permute with N not supported
  703. }
  704. fprintf(pp, " 0=%d", order_type);
  705. }
  706. else if (layer.type() == "Pooling")
  707. {
  708. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  709. fprintf(pp, " 0=%d", pooling_param.pool());
  710. fprintf(pp, " 1=%d", pooling_param.kernel_size());
  711. fprintf(pp, " 2=%d", pooling_param.stride());
  712. fprintf(pp, " 3=%d", pooling_param.pad());
  713. fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  714. }
  715. else if (layer.type() == "Power")
  716. {
  717. const caffe::PowerParameter& power_param = layer.power_param();
  718. fprintf(pp, " 0=%f", power_param.power());
  719. fprintf(pp, " 1=%f", power_param.scale());
  720. fprintf(pp, " 2=%f", power_param.shift());
  721. }
  722. else if (layer.type() == "PReLU")
  723. {
  724. const caffe::LayerParameter& binlayer = net.layer(netidx);
  725. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  726. fprintf(pp, " 0=%d", slope_blob.data_size());
  727. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  728. }
  729. else if (layer.type() == "PriorBox")
  730. {
  731. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  732. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  733. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  734. {
  735. float ar = prior_box_param.aspect_ratio(j);
  736. if (fabs(ar - 1.) < 1e-6) {
  737. num_aspect_ratio--;
  738. }
  739. }
  740. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  741. if (prior_box_param.variance_size() == 4)
  742. {
  743. variances[0] = prior_box_param.variance(0);
  744. variances[1] = prior_box_param.variance(1);
  745. variances[2] = prior_box_param.variance(2);
  746. variances[3] = prior_box_param.variance(3);
  747. }
  748. else if (prior_box_param.variance_size() == 1)
  749. {
  750. variances[0] = prior_box_param.variance(0);
  751. variances[1] = prior_box_param.variance(0);
  752. variances[2] = prior_box_param.variance(0);
  753. variances[3] = prior_box_param.variance(0);
  754. }
  755. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  756. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  757. int image_width = -233;
  758. int image_height = -233;
  759. if (prior_box_param.has_img_size())
  760. {
  761. image_width = prior_box_param.img_size();
  762. image_height = prior_box_param.img_size();
  763. }
  764. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  765. {
  766. image_width = prior_box_param.img_w();
  767. image_height = prior_box_param.img_h();
  768. }
  769. float step_width = -233;
  770. float step_height = -233;
  771. if (prior_box_param.has_step())
  772. {
  773. step_width = prior_box_param.step();
  774. step_height = prior_box_param.step();
  775. }
  776. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  777. {
  778. step_width = prior_box_param.step_w();
  779. step_height = prior_box_param.step_h();
  780. }
  781. fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
  782. for (int j=0; j<prior_box_param.min_size_size(); j++)
  783. {
  784. fprintf(pp, ",%f", prior_box_param.min_size(j));
  785. }
  786. fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
  787. for (int j=0; j<prior_box_param.max_size_size(); j++)
  788. {
  789. fprintf(pp, ",%f", prior_box_param.max_size(j));
  790. }
  791. fprintf(pp, " -23302=%d", num_aspect_ratio);
  792. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  793. {
  794. float ar = prior_box_param.aspect_ratio(j);
  795. if (fabs(ar - 1.) < 1e-6) {
  796. continue;
  797. }
  798. fprintf(pp, ",%f", ar);
  799. }
  800. fprintf(pp, " 3=%f", variances[0]);
  801. fprintf(pp, " 4=%f", variances[1]);
  802. fprintf(pp, " 5=%f", variances[2]);
  803. fprintf(pp, " 6=%f", variances[3]);
  804. fprintf(pp, " 7=%d", flip);
  805. fprintf(pp, " 8=%d", clip);
  806. fprintf(pp, " 9=%d", image_width);
  807. fprintf(pp, " 10=%d", image_height);
  808. fprintf(pp, " 11=%f", step_width);
  809. fprintf(pp, " 12=%f", step_height);
  810. fprintf(pp, " 13=%f", prior_box_param.offset());
  811. }
  812. else if (layer.type() == "Python")
  813. {
  814. const caffe::PythonParameter& python_param = layer.python_param();
  815. std::string python_layer_name = python_param.layer();
  816. if (python_layer_name == "ProposalLayer")
  817. {
  818. int feat_stride = 16;
  819. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  820. int base_size = 16;
  821. // float ratio;
  822. // float scale;
  823. int pre_nms_topN = 6000;
  824. int after_nms_topN = 300;
  825. float nms_thresh = 0.7;
  826. int min_size = 16;
  827. fprintf(pp, " 0=%d", feat_stride);
  828. fprintf(pp, " 1=%d", base_size);
  829. fprintf(pp, " 2=%d", pre_nms_topN);
  830. fprintf(pp, " 3=%d", after_nms_topN);
  831. fprintf(pp, " 4=%f", nms_thresh);
  832. fprintf(pp, " 5=%d", min_size);
  833. }
  834. }
  835. else if (layer.type() == "ReLU")
  836. {
  837. const caffe::ReLUParameter& relu_param = layer.relu_param();
  838. if (relu_param.has_negative_slope())
  839. {
  840. fprintf(pp, " 0=%f", relu_param.negative_slope());
  841. }
  842. }
  843. else if (layer.type() == "Reshape")
  844. {
  845. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  846. const caffe::BlobShape& bs = reshape_param.shape();
  847. if (bs.dim_size() == 1)
  848. {
  849. fprintf(pp, " 0=%ld 1=-233 2=-233", bs.dim(0));
  850. }
  851. else if (bs.dim_size() == 2)
  852. {
  853. fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(1), bs.dim(0));
  854. }
  855. else if (bs.dim_size() == 3)
  856. {
  857. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(2), bs.dim(1), bs.dim(0));
  858. }
  859. else // bs.dim_size() == 4
  860. {
  861. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(3), bs.dim(2), bs.dim(1));
  862. }
  863. fprintf(pp, " 3=0");// permute
  864. }
  865. else if (layer.type() == "ROIPooling")
  866. {
  867. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  868. fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
  869. fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
  870. fprintf(pp, " 2=%f", roi_pooling_param.spatial_scale());
  871. }
  872. else if (layer.type() == "Scale")
  873. {
  874. const caffe::LayerParameter& binlayer = net.layer(netidx);
  875. const caffe::ScaleParameter& scale_param = layer.scale_param();
  876. bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
  877. if (scale_weight)
  878. {
  879. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  880. fprintf(pp, " 0=%d", (int)weight_blob.data_size());
  881. }
  882. else
  883. {
  884. fprintf(pp, " 0=-233");
  885. }
  886. fprintf(pp, " 1=%d", scale_param.bias_term());
  887. for (int j=0; j<binlayer.blobs_size(); j++)
  888. {
  889. const caffe::BlobProto& blob = binlayer.blobs(j);
  890. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  891. }
  892. }
  893. else if (layer.type() == "Slice")
  894. {
  895. const caffe::SliceParameter& slice_param = layer.slice_param();
  896. if (slice_param.has_slice_dim())
  897. {
  898. int num_slice = layer.top_size();
  899. fprintf(pp, " -23300=%d", num_slice);
  900. for (int j=0; j<num_slice; j++)
  901. {
  902. fprintf(pp, ",-233");
  903. }
  904. }
  905. else
  906. {
  907. int num_slice = slice_param.slice_point_size() + 1;
  908. fprintf(pp, " -23300=%d", num_slice);
  909. int prev_offset = 0;
  910. for (int j=0; j<slice_param.slice_point_size(); j++)
  911. {
  912. int offset = slice_param.slice_point(j);
  913. fprintf(pp, ",%d", offset - prev_offset);
  914. prev_offset = offset;
  915. }
  916. fprintf(pp, ",-233");
  917. }
  918. }
  919. else if (layer.type() == "Softmax")
  920. {
  921. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  922. int dim = softmax_param.axis() - 1;
  923. fprintf(pp, " 0=%d", dim);
  924. }
  925. else if (layer.type() == "Threshold")
  926. {
  927. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  928. fprintf(pp, " 0=%f", threshold_param.threshold());
  929. }
  930. fprintf(pp, "\n");
  931. // add split layer if top reference larger than one
  932. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  933. {
  934. std::string blob_name = blob_name_decorated[layer.top(0)];
  935. if (bottom_reference.find(blob_name) != bottom_reference.end())
  936. {
  937. int refcount = bottom_reference[blob_name];
  938. if (refcount > 1)
  939. {
  940. char splitname[256];
  941. sprintf(splitname, "splitncnn_%d", internal_split);
  942. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  943. fprintf(pp, " %s", blob_name.c_str());
  944. for (int j=0; j<refcount; j++)
  945. {
  946. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  947. }
  948. fprintf(pp, "\n");
  949. internal_split++;
  950. }
  951. }
  952. }
  953. else
  954. {
  955. for (int j=0; j<layer.top_size(); j++)
  956. {
  957. std::string blob_name = layer.top(j);
  958. if (bottom_reference.find(blob_name) != bottom_reference.end())
  959. {
  960. int refcount = bottom_reference[blob_name];
  961. if (refcount > 1)
  962. {
  963. char splitname[256];
  964. sprintf(splitname, "splitncnn_%d", internal_split);
  965. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  966. fprintf(pp, " %s", blob_name.c_str());
  967. for (int j=0; j<refcount; j++)
  968. {
  969. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  970. }
  971. fprintf(pp, "\n");
  972. internal_split++;
  973. }
  974. }
  975. }
  976. }
  977. }
  978. fclose(pp);
  979. fclose(bp);
  980. return 0;
  981. }