You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

caffe2ncnn.cpp 38 kB

8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <stdio.h>
  15. #include <limits.h>
  16. #include <math.h>
  17. #include <fstream>
  18. #include <set>
  19. #include <limits>
  20. #include <algorithm>
  21. #include <google/protobuf/io/coded_stream.h>
  22. #include <google/protobuf/io/zero_copy_stream_impl.h>
  23. #include <google/protobuf/text_format.h>
  24. #include <google/protobuf/message.h>
  25. #include "caffe.pb.h"
  // Round sz up to a multiple of n using a bit mask.
  // The mask trick only yields a true multiple when n is a power of two.
  static inline size_t alignSize(size_t sz, int n)
  {
      const size_t step = n;            // same int -> size_t conversion the original expression performs
      const size_t mask = ~(step - 1);  // bit-identical to -n in two's complement
      return (sz + step - 1) & mask;
  }
  // Convert a single-precision float into a half-precision (1 : 5 : 10) bit pattern.
  // The mantissa is truncated, not rounded. Single-precision zeros and denormals
  // collapse to a signed zero, and values too large for half become infinity.
  static unsigned short float2half(float value)
  {
      // view the float as raw bits, laid out as 1 : 8 : 23
      union
      {
          unsigned int bits;
          float real;
      } pun;
      pun.real = value;

      const unsigned short sign_bit = (pun.bits & 0x80000000) >> 31;
      const unsigned short exp32 = (pun.bits & 0x7F800000) >> 23;
      const unsigned int mant32 = pun.bits & 0x7FFFFF;

      // the sign already moved to its half-precision position (bit 15)
      const unsigned short sign16 = sign_bit << 15;

      // zero or single-precision denormal: far below the half range, always underflows
      if (exp32 == 0)
          return sign16;

      // infinity or NaN: all-ones exponent, NaN keeps one mantissa bit set
      if (exp32 == 0xFF)
          return sign16 | (0x1F << 10) | (mant32 ? 0x200 : 0x00);

      // normalized input: re-bias the exponent from 127 to 15
      const short exp16 = exp32 + (- 127 + 15);

      // too large for half precision, saturate to infinity
      if (exp16 >= 31)
          return sign16 | (0x1F << 10);

      // fits as a normalized half: keep the top 10 mantissa bits
      if (exp16 > 0)
          return sign16 | (exp16 << 10) | (mant32 >> 13);

      // representable only as a half-precision denormal:
      // restore the implicit leading one and shift it into place
      if (exp16 >= -10)
      {
          const unsigned short denorm = (mant32 | 0x800000) >> (14 - exp16);
          return sign16 | denorm;
      }

      // too small even for a denormal
      return sign16;
  }
  88. static int quantize_weight(float *data, size_t data_length, std::vector<unsigned short>& float16_weights)
  89. {
  90. float16_weights.resize(data_length);
  91. for (size_t i = 0; i < data_length; i++)
  92. {
  93. float f = data[i];
  94. unsigned short fp16 = float2half(f);
  95. float16_weights[i] = fp16;
  96. }
  97. // magic tag for half-precision floating point
  98. return 0x01306B47;
  99. }
  100. static bool quantize_weight(float *data, size_t data_length, int quantize_level, std::vector<float> &quantize_table, std::vector<unsigned char> &quantize_index) {
  101. assert(quantize_level != 0);
  102. assert(data != NULL);
  103. assert(data_length > 0);
  104. if (data_length < static_cast<size_t>(quantize_level)) {
  105. fprintf(stderr, "No need quantize,because: data_length < quantize_level");
  106. return false;
  107. }
  108. quantize_table.reserve(quantize_level);
  109. quantize_index.reserve(data_length);
  110. // 1. Find min and max value
  111. float max_value = std::numeric_limits<float>::min();
  112. float min_value = std::numeric_limits<float>::max();
  113. for (size_t i = 0; i < data_length; ++i)
  114. {
  115. if (max_value < data[i]) max_value = data[i];
  116. if (min_value > data[i]) min_value = data[i];
  117. }
  118. float strides = (max_value - min_value) / quantize_level;
  119. // 2. Generate quantize table
  120. for (int i = 0; i < quantize_level; ++i)
  121. {
  122. quantize_table.push_back(min_value + i * strides);
  123. }
  124. // 3. Align data to the quantized value
  125. for (size_t i = 0; i < data_length; ++i)
  126. {
  127. size_t table_index = int((data[i] - min_value) / strides);
  128. table_index = std::min<float>(table_index, quantize_level - 1);
  129. float low_value = quantize_table[table_index];
  130. float high_value = low_value + strides;
  131. // find a nearest value between low and high value.
  132. float targetValue = data[i] - low_value < high_value - data[i] ? low_value : high_value;
  133. table_index = int((targetValue - min_value) / strides);
  134. table_index = std::min<float>(table_index, quantize_level - 1);
  135. quantize_index.push_back(table_index);
  136. }
  137. return true;
  138. }
  139. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  140. {
  141. std::ifstream fs(filepath, std::ifstream::in);
  142. if (!fs.is_open())
  143. {
  144. fprintf(stderr, "open failed %s\n", filepath);
  145. return false;
  146. }
  147. google::protobuf::io::IstreamInputStream input(&fs);
  148. bool success = google::protobuf::TextFormat::Parse(&input, message);
  149. fs.close();
  150. return success;
  151. }
  152. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  153. {
  154. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  155. if (!fs.is_open())
  156. {
  157. fprintf(stderr, "open failed %s\n", filepath);
  158. return false;
  159. }
  160. google::protobuf::io::IstreamInputStream input(&fs);
  161. google::protobuf::io::CodedInputStream codedstr(&input);
  162. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  163. bool success = message->ParseFromCodedStream(&codedstr);
  164. fs.close();
  165. return success;
  166. }
  167. int main(int argc, char** argv)
  168. {
  169. if (!(argc == 3 || argc == 5 || argc == 6))
  170. {
  171. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel]\n", argv[0]);
  172. return -1;
  173. }
  174. const char* caffeproto = argv[1];
  175. const char* caffemodel = argv[2];
  176. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  177. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  178. const char* quantize_param = argc == 6 ? argv[5] : "0";
  179. int quantize_level = atoi(quantize_param);
  180. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536) {
  181. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  182. return -1;
  183. }
  184. caffe::NetParameter proto;
  185. caffe::NetParameter net;
  186. // load
  187. bool s0 = read_proto_from_text(caffeproto, &proto);
  188. if (!s0)
  189. {
  190. fprintf(stderr, "read_proto_from_text failed\n");
  191. return -1;
  192. }
  193. bool s1 = read_proto_from_binary(caffemodel, &net);
  194. if (!s1)
  195. {
  196. fprintf(stderr, "read_proto_from_binary failed\n");
  197. return -1;
  198. }
  199. FILE* pp = fopen(ncnn_prototxt, "wb");
  200. FILE* bp = fopen(ncnn_modelbin, "wb");
  201. // rename mapping for identical bottom top style
  202. std::map<std::string, std::string> blob_name_decorated;
  203. // bottom blob reference
  204. std::map<std::string, int> bottom_reference;
  205. // global definition line
  206. // [layer count] [blob count]
  207. int layer_count = proto.layer_size();
  208. std::set<std::string> blob_names;
  209. for (int i=0; i<layer_count; i++)
  210. {
  211. const caffe::LayerParameter& layer = proto.layer(i);
  212. for (int j=0; j<layer.bottom_size(); j++)
  213. {
  214. std::string blob_name = layer.bottom(j);
  215. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  216. {
  217. blob_name = blob_name_decorated[blob_name];
  218. }
  219. blob_names.insert(blob_name);
  220. if (bottom_reference.find(blob_name) == bottom_reference.end())
  221. {
  222. bottom_reference[blob_name] = 1;
  223. }
  224. else
  225. {
  226. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  227. }
  228. }
  229. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  230. {
  231. std::string blob_name = layer.top(0) + "_" + layer.name();
  232. blob_name_decorated[layer.top(0)] = blob_name;
  233. blob_names.insert(blob_name);
  234. }
  235. else
  236. {
  237. for (int j=0; j<layer.top_size(); j++)
  238. {
  239. std::string blob_name = layer.top(j);
  240. blob_names.insert(blob_name);
  241. }
  242. }
  243. }
  244. // remove bottom_reference entry with reference equals to one
  245. int splitncnn_blob_count = 0;
  246. std::map<std::string, int>::iterator it = bottom_reference.begin();
  247. while (it != bottom_reference.end())
  248. {
  249. if (it->second == 1)
  250. {
  251. bottom_reference.erase(it++);
  252. }
  253. else
  254. {
  255. splitncnn_blob_count += it->second;
  256. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  257. ++it;
  258. }
  259. }
  260. fprintf(pp, "%lu %lu\n", layer_count + bottom_reference.size(), blob_names.size() + splitncnn_blob_count);
  261. // populate
  262. blob_name_decorated.clear();
  263. int internal_split = 0;
  264. for (int i=0; i<layer_count; i++)
  265. {
  266. const caffe::LayerParameter& layer = proto.layer(i);
  267. // layer definition line, repeated
  268. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  269. if (layer.type() == "Concat")
  270. {
  271. const caffe::ConcatParameter& concat_param = layer.concat_param();
  272. if (concat_param.axis() != 1)
  273. fprintf(pp, "%-16s", "ConcatV2");
  274. else
  275. fprintf(pp, "%-16s", "Concat");
  276. }
  277. else if (layer.type() == "Convolution")
  278. {
  279. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  280. if (convolution_param.group() != 1)
  281. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  282. else
  283. fprintf(pp, "%-16s", "Convolution");
  284. }
  285. else if (layer.type() == "Python")
  286. {
  287. const caffe::PythonParameter& python_param = layer.python_param();
  288. std::string python_layer_name = python_param.layer();
  289. if (python_layer_name == "ProposalLayer")
  290. fprintf(pp, "%-16s", "Proposal");
  291. else
  292. fprintf(pp, "%-16s", python_layer_name.c_str());
  293. }
  294. else if (layer.type() == "Softmax")
  295. {
  296. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  297. if (softmax_param.axis() != 1)
  298. fprintf(pp, "%-16s", "SoftmaxV2");
  299. else
  300. fprintf(pp, "%-16s", "Softmax");
  301. }
  302. else
  303. {
  304. fprintf(pp, "%-16s", layer.type().c_str());
  305. }
  306. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  307. for (int j=0; j<layer.bottom_size(); j++)
  308. {
  309. std::string blob_name = layer.bottom(j);
  310. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  311. {
  312. blob_name = blob_name_decorated[layer.bottom(j)];
  313. }
  314. if (bottom_reference.find(blob_name) != bottom_reference.end())
  315. {
  316. int refidx = bottom_reference[blob_name] - 1;
  317. bottom_reference[blob_name] = refidx;
  318. char splitsuffix[256];
  319. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  320. blob_name = blob_name + splitsuffix;
  321. }
  322. fprintf(pp, " %s", blob_name.c_str());
  323. }
  324. // decorated
  325. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  326. {
  327. std::string blob_name = layer.top(0) + "_" + layer.name();
  328. blob_name_decorated[layer.top(0)] = blob_name;
  329. fprintf(pp, " %s", blob_name.c_str());
  330. }
  331. else
  332. {
  333. for (int j=0; j<layer.top_size(); j++)
  334. {
  335. std::string blob_name = layer.top(j);
  336. fprintf(pp, " %s", blob_name.c_str());
  337. }
  338. }
  339. // find blob binary by layer name
  340. int netidx;
  341. for (netidx=0; netidx<net.layer_size(); netidx++)
  342. {
  343. if (net.layer(netidx).name() == layer.name())
  344. {
  345. break;
  346. }
  347. }
  348. // layer specific params
  349. if (layer.type() == "BatchNorm")
  350. {
  351. const caffe::LayerParameter& binlayer = net.layer(netidx);
  352. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  353. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  354. fprintf(pp, " %d", (int)mean_blob.data_size());
  355. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  356. float eps = batch_norm_param.eps();
  357. std::vector<float> ones(mean_blob.data_size(), 1.f);
  358. fwrite(ones.data(), sizeof(float), ones.size(), bp);// slope
  359. if (binlayer.blobs_size() < 3)
  360. {
  361. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  362. float tmp;
  363. for (int j=0; j<var_blob.data_size(); j++)
  364. {
  365. tmp = var_blob.data().data()[j] + eps;
  366. fwrite(&tmp, sizeof(float), 1, bp);
  367. }
  368. }
  369. else
  370. {
  371. float scale_factor = 1 / binlayer.blobs(2).data().data()[0];
  372. // premultiply scale_factor to mean and variance
  373. float tmp;
  374. for (int j=0; j<mean_blob.data_size(); j++)
  375. {
  376. tmp = mean_blob.data().data()[j] * scale_factor;
  377. fwrite(&tmp, sizeof(float), 1, bp);
  378. }
  379. for (int j=0; j<var_blob.data_size(); j++)
  380. {
  381. tmp = var_blob.data().data()[j] * scale_factor + eps;
  382. fwrite(&tmp, sizeof(float), 1, bp);
  383. }
  384. }
  385. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  386. fwrite(zeros.data(), sizeof(float), zeros.size(), bp);// bias
  387. }
  388. else if (layer.type() == "Concat")
  389. {
  390. const caffe::ConcatParameter& concat_param = layer.concat_param();
  391. if (concat_param.axis() != 1)
  392. {
  393. int dim = concat_param.axis() >= 1 ? concat_param.axis() - 1 : 0;
  394. fprintf(pp, " %d", dim);
  395. }
  396. }
  397. else if (layer.type() == "Convolution")
  398. {
  399. const caffe::LayerParameter& binlayer = net.layer(netidx);
  400. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  401. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  402. fprintf(pp, " %d %d %d %d %d %d %d", convolution_param.num_output(), convolution_param.kernel_size(0),
  403. convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1,
  404. convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1,
  405. convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0,
  406. convolution_param.bias_term(),
  407. weight_blob.data_size());
  408. if (convolution_param.group() != 1)
  409. {
  410. fprintf(pp, " %d", convolution_param.group());
  411. }
  412. for (int j = 0; j < binlayer.blobs_size(); j++)
  413. {
  414. int quantize_tag = 0;
  415. const caffe::BlobProto& blob = binlayer.blobs(j);
  416. std::vector<float> quantize_table;
  417. std::vector<unsigned char> quantize_index;
  418. std::vector<unsigned short> float16_weights;
  419. // we will not quantize the bias values
  420. if (j == 0 && quantize_level != 0)
  421. {
  422. if (quantize_level == 256)
  423. {
  424. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  425. }
  426. else if (quantize_level == 65536)
  427. {
  428. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  429. }
  430. }
  431. // write quantize tag first
  432. if (j == 0)
  433. fwrite(&quantize_tag, sizeof(int), 1, bp);
  434. if (quantize_tag)
  435. {
  436. int p0 = ftell(bp);
  437. if (quantize_level == 256)
  438. {
  439. // write quantize table and index
  440. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  441. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  442. }
  443. else if (quantize_level == 65536)
  444. {
  445. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  446. }
  447. // padding to 32bit align
  448. int nwrite = ftell(bp) - p0;
  449. int nalign = alignSize(nwrite, 4);
  450. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  451. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  452. }
  453. else
  454. {
  455. // write original data
  456. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  457. }
  458. }
  459. }
  460. else if (layer.type() == "Crop")
  461. {
  462. const caffe::CropParameter& crop_param = layer.crop_param();
  463. int num_offset = crop_param.offset_size();
  464. int woffset = (num_offset == 2) ? crop_param.offset(0) : 0;
  465. int hoffset = (num_offset == 2) ? crop_param.offset(1) : 0;
  466. fprintf(pp, " %d %d", woffset, hoffset);
  467. }
  468. else if (layer.type() == "Deconvolution")
  469. {
  470. const caffe::LayerParameter& binlayer = net.layer(netidx);
  471. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  472. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  473. fprintf(pp, " %d %d %d %d %d %d %d", convolution_param.num_output(), convolution_param.kernel_size(0),
  474. convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1,
  475. convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1,
  476. convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0,
  477. convolution_param.bias_term(),
  478. weight_blob.data_size());
  479. int quantized_weight = 0;
  480. fwrite(&quantized_weight, sizeof(int), 1, bp);
  481. // reorder weight from inch-outch to outch-inch
  482. int ksize = convolution_param.kernel_size(0);
  483. int num_output = convolution_param.num_output();
  484. int num_input = weight_blob.data_size() / (ksize * ksize) / num_output;
  485. const float* weight_data_ptr = weight_blob.data().data();
  486. for (int k=0; k<num_output; k++)
  487. {
  488. for (int j=0; j<num_input; j++)
  489. {
  490. fwrite(weight_data_ptr + (j*num_output + k) * ksize * ksize, sizeof(float), ksize * ksize, bp);
  491. }
  492. }
  493. for (int j=1; j<binlayer.blobs_size(); j++)
  494. {
  495. const caffe::BlobProto& blob = binlayer.blobs(j);
  496. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  497. }
  498. }
  499. else if (layer.type() == "DetectionOutput")
  500. {
  501. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  502. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  503. fprintf(pp, " %d %f %d %d %f", detection_output_param.num_classes(), nms_param.nms_threshold(), nms_param.top_k(), detection_output_param.keep_top_k(), detection_output_param.confidence_threshold());
  504. }
  505. else if (layer.type() == "Eltwise")
  506. {
  507. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  508. int coeff_size = eltwise_param.coeff_size();
  509. fprintf(pp, " %d %d", (int)eltwise_param.operation(), coeff_size);
  510. for (int j=0; j<coeff_size; j++)
  511. {
  512. fprintf(pp, " %f", eltwise_param.coeff(j));
  513. }
  514. }
  515. else if (layer.type() == "ELU")
  516. {
  517. const caffe::ELUParameter& elu_param = layer.elu_param();
  518. fprintf(pp, " %f", elu_param.alpha());
  519. }
  520. else if (layer.type() == "InnerProduct")
  521. {
  522. const caffe::LayerParameter& binlayer = net.layer(netidx);
  523. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  524. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  525. fprintf(pp, " %d %d %d", inner_product_param.num_output(), inner_product_param.bias_term(),
  526. weight_blob.data_size());
  527. for (int j=0; j<binlayer.blobs_size(); j++)
  528. {
  529. int quantize_tag = 0;
  530. const caffe::BlobProto& blob = binlayer.blobs(j);
  531. std::vector<float> quantize_table;
  532. std::vector<unsigned char> quantize_index;
  533. std::vector<unsigned short> float16_weights;
  534. // we will not quantize the bias values
  535. if (j == 0 && quantize_level != 0)
  536. {
  537. if (quantize_level == 256)
  538. {
  539. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  540. }
  541. else if (quantize_level == 65536)
  542. {
  543. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  544. }
  545. }
  546. // write quantize tag first
  547. if (j == 0)
  548. fwrite(&quantize_tag, sizeof(int), 1, bp);
  549. if (quantize_tag)
  550. {
  551. int p0 = ftell(bp);
  552. if (quantize_level == 256)
  553. {
  554. // write quantize table and index
  555. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  556. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  557. }
  558. else if (quantize_level == 65536)
  559. {
  560. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  561. }
  562. // padding to 32bit align
  563. int nwrite = ftell(bp) - p0;
  564. int nalign = alignSize(nwrite, 4);
  565. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  566. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  567. }
  568. else
  569. {
  570. // write original data
  571. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  572. }
  573. }
  574. }
  575. else if (layer.type() == "Input")
  576. {
  577. const caffe::InputParameter& input_param = layer.input_param();
  578. const caffe::BlobShape& bs = input_param.shape(0);
  579. for (int j=1; j<std::min((int)bs.dim_size(), 4); j++)
  580. {
  581. fprintf(pp, " %ld", bs.dim(j));
  582. }
  583. for (int j=bs.dim_size(); j<4; j++)
  584. {
  585. fprintf(pp, " -233");
  586. }
  587. }
  588. else if (layer.type() == "Interp")
  589. {
  590. const caffe::InterpParameter& interp_param = layer.interp_param();
  591. fprintf(pp," %d %f %f %d %d",2, static_cast<float>(interp_param.zoom_factor()), \
  592. static_cast<float>(interp_param.zoom_factor()),interp_param.height(),interp_param.width());
  593. }
  594. else if (layer.type() == "LRN")
  595. {
  596. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  597. fprintf(pp, " %d %d %.8f %.8f", lrn_param.norm_region(), lrn_param.local_size(), lrn_param.alpha(), lrn_param.beta());
  598. }
  599. else if (layer.type() == "MemoryData")
  600. {
  601. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  602. fprintf(pp, " %d %d %d", memory_data_param.channels(), memory_data_param.width(), memory_data_param.height());
  603. }
  604. else if (layer.type() == "Normalize")
  605. {
  606. const caffe::LayerParameter& binlayer = net.layer(netidx);
  607. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  608. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  609. fprintf(pp, " %d %d %f %d", norm_param.across_spatial(), norm_param.channel_shared(), norm_param.eps(), scale_blob.data_size());
  610. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  611. }
  612. else if (layer.type() == "Permute")
  613. {
  614. const caffe::PermuteParameter& permute_param = layer.permute_param();
  615. int order_size = permute_param.order_size();
  616. int order_type = 0;
  617. if (order_size == 0)
  618. order_type = 0;
  619. if (order_size == 1)
  620. {
  621. int order0 = permute_param.order(0);
  622. if (order0 == 0)
  623. order_type = 0;
  624. // permute with N not supported
  625. }
  626. if (order_size == 2)
  627. {
  628. int order0 = permute_param.order(0);
  629. int order1 = permute_param.order(1);
  630. if (order0 == 0)
  631. {
  632. if (order1 == 1) // 0 1 2 3
  633. order_type = 0;
  634. else if (order1 == 2) // 0 2 1 3
  635. order_type = 2;
  636. else if (order1 == 3) // 0 3 1 2
  637. order_type = 4;
  638. }
  639. // permute with N not supported
  640. }
  641. if (order_size == 3 || order_size == 4)
  642. {
  643. int order0 = permute_param.order(0);
  644. int order1 = permute_param.order(1);
  645. int order2 = permute_param.order(2);
  646. if (order0 == 0)
  647. {
  648. if (order1 == 1)
  649. {
  650. if (order2 == 2) // 0 1 2 3
  651. order_type = 0;
  652. if (order2 == 3) // 0 1 3 2
  653. order_type = 1;
  654. }
  655. else if (order1 == 2)
  656. {
  657. if (order2 == 1) // 0 2 1 3
  658. order_type = 2;
  659. if (order2 == 3) // 0 2 3 1
  660. order_type = 3;
  661. }
  662. else if (order1 == 3)
  663. {
  664. if (order2 == 1) // 0 3 1 2
  665. order_type = 4;
  666. if (order2 == 2) // 0 3 2 1
  667. order_type = 5;
  668. }
  669. }
  670. // permute with N not supported
  671. }
  672. fprintf(pp, " %d", order_type);
  673. }
  674. else if (layer.type() == "Pooling")
  675. {
  676. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  677. fprintf(pp, " %d %d %d %d %d", pooling_param.pool(), pooling_param.kernel_size(), pooling_param.stride(), pooling_param.pad(),
  678. pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  679. }
  680. else if (layer.type() == "Power")
  681. {
  682. const caffe::PowerParameter& power_param = layer.power_param();
  683. fprintf(pp, " %f %f %f", power_param.power(), power_param.scale(), power_param.shift());
  684. }
  685. else if (layer.type() == "PReLU")
  686. {
  687. const caffe::LayerParameter& binlayer = net.layer(netidx);
  688. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  689. fprintf(pp, " %d", slope_blob.data_size());
  690. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  691. }
  692. else if (layer.type() == "PriorBox")
  693. {
  694. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  695. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  696. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  697. {
  698. float ar = prior_box_param.aspect_ratio(j);
  699. if (fabs(ar - 1.) < 1e-6) {
  700. num_aspect_ratio--;
  701. }
  702. }
  703. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  704. if (prior_box_param.variance_size() == 4)
  705. {
  706. variances[0] = prior_box_param.variance(0);
  707. variances[1] = prior_box_param.variance(1);
  708. variances[2] = prior_box_param.variance(2);
  709. variances[3] = prior_box_param.variance(3);
  710. }
  711. else if (prior_box_param.variance_size() == 1)
  712. {
  713. variances[0] = prior_box_param.variance(0);
  714. variances[1] = prior_box_param.variance(0);
  715. variances[2] = prior_box_param.variance(0);
  716. variances[3] = prior_box_param.variance(0);
  717. }
  718. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  719. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  720. int image_width = -233;
  721. int image_height = -233;
  722. if (prior_box_param.has_img_size())
  723. {
  724. image_width = prior_box_param.img_size();
  725. image_height = prior_box_param.img_size();
  726. }
  727. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  728. {
  729. image_width = prior_box_param.img_w();
  730. image_height = prior_box_param.img_h();
  731. }
  732. float step_width = -233;
  733. float step_height = -233;
  734. if (prior_box_param.has_step())
  735. {
  736. step_width = prior_box_param.step();
  737. step_height = prior_box_param.step();
  738. }
  739. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  740. {
  741. step_width = prior_box_param.step_w();
  742. step_height = prior_box_param.step_h();
  743. }
  744. fprintf(pp, " %d %d %d %f %f %f %f %d %d %d %d %f %f %f", prior_box_param.min_size_size(),
  745. prior_box_param.max_size_size(), num_aspect_ratio,
  746. variances[0], variances[1], variances[2], variances[3],
  747. flip, clip, image_width, image_height,
  748. step_width, step_height, prior_box_param.offset());
  749. for (int j=0; j<prior_box_param.min_size_size(); j++)
  750. {
  751. fprintf(pp, " %f", prior_box_param.min_size(j));
  752. }
  753. for (int j=0; j<prior_box_param.max_size_size(); j++)
  754. {
  755. fprintf(pp, " %f", prior_box_param.max_size(j));
  756. }
  757. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  758. {
  759. float ar = prior_box_param.aspect_ratio(j);
  760. if (fabs(ar - 1.) < 1e-6) {
  761. continue;
  762. }
  763. fprintf(pp, " %f", ar);
  764. }
  765. }
  766. else if (layer.type() == "Python")
  767. {
  768. const caffe::PythonParameter& python_param = layer.python_param();
  769. std::string python_layer_name = python_param.layer();
  770. if (python_layer_name == "ProposalLayer")
  771. {
  772. int feat_stride = 16;
  773. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  774. int base_size = 16;
  775. // float ratio;
  776. // float scale;
  777. int pre_nms_topN = 6000;
  778. int after_nms_topN = 300;
  779. float nms_thresh = 0.7;
  780. int min_size = 16;
  781. fprintf(pp, " %d %d %d %d %f %d", feat_stride, base_size, pre_nms_topN, after_nms_topN, nms_thresh, min_size);
  782. }
  783. }
  784. else if (layer.type() == "ReLU")
  785. {
  786. const caffe::ReLUParameter& relu_param = layer.relu_param();
  787. fprintf(pp, " %f", relu_param.negative_slope());
  788. }
  789. else if (layer.type() == "Reshape")
  790. {
  791. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  792. const caffe::BlobShape& bs = reshape_param.shape();
  793. if (bs.dim_size() == 1)
  794. {
  795. fprintf(pp, " %ld -233 -233", bs.dim(0));
  796. }
  797. else if (bs.dim_size() == 2)
  798. {
  799. fprintf(pp, " %ld %ld -233", bs.dim(1), bs.dim(0));
  800. }
  801. else if (bs.dim_size() == 3)
  802. {
  803. fprintf(pp, " %ld %ld %ld", bs.dim(2), bs.dim(1), bs.dim(0));
  804. }
  805. else // bs.dim_size() == 4
  806. {
  807. fprintf(pp, " %ld %ld %ld", bs.dim(3), bs.dim(2), bs.dim(1));
  808. }
  809. fprintf(pp, " 0");// permute
  810. }
  811. else if (layer.type() == "ROIPooling")
  812. {
  813. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  814. fprintf(pp, " %d %d %.8f", roi_pooling_param.pooled_w(), roi_pooling_param.pooled_h(), roi_pooling_param.spatial_scale());
  815. }
  816. else if (layer.type() == "Scale")
  817. {
  818. const caffe::LayerParameter& binlayer = net.layer(netidx);
  819. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  820. const caffe::ScaleParameter& scale_param = layer.scale_param();
  821. fprintf(pp, " %d %d", (int)weight_blob.data_size(), scale_param.bias_term());
  822. for (int j=0; j<binlayer.blobs_size(); j++)
  823. {
  824. const caffe::BlobProto& blob = binlayer.blobs(j);
  825. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  826. }
  827. }
  828. else if (layer.type() == "Slice")
  829. {
  830. const caffe::SliceParameter& slice_param = layer.slice_param();
  831. if (slice_param.has_slice_dim())
  832. {
  833. int num_slice = layer.top_size();
  834. fprintf(pp, " %d", num_slice);
  835. for (int j=0; j<num_slice; j++)
  836. {
  837. fprintf(pp, " -233");
  838. }
  839. }
  840. else
  841. {
  842. int num_slice = slice_param.slice_point_size() + 1;
  843. fprintf(pp, " %d", num_slice);
  844. int prev_offset = 0;
  845. for (int j=0; j<slice_param.slice_point_size(); j++)
  846. {
  847. int offset = slice_param.slice_point(j);
  848. fprintf(pp, " %d", offset - prev_offset);
  849. prev_offset = offset;
  850. }
  851. fprintf(pp, " -233");
  852. }
  853. }
  854. else if (layer.type() == "Softmax")
  855. {
  856. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  857. if (softmax_param.axis() != 1)
  858. {
  859. int dim = softmax_param.axis() >= 1 ? softmax_param.axis() - 1 : 0;
  860. fprintf(pp, " %d", dim);
  861. }
  862. }
  863. else if (layer.type() == "Threshold")
  864. {
  865. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  866. fprintf(pp, " %f", threshold_param.threshold());
  867. }
  868. fprintf(pp, "\n");
  869. // add split layer if top reference larger than one
  870. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  871. {
  872. std::string blob_name = blob_name_decorated[layer.top(0)];
  873. if (bottom_reference.find(blob_name) != bottom_reference.end())
  874. {
  875. int refcount = bottom_reference[blob_name];
  876. if (refcount > 1)
  877. {
  878. char splitname[256];
  879. sprintf(splitname, "splitncnn_%d", internal_split);
  880. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  881. fprintf(pp, " %s", blob_name.c_str());
  882. for (int j=0; j<refcount; j++)
  883. {
  884. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  885. }
  886. fprintf(pp, "\n");
  887. internal_split++;
  888. }
  889. }
  890. }
  891. else
  892. {
  893. for (int j=0; j<layer.top_size(); j++)
  894. {
  895. std::string blob_name = layer.top(j);
  896. if (bottom_reference.find(blob_name) != bottom_reference.end())
  897. {
  898. int refcount = bottom_reference[blob_name];
  899. if (refcount > 1)
  900. {
  901. char splitname[256];
  902. sprintf(splitname, "splitncnn_%d", internal_split);
  903. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  904. fprintf(pp, " %s", blob_name.c_str());
  905. for (int j=0; j<refcount; j++)
  906. {
  907. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  908. }
  909. fprintf(pp, "\n");
  910. internal_split++;
  911. }
  912. }
  913. }
  914. }
  915. }
  916. fclose(pp);
  917. fclose(bp);
  918. return 0;
  919. }