You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

caffe2ncnn.cpp 66 kB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifdef _MSC_VER
  15. #define _CRT_SECURE_NO_DEPRECATE
  16. #endif
  17. #include "caffe.pb.h"
  18. #include <algorithm>
  19. #include <fstream>
  20. #include <google/protobuf/io/coded_stream.h>
  21. #include <google/protobuf/io/zero_copy_stream_impl.h>
  22. #include <google/protobuf/message.h>
  23. #include <google/protobuf/text_format.h>
  24. #include <limits.h>
  25. #include <limits>
  26. #include <map>
  27. #include <math.h>
  28. #include <set>
  29. #include <stdio.h>
  30. static inline size_t alignSize(size_t sz, int n)
  31. {
  32. return (sz + n - 1) & -n;
  33. }
  34. // convert float to half precision floating point
  35. static unsigned short float2half(float value)
  36. {
  37. // 1 : 8 : 23
  38. union
  39. {
  40. unsigned int u;
  41. float f;
  42. } tmp;
  43. tmp.f = value;
  44. // 1 : 8 : 23
  45. unsigned short sign = (tmp.u & 0x80000000) >> 31;
  46. unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
  47. unsigned int significand = tmp.u & 0x7FFFFF;
  48. // fprintf(stderr, "%d %d %d\n", sign, exponent, significand);
  49. // 1 : 5 : 10
  50. unsigned short fp16;
  51. if (exponent == 0)
  52. {
  53. // zero or denormal, always underflow
  54. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  55. }
  56. else if (exponent == 0xFF)
  57. {
  58. // infinity or NaN
  59. fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
  60. }
  61. else
  62. {
  63. // normalized
  64. short newexp = exponent + (-127 + 15);
  65. if (newexp >= 31)
  66. {
  67. // overflow, return infinity
  68. fp16 = (sign << 15) | (0x1F << 10) | 0x00;
  69. }
  70. else if (newexp <= 0)
  71. {
  72. // underflow
  73. if (newexp >= -10)
  74. {
  75. // denormal half-precision
  76. unsigned short sig = (significand | 0x800000) >> (14 - newexp);
  77. fp16 = (sign << 15) | (0x00 << 10) | sig;
  78. }
  79. else
  80. {
  81. // underflow
  82. fp16 = (sign << 15) | (0x00 << 10) | 0x00;
  83. }
  84. }
  85. else
  86. {
  87. fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
  88. }
  89. }
  90. return fp16;
  91. }
  92. // round to nearest
  93. static signed char float2int8(float value)
  94. {
  95. float tmp;
  96. if (value >= 0.f)
  97. tmp = value + 0.5f;
  98. else
  99. tmp = value - 0.5f;
  100. if (tmp > 127)
  101. return 127;
  102. if (tmp < -127)
  103. return -127;
  104. return static_cast<signed char>(tmp);
  105. }
  106. static bool read_int8scale_table(const char* filepath, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
  107. {
  108. blob_int8scale_table.clear();
  109. weight_int8scale_table.clear();
  110. FILE* fp = fopen(filepath, "rb");
  111. if (!fp)
  112. {
  113. fprintf(stderr, "fopen %s failed\n", filepath);
  114. return false;
  115. }
  116. bool in_scale_vector = false;
  117. std::string keystr;
  118. std::vector<float> scales;
  119. while (!feof(fp))
  120. {
  121. char key[256];
  122. int nscan = fscanf(fp, "%255s", key);
  123. if (nscan != 1)
  124. {
  125. break;
  126. }
  127. if (in_scale_vector)
  128. {
  129. float scale = 1.f;
  130. int nscan = sscanf(key, "%f", &scale);
  131. if (nscan == 1)
  132. {
  133. scales.push_back(scale);
  134. continue;
  135. }
  136. else
  137. {
  138. // XYZ_param_N pattern
  139. if (strstr(keystr.c_str(), "_param_"))
  140. {
  141. weight_int8scale_table[keystr] = scales;
  142. }
  143. else
  144. {
  145. blob_int8scale_table[keystr] = scales;
  146. }
  147. keystr.clear();
  148. scales.clear();
  149. in_scale_vector = false;
  150. }
  151. }
  152. if (!in_scale_vector)
  153. {
  154. keystr = key;
  155. in_scale_vector = true;
  156. }
  157. }
  158. if (in_scale_vector)
  159. {
  160. // XYZ_param_N pattern
  161. if (strstr(keystr.c_str(), "_param_"))
  162. {
  163. weight_int8scale_table[keystr] = scales;
  164. }
  165. else
  166. {
  167. blob_int8scale_table[keystr] = scales;
  168. }
  169. }
  170. fclose(fp);
  171. return true;
  172. }
  173. static int quantize_weight(float* data, size_t data_length, std::vector<unsigned short>& float16_weights)
  174. {
  175. float16_weights.resize(data_length);
  176. for (size_t i = 0; i < data_length; i++)
  177. {
  178. float f = data[i];
  179. unsigned short fp16 = float2half(f);
  180. float16_weights[i] = fp16;
  181. }
  182. // magic tag for half-precision floating point
  183. return 0x01306B47;
  184. }
  185. static int quantize_weight(float* data, size_t data_length, std::vector<float> scales, std::vector<signed char>& int8_weights)
  186. {
  187. int8_weights.resize(data_length);
  188. const int length_per_group = static_cast<int>(data_length / scales.size());
  189. for (size_t i = 0; i < data_length; i++)
  190. {
  191. float f = data[i];
  192. signed char int8 = float2int8(f * scales[i / length_per_group]);
  193. int8_weights[i] = int8;
  194. }
  195. // magic tag for int8
  196. return 0x000D4B38;
  197. }
  198. static bool quantize_weight(float* data, size_t data_length, int quantize_level, std::vector<float>& quantize_table, std::vector<unsigned char>& quantize_index)
  199. {
  200. assert(quantize_level != 0);
  201. assert(data != NULL);
  202. assert(data_length > 0);
  203. if (data_length < static_cast<size_t>(quantize_level))
  204. {
  205. fprintf(stderr, "No need quantize,because: data_length < quantize_level");
  206. return false;
  207. }
  208. quantize_table.reserve(quantize_level);
  209. quantize_index.reserve(data_length);
  210. // 1. Find min and max value
  211. float max_value = std::numeric_limits<float>::min();
  212. float min_value = std::numeric_limits<float>::max();
  213. for (size_t i = 0; i < data_length; ++i)
  214. {
  215. if (max_value < data[i]) max_value = data[i];
  216. if (min_value > data[i]) min_value = data[i];
  217. }
  218. float strides = (max_value - min_value) / quantize_level;
  219. // 2. Generate quantize table
  220. for (int i = 0; i < quantize_level; ++i)
  221. {
  222. quantize_table.push_back(min_value + i * strides);
  223. }
  224. // 3. Align data to the quantized value
  225. for (size_t i = 0; i < data_length; ++i)
  226. {
  227. int table_index = int((data[i] - min_value) / strides);
  228. table_index = std::min(table_index, quantize_level - 1);
  229. float low_value = quantize_table[table_index];
  230. float high_value = low_value + strides;
  231. // find a nearest value between low and high value.
  232. const float targetValue = data[i] - low_value < high_value - data[i] ? low_value : high_value;
  233. table_index = int((targetValue - min_value) / strides);
  234. table_index = std::min(table_index, quantize_level - 1);
  235. quantize_index.push_back(table_index);
  236. }
  237. return true;
  238. }
  239. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  240. {
  241. std::ifstream fs(filepath, std::ifstream::in);
  242. if (!fs.is_open())
  243. {
  244. fprintf(stderr, "open failed %s\n", filepath);
  245. return false;
  246. }
  247. google::protobuf::io::IstreamInputStream input(&fs);
  248. bool success = google::protobuf::TextFormat::Parse(&input, message);
  249. fs.close();
  250. return success;
  251. }
  252. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  253. {
  254. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  255. if (!fs.is_open())
  256. {
  257. fprintf(stderr, "open failed %s\n", filepath);
  258. return false;
  259. }
  260. google::protobuf::io::IstreamInputStream input(&fs);
  261. google::protobuf::io::CodedInputStream codedstr(&input);
  262. #if GOOGLE_PROTOBUF_VERSION >= 3011000
  263. codedstr.SetTotalBytesLimit(INT_MAX);
  264. #else
  265. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  266. #endif
  267. bool success = message->ParseFromCodedStream(&codedstr);
  268. fs.close();
  269. return success;
  270. }
  271. int main(int argc, char** argv)
  272. {
  273. if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
  274. {
  275. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
  276. return -1;
  277. }
  278. const char* caffeproto = argv[1];
  279. const char* caffemodel = argv[2];
  280. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  281. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  282. const char* quantize_param = argc >= 6 ? argv[5] : "0";
  283. const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
  284. int quantize_level = atoi(quantize_param);
  285. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536)
  286. {
  287. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  288. return -1;
  289. }
  290. caffe::NetParameter proto;
  291. caffe::NetParameter net;
  292. // load
  293. bool s0 = read_proto_from_text(caffeproto, &proto);
  294. if (!s0)
  295. {
  296. fprintf(stderr, "read_proto_from_text failed\n");
  297. return -1;
  298. }
  299. bool s1 = read_proto_from_binary(caffemodel, &net);
  300. if (!s1)
  301. {
  302. fprintf(stderr, "read_proto_from_binary failed\n");
  303. return -1;
  304. }
  305. std::map<std::string, std::vector<float> > blob_int8scale_table;
  306. std::map<std::string, std::vector<float> > weight_int8scale_table;
  307. if (int8scale_table_path)
  308. {
  309. bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
  310. if (!s2)
  311. {
  312. fprintf(stderr, "read_int8scale_table failed\n");
  313. return -1;
  314. }
  315. }
  316. FILE* pp = fopen(ncnn_prototxt, "wb");
  317. FILE* bp = fopen(ncnn_modelbin, "wb");
  318. // magic
  319. fprintf(pp, "7767517\n");
  320. // rename mapping for identical bottom top style
  321. std::map<std::string, std::string> blob_name_decorated;
  322. // bottom blob reference
  323. std::map<std::string, int> bottom_reference;
  324. // global definition line
  325. // [layer count] [blob count]
  326. int layer_count = proto.layer_size();
  327. std::set<std::string> blob_names;
  328. for (int i = 0; i < layer_count; i++)
  329. {
  330. const caffe::LayerParameter& layer = proto.layer(i);
  331. for (int j = 0; j < layer.bottom_size(); j++)
  332. {
  333. std::string blob_name = layer.bottom(j);
  334. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  335. {
  336. blob_name = blob_name_decorated[blob_name];
  337. }
  338. blob_names.insert(blob_name);
  339. if (bottom_reference.find(blob_name) == bottom_reference.end())
  340. {
  341. bottom_reference[blob_name] = 1;
  342. }
  343. else
  344. {
  345. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  346. }
  347. }
  348. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  349. {
  350. std::string blob_name = layer.top(0) + "_" + layer.name();
  351. blob_name_decorated[layer.top(0)] = blob_name;
  352. blob_names.insert(blob_name);
  353. }
  354. else
  355. {
  356. for (int j = 0; j < layer.top_size(); j++)
  357. {
  358. std::string blob_name = layer.top(j);
  359. blob_names.insert(blob_name);
  360. }
  361. }
  362. }
  363. // remove bottom_reference entry with reference equals to one
  364. int splitncnn_blob_count = 0;
  365. std::map<std::string, int>::iterator it = bottom_reference.begin();
  366. while (it != bottom_reference.end())
  367. {
  368. if (it->second == 1)
  369. {
  370. bottom_reference.erase(it++);
  371. }
  372. else
  373. {
  374. splitncnn_blob_count += it->second;
  375. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  376. ++it;
  377. }
  378. }
  379. fprintf(pp, "%d %d\n", int(layer_count + bottom_reference.size()), int(blob_names.size() + splitncnn_blob_count));
  380. // populate
  381. blob_name_decorated.clear();
  382. int internal_split = 0;
  383. for (int i = 0; i < layer_count; i++)
  384. {
  385. const caffe::LayerParameter& layer = proto.layer(i);
  386. // layer definition line, repeated
  387. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  388. if (layer.type() == "BN")
  389. {
  390. fprintf(pp, "%-16s", "Scale");
  391. }
  392. else if (layer.type() == "Convolution")
  393. {
  394. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  395. if (convolution_param.group() != 1)
  396. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  397. else
  398. fprintf(pp, "%-16s", "Convolution");
  399. }
  400. else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  401. {
  402. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  403. }
  404. else if (layer.type() == "Deconvolution")
  405. {
  406. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  407. if (convolution_param.group() != 1)
  408. fprintf(pp, "%-16s", "DeconvolutionDepthWise");
  409. else
  410. fprintf(pp, "%-16s", "Deconvolution");
  411. }
  412. else if (layer.type() == "MemoryData")
  413. {
  414. fprintf(pp, "%-16s", "Input");
  415. }
  416. else if (layer.type() == "Python")
  417. {
  418. const caffe::PythonParameter& python_param = layer.python_param();
  419. std::string python_layer_name = python_param.layer();
  420. if (python_layer_name == "ProposalLayer")
  421. fprintf(pp, "%-16s", "Proposal");
  422. else
  423. fprintf(pp, "%-16s", python_layer_name.c_str());
  424. }
  425. else if (layer.type() == "ReLU6")
  426. {
  427. fprintf(pp, "%-16s", "Clip");
  428. }
  429. else if (layer.type() == "Silence")
  430. {
  431. fprintf(pp, "%-16s", "Noop");
  432. }
  433. else
  434. {
  435. fprintf(pp, "%-16s", layer.type().c_str());
  436. }
  437. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  438. for (int j = 0; j < layer.bottom_size(); j++)
  439. {
  440. std::string blob_name = layer.bottom(j);
  441. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  442. {
  443. blob_name = blob_name_decorated[layer.bottom(j)];
  444. }
  445. if (bottom_reference.find(blob_name) != bottom_reference.end())
  446. {
  447. int refidx = bottom_reference[blob_name] - 1;
  448. bottom_reference[blob_name] = refidx;
  449. char splitsuffix[256];
  450. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  451. blob_name = blob_name + splitsuffix;
  452. }
  453. fprintf(pp, " %s", blob_name.c_str());
  454. }
  455. // decorated
  456. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  457. {
  458. std::string blob_name = layer.top(0) + "_" + layer.name();
  459. blob_name_decorated[layer.top(0)] = blob_name;
  460. fprintf(pp, " %s", blob_name.c_str());
  461. }
  462. else
  463. {
  464. for (int j = 0; j < layer.top_size(); j++)
  465. {
  466. std::string blob_name = layer.top(j);
  467. fprintf(pp, " %s", blob_name.c_str());
  468. }
  469. }
  470. // find blob binary by layer name
  471. int netidx;
  472. for (netidx = 0; netidx < net.layer_size(); netidx++)
  473. {
  474. if (net.layer(netidx).name() == layer.name())
  475. {
  476. break;
  477. }
  478. }
  479. // layer specific params
  480. if (layer.type() == "BatchNorm")
  481. {
  482. const caffe::LayerParameter& binlayer = net.layer(netidx);
  483. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  484. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  485. fprintf(pp, " 0=%d", (int)mean_blob.data_size());
  486. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  487. float eps = batch_norm_param.eps();
  488. std::vector<float> ones(mean_blob.data_size(), 1.f);
  489. fwrite(ones.data(), sizeof(float), ones.size(), bp); // slope
  490. if (binlayer.blobs_size() < 3)
  491. {
  492. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  493. float tmp;
  494. for (int j = 0; j < var_blob.data_size(); j++)
  495. {
  496. tmp = var_blob.data().data()[j] + eps;
  497. fwrite(&tmp, sizeof(float), 1, bp);
  498. }
  499. }
  500. else
  501. {
  502. float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
  503. // premultiply scale_factor to mean and variance
  504. float tmp;
  505. for (int j = 0; j < mean_blob.data_size(); j++)
  506. {
  507. tmp = mean_blob.data().data()[j] * scale_factor;
  508. fwrite(&tmp, sizeof(float), 1, bp);
  509. }
  510. for (int j = 0; j < var_blob.data_size(); j++)
  511. {
  512. tmp = var_blob.data().data()[j] * scale_factor + eps;
  513. fwrite(&tmp, sizeof(float), 1, bp);
  514. }
  515. }
  516. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  517. fwrite(zeros.data(), sizeof(float), zeros.size(), bp); // bias
  518. }
  519. else if (layer.type() == "BN")
  520. {
  521. const caffe::LayerParameter& binlayer = net.layer(netidx);
  522. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  523. const caffe::BlobProto& shift_blob = binlayer.blobs(1);
  524. fprintf(pp, " 0=%d", (int)scale_blob.data_size());
  525. fprintf(pp, " 1=1");
  526. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  527. fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
  528. }
  529. else if (layer.type() == "Concat")
  530. {
  531. const caffe::ConcatParameter& concat_param = layer.concat_param();
  532. int axis = concat_param.axis() - 1;
  533. fprintf(pp, " 0=%d", axis);
  534. }
  535. else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  536. {
  537. const caffe::LayerParameter& binlayer = net.layer(netidx);
  538. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  539. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  540. fprintf(pp, " 0=%d", convolution_param.num_output());
  541. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  542. {
  543. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  544. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  545. }
  546. else
  547. {
  548. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  549. }
  550. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  551. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  552. {
  553. fprintf(pp, " 3=%d", convolution_param.stride_w());
  554. fprintf(pp, " 13=%d", convolution_param.stride_h());
  555. }
  556. else
  557. {
  558. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  559. }
  560. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  561. {
  562. fprintf(pp, " 4=%d", convolution_param.pad_w());
  563. fprintf(pp, " 14=%d", convolution_param.pad_h());
  564. }
  565. else
  566. {
  567. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  568. }
  569. fprintf(pp, " 5=%d", convolution_param.bias_term());
  570. fprintf(pp, " 6=%d", weight_blob.data_size());
  571. int num_group = 1;
  572. if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  573. {
  574. num_group = convolution_param.num_output();
  575. }
  576. else
  577. {
  578. num_group = convolution_param.group();
  579. }
  580. if (num_group != 1)
  581. {
  582. fprintf(pp, " 7=%d", num_group);
  583. }
  584. bool int8_scale_term = false;
  585. std::vector<float> weight_int8scale;
  586. std::vector<float> blob_int8scale;
  587. if (int8scale_table_path)
  588. {
  589. char key[256];
  590. sprintf(key, "%s_param_0", layer.name().c_str());
  591. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  592. {
  593. weight_int8scale = weight_int8scale_table[std::string(key)];
  594. }
  595. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  596. {
  597. blob_int8scale = blob_int8scale_table[layer.name()];
  598. }
  599. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  600. if (int8_scale_term)
  601. {
  602. if ((int)weight_int8scale.size() == num_group)
  603. {
  604. fprintf(pp, " 8=1");
  605. }
  606. else
  607. {
  608. fprintf(pp, " 8=2");
  609. }
  610. }
  611. }
  612. for (int j = 0; j < binlayer.blobs_size(); j++)
  613. {
  614. int quantize_tag = 0;
  615. const caffe::BlobProto& blob = binlayer.blobs(j);
  616. std::vector<float> quantize_table;
  617. std::vector<unsigned char> quantize_index;
  618. std::vector<unsigned short> float16_weights;
  619. std::vector<signed char> int8_weights;
  620. // we will not quantize the bias values
  621. if (j == 0)
  622. {
  623. if (int8_scale_term)
  624. {
  625. if (quantize_level == 0)
  626. {
  627. quantize_tag = 0x0002C056;
  628. }
  629. else if (quantize_level == 256)
  630. {
  631. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  632. }
  633. }
  634. else if (quantize_level == 256)
  635. {
  636. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  637. }
  638. else if (quantize_level == 65536)
  639. {
  640. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  641. }
  642. // write quantize tag first
  643. fwrite(&quantize_tag, sizeof(int), 1, bp);
  644. if (quantize_tag)
  645. {
  646. int p0 = ftell(bp);
  647. if (int8_scale_term)
  648. {
  649. if (quantize_level == 0)
  650. {
  651. // write original data and int8scale
  652. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  653. }
  654. else if (quantize_level == 256)
  655. {
  656. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  657. }
  658. }
  659. else if (quantize_level == 256)
  660. {
  661. // write quantize table and index
  662. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  663. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  664. }
  665. else if (quantize_level == 65536)
  666. {
  667. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  668. }
  669. // padding to 32bit align
  670. int nwrite = ftell(bp) - p0;
  671. int nalign = int(alignSize(nwrite, 4));
  672. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  673. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  674. }
  675. else
  676. {
  677. // write original data
  678. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  679. }
  680. }
  681. else
  682. {
  683. // write original data
  684. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  685. }
  686. }
  687. if (int8_scale_term)
  688. {
  689. // write int8_scale data
  690. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  691. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  692. }
  693. }
  694. else if (layer.type() == "Crop")
  695. {
  696. const caffe::CropParameter& crop_param = layer.crop_param();
  697. int num_offset = crop_param.offset_size();
  698. if (num_offset == 1)
  699. {
  700. int offset = crop_param.offset(0);
  701. int axis = crop_param.axis() - 1;
  702. if (axis == 0)
  703. {
  704. fprintf(pp, " 0=%d", offset);
  705. fprintf(pp, " 1=%d", offset);
  706. fprintf(pp, " 2=%d", offset);
  707. }
  708. else if (axis == 1)
  709. {
  710. fprintf(pp, " 0=%d", offset);
  711. fprintf(pp, " 1=%d", offset);
  712. }
  713. else if (axis == 2)
  714. {
  715. fprintf(pp, " 0=%d", offset);
  716. }
  717. }
  718. else if (num_offset == 2)
  719. {
  720. int woffset = crop_param.offset(1);
  721. int hoffset = crop_param.offset(0);
  722. fprintf(pp, " 0=%d", woffset);
  723. fprintf(pp, " 1=%d", hoffset);
  724. }
  725. else if (num_offset == 3)
  726. {
  727. int woffset = crop_param.offset(2);
  728. int hoffset = crop_param.offset(1);
  729. int coffset = crop_param.offset(0);
  730. fprintf(pp, " 0=%d", woffset);
  731. fprintf(pp, " 1=%d", hoffset);
  732. fprintf(pp, " 2=%d", coffset);
  733. }
  734. }
  735. else if (layer.type() == "Deconvolution")
  736. {
  737. const caffe::LayerParameter& binlayer = net.layer(netidx);
  738. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  739. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  740. fprintf(pp, " 0=%d", convolution_param.num_output());
  741. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  742. {
  743. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  744. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  745. }
  746. else
  747. {
  748. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  749. }
  750. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  751. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  752. {
  753. fprintf(pp, " 3=%d", convolution_param.stride_w());
  754. fprintf(pp, " 13=%d", convolution_param.stride_h());
  755. }
  756. else
  757. {
  758. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  759. }
  760. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  761. {
  762. fprintf(pp, " 4=%d", convolution_param.pad_w());
  763. fprintf(pp, " 14=%d", convolution_param.pad_h());
  764. }
  765. else
  766. {
  767. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  768. }
  769. fprintf(pp, " 5=%d", convolution_param.bias_term());
  770. fprintf(pp, " 6=%d", weight_blob.data_size());
  771. int group = convolution_param.group();
  772. if (group != 1)
  773. {
  774. fprintf(pp, " 7=%d", group);
  775. }
  776. int quantized_weight = 0;
  777. fwrite(&quantized_weight, sizeof(int), 1, bp);
  778. int maxk = 0;
  779. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  780. {
  781. maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
  782. }
  783. else
  784. {
  785. maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
  786. }
  787. for (int g = 0; g < group; g++)
  788. {
  789. // reorder weight from inch-outch to outch-inch
  790. int num_output = convolution_param.num_output() / group;
  791. int num_input = weight_blob.data_size() / maxk / num_output / group;
  792. const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
  793. for (int k = 0; k < num_output; k++)
  794. {
  795. for (int j = 0; j < num_input; j++)
  796. {
  797. fwrite(weight_data_ptr + (j * num_output + k) * maxk, sizeof(float), maxk, bp);
  798. }
  799. }
  800. }
  801. for (int j = 1; j < binlayer.blobs_size(); j++)
  802. {
  803. const caffe::BlobProto& blob = binlayer.blobs(j);
  804. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  805. }
  806. }
  807. else if (layer.type() == "DetectionOutput")
  808. {
  809. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  810. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  811. fprintf(pp, " 0=%d", detection_output_param.num_classes());
  812. fprintf(pp, " 1=%e", nms_param.nms_threshold());
  813. fprintf(pp, " 2=%d", nms_param.top_k());
  814. fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
  815. fprintf(pp, " 4=%e", detection_output_param.confidence_threshold());
  816. }
  817. else if (layer.type() == "Dropout")
  818. {
  819. const caffe::DropoutParameter& dropout_param = layer.dropout_param();
  820. if (dropout_param.has_scale_train() && !dropout_param.scale_train())
  821. {
  822. float scale = 1.f - dropout_param.dropout_ratio();
  823. fprintf(pp, " 0=%e", scale);
  824. }
  825. }
  826. else if (layer.type() == "Eltwise")
  827. {
  828. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  829. int coeff_size = eltwise_param.coeff_size();
  830. fprintf(pp, " 0=%d", (int)eltwise_param.operation());
  831. fprintf(pp, " -23301=%d", coeff_size);
  832. for (int j = 0; j < coeff_size; j++)
  833. {
  834. fprintf(pp, ",%e", eltwise_param.coeff(j));
  835. }
  836. }
  837. else if (layer.type() == "ELU")
  838. {
  839. const caffe::ELUParameter& elu_param = layer.elu_param();
  840. fprintf(pp, " 0=%e", elu_param.alpha());
  841. }
  842. else if (layer.type() == "Embed")
  843. {
  844. const caffe::LayerParameter& binlayer = net.layer(netidx);
  845. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  846. const caffe::EmbedParameter& embed_param = layer.embed_param();
  847. fprintf(pp, " 0=%d", embed_param.num_output());
  848. fprintf(pp, " 1=%d", embed_param.input_dim());
  849. fprintf(pp, " 2=%d", embed_param.bias_term());
  850. fprintf(pp, " 3=%d", weight_blob.data_size());
  851. for (int j = 0; j < binlayer.blobs_size(); j++)
  852. {
  853. int quantize_tag = 0;
  854. const caffe::BlobProto& blob = binlayer.blobs(j);
  855. std::vector<float> quantize_table;
  856. std::vector<unsigned char> quantize_index;
  857. std::vector<unsigned short> float16_weights;
  858. // we will not quantize the bias values
  859. if (j == 0 && quantize_level != 0)
  860. {
  861. if (quantize_level == 256)
  862. {
  863. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  864. }
  865. else if (quantize_level == 65536)
  866. {
  867. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  868. }
  869. }
  870. // write quantize tag first
  871. if (j == 0)
  872. fwrite(&quantize_tag, sizeof(int), 1, bp);
  873. if (quantize_tag)
  874. {
  875. int p0 = ftell(bp);
  876. if (quantize_level == 256)
  877. {
  878. // write quantize table and index
  879. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  880. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  881. }
  882. else if (quantize_level == 65536)
  883. {
  884. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  885. }
  886. // padding to 32bit align
  887. int nwrite = ftell(bp) - p0;
  888. int nalign = int(alignSize(nwrite, 4));
  889. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  890. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  891. }
  892. else
  893. {
  894. // write original data
  895. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  896. }
  897. }
  898. }
  899. else if (layer.type() == "InnerProduct")
  900. {
  901. const caffe::LayerParameter& binlayer = net.layer(netidx);
  902. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  903. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  904. fprintf(pp, " 0=%d", inner_product_param.num_output());
  905. fprintf(pp, " 1=%d", inner_product_param.bias_term());
  906. fprintf(pp, " 2=%d", weight_blob.data_size());
  907. bool int8_scale_term = false;
  908. std::vector<float> weight_int8scale;
  909. std::vector<float> blob_int8scale;
  910. if (int8scale_table_path)
  911. {
  912. char key[256];
  913. sprintf(key, "%s_param_0", layer.name().c_str());
  914. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  915. {
  916. weight_int8scale = weight_int8scale_table[std::string(key)];
  917. }
  918. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  919. {
  920. blob_int8scale = blob_int8scale_table[layer.name()];
  921. }
  922. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  923. if (int8_scale_term)
  924. {
  925. fprintf(pp, " 8=1");
  926. }
  927. }
  928. for (int j = 0; j < binlayer.blobs_size(); j++)
  929. {
  930. int quantize_tag = 0;
  931. const caffe::BlobProto& blob = binlayer.blobs(j);
  932. std::vector<float> quantize_table;
  933. std::vector<unsigned char> quantize_index;
  934. std::vector<unsigned short> float16_weights;
  935. std::vector<signed char> int8_weights;
  936. // we will not quantize the bias values
  937. if (j == 0)
  938. {
  939. if (int8_scale_term)
  940. {
  941. if (quantize_level == 0)
  942. {
  943. quantize_tag = 0x0002C056;
  944. }
  945. else if (quantize_level == 256)
  946. {
  947. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  948. }
  949. }
  950. else if (quantize_level == 256)
  951. {
  952. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  953. }
  954. else if (quantize_level == 65536)
  955. {
  956. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  957. }
  958. // write quantize tag first
  959. fwrite(&quantize_tag, sizeof(int), 1, bp);
  960. if (quantize_tag)
  961. {
  962. int p0 = ftell(bp);
  963. if (int8_scale_term)
  964. {
  965. if (quantize_level == 0)
  966. {
  967. // write original data and int8scale
  968. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  969. }
  970. else if (quantize_level == 256)
  971. {
  972. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  973. }
  974. }
  975. else if (quantize_level == 256)
  976. {
  977. // write quantize table and index
  978. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  979. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  980. }
  981. else if (quantize_level == 65536)
  982. {
  983. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  984. }
  985. // padding to 32bit align
  986. int nwrite = ftell(bp) - p0;
  987. int nalign = int(alignSize(nwrite, 4));
  988. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  989. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  990. }
  991. else
  992. {
  993. // write original data
  994. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  995. }
  996. }
  997. else
  998. {
  999. // write original data
  1000. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1001. }
  1002. }
  1003. if (int8_scale_term)
  1004. {
  1005. // write int8_scale data
  1006. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  1007. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  1008. }
  1009. }
  1010. else if (layer.type() == "Input")
  1011. {
  1012. const caffe::InputParameter& input_param = layer.input_param();
  1013. const caffe::BlobShape& bs = input_param.shape(0);
  1014. if (bs.dim_size() == 4)
  1015. {
  1016. fprintf(pp, " 0=%zd", size_t(bs.dim(3)));
  1017. fprintf(pp, " 1=%zd", size_t(bs.dim(2)));
  1018. fprintf(pp, " 2=%zd", size_t(bs.dim(1)));
  1019. }
  1020. else if (bs.dim_size() == 3)
  1021. {
  1022. fprintf(pp, " 0=%zd", size_t(bs.dim(2)));
  1023. fprintf(pp, " 1=%zd", size_t(bs.dim(1)));
  1024. fprintf(pp, " 2=-233");
  1025. }
  1026. else if (bs.dim_size() == 2)
  1027. {
  1028. fprintf(pp, " 0=%zd", size_t(bs.dim(1)));
  1029. fprintf(pp, " 1=-233");
  1030. fprintf(pp, " 2=-233");
  1031. }
  1032. }
  1033. else if (layer.type() == "Interp")
  1034. {
  1035. const caffe::InterpParameter& interp_param = layer.interp_param();
  1036. fprintf(pp, " 0=%d", 2);
  1037. fprintf(pp, " 1=%e", (float)interp_param.zoom_factor());
  1038. fprintf(pp, " 2=%e", (float)interp_param.zoom_factor());
  1039. fprintf(pp, " 3=%d", interp_param.height());
  1040. fprintf(pp, " 4=%d", interp_param.width());
  1041. }
  1042. else if (layer.type() == "LRN")
  1043. {
  1044. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  1045. fprintf(pp, " 0=%d", lrn_param.norm_region());
  1046. fprintf(pp, " 1=%d", lrn_param.local_size());
  1047. fprintf(pp, " 2=%e", lrn_param.alpha());
  1048. fprintf(pp, " 3=%e", lrn_param.beta());
  1049. }
  1050. else if (layer.type() == "LSTM")
  1051. {
  1052. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1053. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1054. const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
  1055. fprintf(pp, " 0=%d", recurrent_param.num_output());
  1056. fprintf(pp, " 1=%d", weight_blob.data_size());
  1057. for (int j = 0; j < binlayer.blobs_size(); j++)
  1058. {
  1059. int quantize_tag = 0;
  1060. const caffe::BlobProto& blob = binlayer.blobs(j);
  1061. std::vector<float> quantize_table;
  1062. std::vector<unsigned char> quantize_index;
  1063. std::vector<unsigned short> float16_weights;
  1064. if (quantize_level != 0)
  1065. {
  1066. if (quantize_level == 256)
  1067. {
  1068. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  1069. }
  1070. else if (quantize_level == 65536)
  1071. {
  1072. quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
  1073. }
  1074. }
  1075. // write quantize tag first
  1076. fwrite(&quantize_tag, sizeof(int), 1, bp);
  1077. if (quantize_tag)
  1078. {
  1079. int p0 = ftell(bp);
  1080. if (quantize_level == 256)
  1081. {
  1082. // write quantize table and index
  1083. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  1084. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  1085. }
  1086. else if (quantize_level == 65536)
  1087. {
  1088. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  1089. }
  1090. // padding to 32bit align
  1091. int nwrite = ftell(bp) - p0;
  1092. int nalign = int(alignSize(nwrite, 4));
  1093. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1094. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1095. }
  1096. else
  1097. {
  1098. // write original data
  1099. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1100. }
  1101. }
  1102. }
  1103. else if (layer.type() == "MemoryData")
  1104. {
  1105. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  1106. fprintf(pp, " 0=%d", memory_data_param.width());
  1107. fprintf(pp, " 1=%d", memory_data_param.height());
  1108. fprintf(pp, " 2=%d", memory_data_param.channels());
  1109. }
  1110. else if (layer.type() == "MVN")
  1111. {
  1112. const caffe::MVNParameter& mvn_param = layer.mvn_param();
  1113. fprintf(pp, " 0=%d", mvn_param.normalize_variance());
  1114. fprintf(pp, " 1=%d", mvn_param.across_channels());
  1115. fprintf(pp, " 2=%e", mvn_param.eps());
  1116. }
  1117. else if (layer.type() == "Normalize")
  1118. {
  1119. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1120. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  1121. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  1122. fprintf(pp, " 0=%d", norm_param.across_spatial());
  1123. fprintf(pp, " 1=%d", norm_param.channel_shared());
  1124. fprintf(pp, " 2=%e", norm_param.eps());
  1125. fprintf(pp, " 3=%d", scale_blob.data_size());
  1126. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  1127. }
  1128. else if (layer.type() == "Permute")
  1129. {
  1130. const caffe::PermuteParameter& permute_param = layer.permute_param();
  1131. int order_size = permute_param.order_size();
  1132. int order_type = 0;
  1133. if (order_size == 0)
  1134. order_type = 0;
  1135. if (order_size == 1)
  1136. {
  1137. int order0 = permute_param.order(0);
  1138. if (order0 == 0)
  1139. order_type = 0;
  1140. // permute with N not supported
  1141. }
  1142. if (order_size == 2)
  1143. {
  1144. int order0 = permute_param.order(0);
  1145. int order1 = permute_param.order(1);
  1146. if (order0 == 0)
  1147. {
  1148. if (order1 == 1) // 0 1 2 3
  1149. order_type = 0;
  1150. else if (order1 == 2) // 0 2 1 3
  1151. order_type = 2;
  1152. else if (order1 == 3) // 0 3 1 2
  1153. order_type = 4;
  1154. }
  1155. // permute with N not supported
  1156. }
  1157. if (order_size == 3 || order_size == 4)
  1158. {
  1159. int order0 = permute_param.order(0);
  1160. int order1 = permute_param.order(1);
  1161. int order2 = permute_param.order(2);
  1162. if (order0 == 0)
  1163. {
  1164. if (order1 == 1)
  1165. {
  1166. if (order2 == 2) // 0 1 2 3
  1167. order_type = 0;
  1168. if (order2 == 3) // 0 1 3 2
  1169. order_type = 1;
  1170. }
  1171. else if (order1 == 2)
  1172. {
  1173. if (order2 == 1) // 0 2 1 3
  1174. order_type = 2;
  1175. if (order2 == 3) // 0 2 3 1
  1176. order_type = 3;
  1177. }
  1178. else if (order1 == 3)
  1179. {
  1180. if (order2 == 1) // 0 3 1 2
  1181. order_type = 4;
  1182. if (order2 == 2) // 0 3 2 1
  1183. order_type = 5;
  1184. }
  1185. }
  1186. // permute with N not supported
  1187. }
  1188. fprintf(pp, " 0=%d", order_type);
  1189. }
  1190. else if (layer.type() == "Pooling")
  1191. {
  1192. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  1193. fprintf(pp, " 0=%d", pooling_param.pool());
  1194. if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
  1195. {
  1196. fprintf(pp, " 1=%d", pooling_param.kernel_w());
  1197. fprintf(pp, " 11=%d", pooling_param.kernel_h());
  1198. }
  1199. else
  1200. {
  1201. fprintf(pp, " 1=%d", pooling_param.kernel_size());
  1202. }
  1203. if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
  1204. {
  1205. fprintf(pp, " 2=%d", pooling_param.stride_w());
  1206. fprintf(pp, " 12=%d", pooling_param.stride_h());
  1207. }
  1208. else
  1209. {
  1210. fprintf(pp, " 2=%d", pooling_param.stride());
  1211. }
  1212. if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
  1213. {
  1214. fprintf(pp, " 3=%d", pooling_param.pad_w());
  1215. fprintf(pp, " 13=%d", pooling_param.pad_h());
  1216. }
  1217. else
  1218. {
  1219. fprintf(pp, " 3=%d", pooling_param.pad());
  1220. }
  1221. fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  1222. }
  1223. else if (layer.type() == "Power")
  1224. {
  1225. const caffe::PowerParameter& power_param = layer.power_param();
  1226. fprintf(pp, " 0=%e", power_param.power());
  1227. fprintf(pp, " 1=%e", power_param.scale());
  1228. fprintf(pp, " 2=%e", power_param.shift());
  1229. }
  1230. else if (layer.type() == "PReLU")
  1231. {
  1232. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1233. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  1234. fprintf(pp, " 0=%d", slope_blob.data_size());
  1235. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  1236. }
  1237. else if (layer.type() == "PriorBox")
  1238. {
  1239. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  1240. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  1241. for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
  1242. {
  1243. float ar = prior_box_param.aspect_ratio(j);
  1244. if (fabs(ar - 1.) < 1e-6)
  1245. {
  1246. num_aspect_ratio--;
  1247. }
  1248. }
  1249. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  1250. if (prior_box_param.variance_size() == 4)
  1251. {
  1252. variances[0] = prior_box_param.variance(0);
  1253. variances[1] = prior_box_param.variance(1);
  1254. variances[2] = prior_box_param.variance(2);
  1255. variances[3] = prior_box_param.variance(3);
  1256. }
  1257. else if (prior_box_param.variance_size() == 1)
  1258. {
  1259. variances[0] = prior_box_param.variance(0);
  1260. variances[1] = prior_box_param.variance(0);
  1261. variances[2] = prior_box_param.variance(0);
  1262. variances[3] = prior_box_param.variance(0);
  1263. }
  1264. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  1265. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  1266. int image_width = -233;
  1267. int image_height = -233;
  1268. if (prior_box_param.has_img_size())
  1269. {
  1270. image_width = prior_box_param.img_size();
  1271. image_height = prior_box_param.img_size();
  1272. }
  1273. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  1274. {
  1275. image_width = prior_box_param.img_w();
  1276. image_height = prior_box_param.img_h();
  1277. }
  1278. float step_width = -233;
  1279. float step_height = -233;
  1280. if (prior_box_param.has_step())
  1281. {
  1282. step_width = prior_box_param.step();
  1283. step_height = prior_box_param.step();
  1284. }
  1285. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  1286. {
  1287. step_width = prior_box_param.step_w();
  1288. step_height = prior_box_param.step_h();
  1289. }
  1290. fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
  1291. for (int j = 0; j < prior_box_param.min_size_size(); j++)
  1292. {
  1293. fprintf(pp, ",%e", prior_box_param.min_size(j));
  1294. }
  1295. fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
  1296. for (int j = 0; j < prior_box_param.max_size_size(); j++)
  1297. {
  1298. fprintf(pp, ",%e", prior_box_param.max_size(j));
  1299. }
  1300. fprintf(pp, " -23302=%d", num_aspect_ratio);
  1301. for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
  1302. {
  1303. float ar = prior_box_param.aspect_ratio(j);
  1304. if (fabs(ar - 1.) < 1e-6)
  1305. {
  1306. continue;
  1307. }
  1308. fprintf(pp, ",%e", ar);
  1309. }
  1310. fprintf(pp, " 3=%e", variances[0]);
  1311. fprintf(pp, " 4=%e", variances[1]);
  1312. fprintf(pp, " 5=%e", variances[2]);
  1313. fprintf(pp, " 6=%e", variances[3]);
  1314. fprintf(pp, " 7=%d", flip);
  1315. fprintf(pp, " 8=%d", clip);
  1316. fprintf(pp, " 9=%d", image_width);
  1317. fprintf(pp, " 10=%d", image_height);
  1318. fprintf(pp, " 11=%e", step_width);
  1319. fprintf(pp, " 12=%e", step_height);
  1320. fprintf(pp, " 13=%e", prior_box_param.offset());
  1321. }
  1322. else if (layer.type() == "PSROIPooling")
  1323. {
  1324. const caffe::PSROIPoolingParameter& psroi_pooling_param = layer.psroi_pooling_param();
  1325. fprintf(pp, " 0=%d", psroi_pooling_param.group_size());
  1326. fprintf(pp, " 1=%d", psroi_pooling_param.group_size());
  1327. fprintf(pp, " 2=%e", psroi_pooling_param.spatial_scale());
  1328. fprintf(pp, " 3=%d", psroi_pooling_param.output_dim());
  1329. }
  1330. else if (layer.type() == "Python")
  1331. {
  1332. const caffe::PythonParameter& python_param = layer.python_param();
  1333. std::string python_layer_name = python_param.layer();
  1334. if (python_layer_name == "ProposalLayer")
  1335. {
  1336. int feat_stride = 16;
  1337. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  1338. int base_size = 16;
  1339. // float ratio;
  1340. // float scale;
  1341. int pre_nms_topN = 6000;
  1342. int after_nms_topN = 300;
  1343. float nms_thresh = 0.7f;
  1344. int min_size = 16;
  1345. fprintf(pp, " 0=%d", feat_stride);
  1346. fprintf(pp, " 1=%d", base_size);
  1347. fprintf(pp, " 2=%d", pre_nms_topN);
  1348. fprintf(pp, " 3=%d", after_nms_topN);
  1349. fprintf(pp, " 4=%e", nms_thresh);
  1350. fprintf(pp, " 5=%d", min_size);
  1351. }
  1352. }
  1353. else if (layer.type() == "ReLU")
  1354. {
  1355. const caffe::ReLUParameter& relu_param = layer.relu_param();
  1356. if (relu_param.has_negative_slope())
  1357. {
  1358. fprintf(pp, " 0=%e", relu_param.negative_slope());
  1359. }
  1360. }
  1361. else if (layer.type() == "ReLU6")
  1362. {
  1363. float min = 0.f;
  1364. float max = 6.f;
  1365. fprintf(pp, " 0=%e", min);
  1366. fprintf(pp, " 1=%e", max);
  1367. }
  1368. else if (layer.type() == "Reorg")
  1369. {
  1370. const caffe::ReorgParameter& reorg_param = layer.reorg_param();
  1371. fprintf(pp, " 0=%d", reorg_param.stride());
  1372. }
  1373. else if (layer.type() == "Reshape")
  1374. {
  1375. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  1376. const caffe::BlobShape& bs = reshape_param.shape();
  1377. if (bs.dim_size() == 1)
  1378. {
  1379. fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(0)));
  1380. }
  1381. else if (bs.dim_size() == 2)
  1382. {
  1383. fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(1)));
  1384. }
  1385. else if (bs.dim_size() == 3)
  1386. {
  1387. fprintf(pp, " 0=%zd 1=%zd 2=-233", size_t(bs.dim(2)), size_t(bs.dim(1)));
  1388. }
  1389. else // bs.dim_size() == 4
  1390. {
  1391. fprintf(pp, " 0=%zd 1=%zd 2=%zd", size_t(bs.dim(3)), size_t(bs.dim(2)), size_t(bs.dim(1)));
  1392. }
  1393. fprintf(pp, " 3=0"); // permute
  1394. }
  1395. else if (layer.type() == "ROIAlign")
  1396. {
  1397. const caffe::ROIAlignParameter& roi_align_param = layer.roi_align_param();
  1398. fprintf(pp, " 0=%d", roi_align_param.pooled_w());
  1399. fprintf(pp, " 1=%d", roi_align_param.pooled_h());
  1400. fprintf(pp, " 2=%e", roi_align_param.spatial_scale());
  1401. fprintf(pp, " 3=%d", 0);
  1402. fprintf(pp, " 4=%d", false);
  1403. fprintf(pp, " 5=%d", 0);
  1404. }
  1405. else if (layer.type() == "ROIPooling")
  1406. {
  1407. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  1408. fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
  1409. fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
  1410. fprintf(pp, " 2=%e", roi_pooling_param.spatial_scale());
  1411. }
  1412. else if (layer.type() == "Scale")
  1413. {
  1414. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1415. const caffe::ScaleParameter& scale_param = layer.scale_param();
  1416. bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
  1417. if (scale_weight)
  1418. {
  1419. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1420. fprintf(pp, " 0=%d", int(weight_blob.data_size()));
  1421. }
  1422. else
  1423. {
  1424. fprintf(pp, " 0=-233");
  1425. }
  1426. fprintf(pp, " 1=%d", scale_param.bias_term());
  1427. for (int j = 0; j < binlayer.blobs_size(); j++)
  1428. {
  1429. const caffe::BlobProto& blob = binlayer.blobs(j);
  1430. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1431. }
  1432. }
  1433. else if (layer.type() == "ShuffleChannel")
  1434. {
  1435. const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
  1436. fprintf(pp, " 0=%d", shuffle_channel_param.group());
  1437. }
  1438. else if (layer.type() == "Slice")
  1439. {
  1440. const caffe::SliceParameter& slice_param = layer.slice_param();
  1441. if (slice_param.slice_point_size() == 0)
  1442. {
  1443. int num_slice = layer.top_size();
  1444. fprintf(pp, " -23300=%d", num_slice);
  1445. for (int j = 0; j < num_slice; j++)
  1446. {
  1447. fprintf(pp, ",-233");
  1448. }
  1449. }
  1450. else
  1451. {
  1452. int num_slice = slice_param.slice_point_size() + 1;
  1453. fprintf(pp, " -23300=%d", num_slice);
  1454. int prev_offset = 0;
  1455. for (int j = 0; j < slice_param.slice_point_size(); j++)
  1456. {
  1457. int offset = slice_param.slice_point(j);
  1458. fprintf(pp, ",%d", offset - prev_offset);
  1459. prev_offset = offset;
  1460. }
  1461. fprintf(pp, ",-233");
  1462. }
  1463. int axis = 0;
  1464. if (slice_param.has_axis())
  1465. {
  1466. axis = slice_param.axis() - 1;
  1467. }
  1468. else if (slice_param.has_slice_dim())
  1469. {
  1470. axis = slice_param.slice_dim() - 1;
  1471. }
  1472. fprintf(pp, " 1=%d", axis);
  1473. }
  1474. else if (layer.type() == "Softmax")
  1475. {
  1476. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  1477. int dim = softmax_param.axis() - 1;
  1478. fprintf(pp, " 0=%d", dim);
  1479. fprintf(pp, " 1=1");
  1480. }
  1481. else if (layer.type() == "Threshold")
  1482. {
  1483. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  1484. fprintf(pp, " 0=%e", threshold_param.threshold());
  1485. }
  1486. else if (layer.type() == "YoloDetectionOutput")
  1487. {
  1488. const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();
  1489. fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
  1490. fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
  1491. fprintf(pp, " 2=%e", yolo_detection_output_param.confidence_threshold());
  1492. fprintf(pp, " 3=%e", yolo_detection_output_param.nms_threshold());
  1493. int num_bias = yolo_detection_output_param.biases_size();
  1494. fprintf(pp, " -23304=%d", num_bias);
  1495. for (int j = 0; j < num_bias; j++)
  1496. {
  1497. fprintf(pp, ",%e", yolo_detection_output_param.biases(j));
  1498. }
  1499. }
  1500. else if (layer.type() == "Yolov3DetectionOutput")
  1501. {
  1502. const caffe::Yolov3DetectionOutputParameter& yolov3_detection_output_param = layer.yolov3_detection_output_param();
  1503. fprintf(pp, " 0=%d", yolov3_detection_output_param.num_classes());
  1504. fprintf(pp, " 1=%d", yolov3_detection_output_param.num_box());
  1505. fprintf(pp, " 2=%e", yolov3_detection_output_param.confidence_threshold());
  1506. fprintf(pp, " 3=%e", yolov3_detection_output_param.nms_threshold());
  1507. int num_bias = yolov3_detection_output_param.biases_size();
  1508. fprintf(pp, " -23304=%d", num_bias);
  1509. for (int j = 0; j < num_bias; j++)
  1510. {
  1511. fprintf(pp, ",%e", yolov3_detection_output_param.biases(j));
  1512. }
  1513. int num_mask = yolov3_detection_output_param.mask_size();
  1514. fprintf(pp, " -23305=%d", num_mask);
  1515. for (int j = 0; j < num_mask; j++)
  1516. {
  1517. fprintf(pp, ",%e", (float)yolov3_detection_output_param.mask(j));
  1518. }
  1519. int num_anchors = yolov3_detection_output_param.anchors_scale_size();
  1520. fprintf(pp, " -23306=%d", num_anchors);
  1521. for (int j = 0; j < num_anchors; j++)
  1522. {
  1523. fprintf(pp, ",%e", (float)yolov3_detection_output_param.anchors_scale(j));
  1524. }
  1525. fprintf(pp, " 7=%d", yolov3_detection_output_param.mask_group_num());
  1526. }
  1527. fprintf(pp, "\n");
  1528. // add split layer if top reference larger than one
  1529. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  1530. {
  1531. std::string blob_name = blob_name_decorated[layer.top(0)];
  1532. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1533. {
  1534. int refcount = bottom_reference[blob_name];
  1535. if (refcount > 1)
  1536. {
  1537. char splitname[256];
  1538. sprintf(splitname, "splitncnn_%d", internal_split);
  1539. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1540. fprintf(pp, " %s", blob_name.c_str());
  1541. for (int j = 0; j < refcount; j++)
  1542. {
  1543. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1544. }
  1545. fprintf(pp, "\n");
  1546. internal_split++;
  1547. }
  1548. }
  1549. }
  1550. else
  1551. {
  1552. for (int j = 0; j < layer.top_size(); j++)
  1553. {
  1554. std::string blob_name = layer.top(j);
  1555. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1556. {
  1557. int refcount = bottom_reference[blob_name];
  1558. if (refcount > 1)
  1559. {
  1560. char splitname[256];
  1561. sprintf(splitname, "splitncnn_%d", internal_split);
  1562. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1563. fprintf(pp, " %s", blob_name.c_str());
  1564. for (int j = 0; j < refcount; j++)
  1565. {
  1566. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1567. }
  1568. fprintf(pp, "\n");
  1569. internal_split++;
  1570. }
  1571. }
  1572. }
  1573. }
  1574. }
  1575. fclose(pp);
  1576. fclose(bp);
  1577. return 0;
  1578. }