You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

caffe2ncnn.cpp 63 kB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <stdio.h>
  15. #include <limits.h>
  16. #include <math.h>
  17. #include <fstream>
  18. #include <set>
  19. #include <limits>
  20. #include <map>
  21. #include <algorithm>
  22. #include <google/protobuf/io/coded_stream.h>
  23. #include <google/protobuf/io/zero_copy_stream_impl.h>
  24. #include <google/protobuf/text_format.h>
  25. #include <google/protobuf/message.h>
  26. #include "caffe.pb.h"
  // Round sz up to a multiple of n using a bit mask.
  // The mask form only yields a true round-up when n is a power of two.
  static inline size_t alignSize(size_t sz, int n)
  {
      const size_t keep_mask = static_cast<size_t>(-n);
      const size_t bumped = sz + n - 1;
      return bumped & keep_mask;
  }
  // Convert a 32-bit float to the bit pattern of a 16-bit half float.
  // Mantissa bits that do not fit are truncated (no rounding).
  static unsigned short float2half(float value)
  {
      // view the float as raw bits, layout 1 : 8 : 23
      union
      {
          unsigned int u;
          float f;
      } bits;
      bits.f = value;

      const unsigned int sign_bit = (bits.u >> 31) & 0x1;
      const unsigned int exp32 = (bits.u >> 23) & 0xFF;
      const unsigned int mant32 = bits.u & 0x7FFFFF;

      // half layout is 1 : 5 : 10, the sign always lands in bit 15
      const unsigned int sign16 = sign_bit << 15;
      const unsigned int exp16_all_ones = 0x1F << 10;

      // zero or float denormal, always underflows to signed zero
      if (exp32 == 0)
          return static_cast<unsigned short>(sign16);

      // infinity or NaN (NaN keeps a single quiet bit in the mantissa)
      if (exp32 == 0xFF)
          return static_cast<unsigned short>(sign16 | exp16_all_ones | (mant32 ? 0x200 : 0x00));

      // normalized float: move the exponent from bias 127 to bias 15
      const int exp16 = static_cast<int>(exp32) - 127 + 15;

      // too large for half precision, return infinity
      if (exp16 >= 31)
          return static_cast<unsigned short>(sign16 | exp16_all_ones);

      // fits as a normalized half
      if (exp16 > 0)
          return static_cast<unsigned short>(sign16 | (static_cast<unsigned int>(exp16) << 10) | (mant32 >> 13));

      // representable only as a half denormal: restore the implicit leading one
      // and shift it into the 10-bit mantissa
      if (exp16 >= -10)
          return static_cast<unsigned short>(sign16 | ((mant32 | 0x800000) >> (14 - exp16)));

      // too small even for a denormal, underflow to signed zero
      return static_cast<unsigned short>(sign16);
  }
  // Round a float to the nearest integer (halves go away from zero) and
  // saturate the result to the signed 8-bit range [-128, 127].
  // NaN is mapped to 0.
  static signed char float2int8(float value)
  {
      // NaN is the only value that compares unequal to itself. It would fail
      // both range checks below and reach the float -> integer conversion,
      // which is undefined for NaN, so handle it up front.
      if (value != value)
          return 0;

      // shift by half a unit so the truncating conversion below rounds to nearest
      float tmp;
      if (value >= 0.f) tmp = value + 0.5;
      else tmp = value - 0.5;

      // saturate
      if (tmp > 127)
          return 127;
      if (tmp < -128)
          return -128;

      return static_cast<signed char>(tmp);
  }
  // Store one parsed entry: keys containing "_param_" (the XYZ_param_N pattern)
  // go to the weight table, every other key goes to the blob table.
  static void store_int8scale_entry(const std::string& key, const std::vector<float>& scales, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
  {
      if (key.find("_param_") != std::string::npos)
      {
          weight_int8scale_table[ key ] = scales;
      }
      else
      {
          blob_int8scale_table[ key ] = scales;
      }
  }

  // Read an int8 scale table from a whitespace-separated text file.
  //
  // The file is a sequence of entries, each a key token followed by zero or
  // more float scales. Both output tables are cleared first.
  //
  // Returns false if the file cannot be opened, true otherwise.
  static bool read_int8scale_table(const char* filepath, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
  {
      blob_int8scale_table.clear();
      weight_int8scale_table.clear();

      FILE* fp = fopen(filepath, "rb");
      if (!fp)
      {
          fprintf(stderr, "fopen %s failed\n", filepath);
          return false;
      }

      bool in_scale_vector = false;
      std::string keystr;
      std::vector<float> scales;

      while (!feof(fp))
      {
          if (in_scale_vector)
          {
              float scale = 1.f;
              if (fscanf(fp, "%f", &scale) == 1)
              {
                  scales.push_back(scale);
                  continue;
              }

              // the next token is not a number: the current entry is complete
              store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);
              keystr.clear();
              scales.clear();
              in_scale_vector = false;
          }

          // expect the key of the next entry
          char key[256];
          if (fscanf(fp, "%255s", key) != 1)
          {
              break;
          }

          keystr = key;
          in_scale_vector = true;
      }

      // When the file ends right after the last token (no trailing whitespace)
      // the end-of-file flag is already set and the loop exits before the
      // pending entry has been stored, so flush it here.
      if (in_scale_vector)
      {
          store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);
      }

      fclose(fp);
      return true;
  }
  159. static int quantize_weight(float *data, size_t data_length, std::vector<unsigned short>& float16_weights)
  160. {
  161. float16_weights.resize(data_length);
  162. for (size_t i = 0; i < data_length; i++)
  163. {
  164. float f = data[i];
  165. unsigned short fp16 = float2half(f);
  166. float16_weights[i] = fp16;
  167. }
  168. // magic tag for half-precision floating point
  169. return 0x01306B47;
  170. }
  171. static int quantize_weight(float *data, size_t data_length, std::vector<float> scales, std::vector<signed char>& int8_weights)
  172. {
  173. int8_weights.resize(data_length);
  174. int length_per_group = data_length / scales.size();
  175. for (size_t i = 0; i < data_length; i++)
  176. {
  177. float f = data[i];
  178. signed char int8 = float2int8(f * scales[ i / length_per_group ]);
  179. int8_weights[i] = int8;
  180. }
  181. // magic tag for int8
  182. return 0x000D4B38;
  183. }
  // Quantize data to a table of quantize_level evenly spaced values.
  //
  // quantize_table receives quantize_level values starting at the minimum of
  // data and spaced (max - min) / quantize_level apart. quantize_index receives
  // one table index per input value. Both outputs are overwritten on success.
  //
  // Returns false, leaving the outputs untouched, when quantize_level is not
  // positive or data_length is smaller than quantize_level.
  static bool quantize_weight(float *data, size_t data_length, int quantize_level, std::vector<float> &quantize_table, std::vector<unsigned char> &quantize_index) {
      assert(quantize_level != 0);
      assert(data != NULL);
      assert(data_length > 0);

      // keep release builds (asserts compiled out) away from a zero divisor
      if (quantize_level <= 0) {
          return false;
      }

      if (data_length < static_cast<size_t>(quantize_level)) {
          fprintf(stderr, "No need quantize,because: data_length < quantize_level");
          return false;
      }

      quantize_table.clear();
      quantize_index.clear();
      quantize_table.reserve(quantize_level);
      quantize_index.reserve(data_length);

      // 1. Find min and max value
      // numeric_limits<float>::min() is the smallest POSITIVE float, so the
      // running maximum has to start from the most negative finite value,
      // otherwise all-negative data reports a maximum just above zero.
      float max_value = -std::numeric_limits<float>::max();
      float min_value = std::numeric_limits<float>::max();
      for (size_t i = 0; i < data_length; ++i)
      {
          if (max_value < data[i]) max_value = data[i];
          if (min_value > data[i]) min_value = data[i];
      }
      const float strides = (max_value - min_value) / quantize_level;

      // 2. Generate quantize table
      for (int i = 0; i < quantize_level; ++i)
      {
          quantize_table.push_back(min_value + i * strides);
      }

      // All values are identical: every table entry equals that value and
      // dividing by the zero stride below would be meaningless.
      if (!(strides > 0.f))
      {
          quantize_index.assign(data_length, 0);
          return true;
      }

      // 3. Align data to the quantized value
      const int last_index = quantize_level - 1;
      for (size_t i = 0; i < data_length; ++i)
      {
          int table_index = static_cast<int>((data[i] - min_value) / strides);
          if (table_index > last_index) table_index = last_index;

          const float low_value = quantize_table[table_index];
          const float high_value = low_value + strides;

          // find a nearest value between low and high value.
          const float target_value = data[i] - low_value < high_value - data[i] ? low_value : high_value;

          table_index = static_cast<int>((target_value - min_value) / strides);
          if (table_index > last_index) table_index = last_index;

          quantize_index.push_back(static_cast<unsigned char>(table_index));
      }

      return true;
  }
  223. static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
  224. {
  225. std::ifstream fs(filepath, std::ifstream::in);
  226. if (!fs.is_open())
  227. {
  228. fprintf(stderr, "open failed %s\n", filepath);
  229. return false;
  230. }
  231. google::protobuf::io::IstreamInputStream input(&fs);
  232. bool success = google::protobuf::TextFormat::Parse(&input, message);
  233. fs.close();
  234. return success;
  235. }
  236. static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
  237. {
  238. std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
  239. if (!fs.is_open())
  240. {
  241. fprintf(stderr, "open failed %s\n", filepath);
  242. return false;
  243. }
  244. google::protobuf::io::IstreamInputStream input(&fs);
  245. google::protobuf::io::CodedInputStream codedstr(&input);
  246. codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
  247. bool success = message->ParseFromCodedStream(&codedstr);
  248. fs.close();
  249. return success;
  250. }
  251. int main(int argc, char** argv)
  252. {
  253. if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
  254. {
  255. fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
  256. return -1;
  257. }
  258. const char* caffeproto = argv[1];
  259. const char* caffemodel = argv[2];
  260. const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
  261. const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
  262. const char* quantize_param = argc >= 6 ? argv[5] : "0";
  263. const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
  264. int quantize_level = atoi(quantize_param);
  265. if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536) {
  266. fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
  267. return -1;
  268. }
  269. caffe::NetParameter proto;
  270. caffe::NetParameter net;
  271. // load
  272. bool s0 = read_proto_from_text(caffeproto, &proto);
  273. if (!s0)
  274. {
  275. fprintf(stderr, "read_proto_from_text failed\n");
  276. return -1;
  277. }
  278. bool s1 = read_proto_from_binary(caffemodel, &net);
  279. if (!s1)
  280. {
  281. fprintf(stderr, "read_proto_from_binary failed\n");
  282. return -1;
  283. }
  284. std::map<std::string, std::vector<float> > blob_int8scale_table;
  285. std::map<std::string, std::vector<float> > weight_int8scale_table;
  286. if (int8scale_table_path)
  287. {
  288. bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
  289. if (!s2)
  290. {
  291. fprintf(stderr, "read_int8scale_table failed\n");
  292. return -1;
  293. }
  294. }
  295. FILE* pp = fopen(ncnn_prototxt, "wb");
  296. FILE* bp = fopen(ncnn_modelbin, "wb");
  297. // magic
  298. fprintf(pp, "7767517\n");
  299. // rename mapping for identical bottom top style
  300. std::map<std::string, std::string> blob_name_decorated;
  301. // bottom blob reference
  302. std::map<std::string, int> bottom_reference;
  303. // global definition line
  304. // [layer count] [blob count]
  305. int layer_count = proto.layer_size();
  306. std::set<std::string> blob_names;
  307. for (int i=0; i<layer_count; i++)
  308. {
  309. const caffe::LayerParameter& layer = proto.layer(i);
  310. for (int j=0; j<layer.bottom_size(); j++)
  311. {
  312. std::string blob_name = layer.bottom(j);
  313. if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
  314. {
  315. blob_name = blob_name_decorated[blob_name];
  316. }
  317. blob_names.insert(blob_name);
  318. if (bottom_reference.find(blob_name) == bottom_reference.end())
  319. {
  320. bottom_reference[blob_name] = 1;
  321. }
  322. else
  323. {
  324. bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
  325. }
  326. }
  327. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  328. {
  329. std::string blob_name = layer.top(0) + "_" + layer.name();
  330. blob_name_decorated[layer.top(0)] = blob_name;
  331. blob_names.insert(blob_name);
  332. }
  333. else
  334. {
  335. for (int j=0; j<layer.top_size(); j++)
  336. {
  337. std::string blob_name = layer.top(j);
  338. blob_names.insert(blob_name);
  339. }
  340. }
  341. }
  342. // remove bottom_reference entry with reference equals to one
  343. int splitncnn_blob_count = 0;
  344. std::map<std::string, int>::iterator it = bottom_reference.begin();
  345. while (it != bottom_reference.end())
  346. {
  347. if (it->second == 1)
  348. {
  349. bottom_reference.erase(it++);
  350. }
  351. else
  352. {
  353. splitncnn_blob_count += it->second;
  354. // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
  355. ++it;
  356. }
  357. }
  358. fprintf(pp, "%lu %lu\n", layer_count + bottom_reference.size(), blob_names.size() + splitncnn_blob_count);
  359. // populate
  360. blob_name_decorated.clear();
  361. int internal_split = 0;
  362. for (int i=0; i<layer_count; i++)
  363. {
  364. const caffe::LayerParameter& layer = proto.layer(i);
  365. // layer definition line, repeated
  366. // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
  367. if (layer.type() == "BN")
  368. {
  369. fprintf(pp, "%-16s", "Scale");
  370. }
  371. else if (layer.type() == "Convolution")
  372. {
  373. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  374. if (convolution_param.group() != 1)
  375. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  376. else
  377. fprintf(pp, "%-16s", "Convolution");
  378. }
  379. else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  380. {
  381. fprintf(pp, "%-16s", "ConvolutionDepthWise");
  382. }
  383. else if (layer.type() == "Deconvolution")
  384. {
  385. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  386. if (convolution_param.group() != 1)
  387. fprintf(pp, "%-16s", "DeconvolutionDepthWise");
  388. else
  389. fprintf(pp, "%-16s", "Deconvolution");
  390. }
  391. else if (layer.type() == "MemoryData")
  392. {
  393. fprintf(pp, "%-16s", "Input");
  394. }
  395. else if (layer.type() == "Python")
  396. {
  397. const caffe::PythonParameter& python_param = layer.python_param();
  398. std::string python_layer_name = python_param.layer();
  399. if (python_layer_name == "ProposalLayer")
  400. fprintf(pp, "%-16s", "Proposal");
  401. else
  402. fprintf(pp, "%-16s", python_layer_name.c_str());
  403. }
  404. else if (layer.type() == "ReLU6")
  405. {
  406. fprintf(pp, "%-16s", "Clip");
  407. }
  408. else
  409. {
  410. fprintf(pp, "%-16s", layer.type().c_str());
  411. }
  412. fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
  413. for (int j=0; j<layer.bottom_size(); j++)
  414. {
  415. std::string blob_name = layer.bottom(j);
  416. if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
  417. {
  418. blob_name = blob_name_decorated[layer.bottom(j)];
  419. }
  420. if (bottom_reference.find(blob_name) != bottom_reference.end())
  421. {
  422. int refidx = bottom_reference[blob_name] - 1;
  423. bottom_reference[blob_name] = refidx;
  424. char splitsuffix[256];
  425. sprintf(splitsuffix, "_splitncnn_%d", refidx);
  426. blob_name = blob_name + splitsuffix;
  427. }
  428. fprintf(pp, " %s", blob_name.c_str());
  429. }
  430. // decorated
  431. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  432. {
  433. std::string blob_name = layer.top(0) + "_" + layer.name();
  434. blob_name_decorated[layer.top(0)] = blob_name;
  435. fprintf(pp, " %s", blob_name.c_str());
  436. }
  437. else
  438. {
  439. for (int j=0; j<layer.top_size(); j++)
  440. {
  441. std::string blob_name = layer.top(j);
  442. fprintf(pp, " %s", blob_name.c_str());
  443. }
  444. }
  445. // find blob binary by layer name
  446. int netidx;
  447. for (netidx=0; netidx<net.layer_size(); netidx++)
  448. {
  449. if (net.layer(netidx).name() == layer.name())
  450. {
  451. break;
  452. }
  453. }
  454. // layer specific params
  455. if (layer.type() == "BatchNorm")
  456. {
  457. const caffe::LayerParameter& binlayer = net.layer(netidx);
  458. const caffe::BlobProto& mean_blob = binlayer.blobs(0);
  459. const caffe::BlobProto& var_blob = binlayer.blobs(1);
  460. fprintf(pp, " 0=%d", (int)mean_blob.data_size());
  461. const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
  462. float eps = batch_norm_param.eps();
  463. std::vector<float> ones(mean_blob.data_size(), 1.f);
  464. fwrite(ones.data(), sizeof(float), ones.size(), bp);// slope
  465. if (binlayer.blobs_size() < 3)
  466. {
  467. fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
  468. float tmp;
  469. for (int j=0; j<var_blob.data_size(); j++)
  470. {
  471. tmp = var_blob.data().data()[j] + eps;
  472. fwrite(&tmp, sizeof(float), 1, bp);
  473. }
  474. }
  475. else
  476. {
  477. float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
  478. // premultiply scale_factor to mean and variance
  479. float tmp;
  480. for (int j=0; j<mean_blob.data_size(); j++)
  481. {
  482. tmp = mean_blob.data().data()[j] * scale_factor;
  483. fwrite(&tmp, sizeof(float), 1, bp);
  484. }
  485. for (int j=0; j<var_blob.data_size(); j++)
  486. {
  487. tmp = var_blob.data().data()[j] * scale_factor + eps;
  488. fwrite(&tmp, sizeof(float), 1, bp);
  489. }
  490. }
  491. std::vector<float> zeros(mean_blob.data_size(), 0.f);
  492. fwrite(zeros.data(), sizeof(float), zeros.size(), bp);// bias
  493. }
  494. else if (layer.type() == "BN")
  495. {
  496. const caffe::LayerParameter& binlayer = net.layer(netidx);
  497. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  498. const caffe::BlobProto& shift_blob = binlayer.blobs(1);
  499. fprintf(pp, " 0=%d", (int)scale_blob.data_size());
  500. fprintf(pp, " 1=1");
  501. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  502. fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
  503. }
  504. else if (layer.type() == "Concat")
  505. {
  506. const caffe::ConcatParameter& concat_param = layer.concat_param();
  507. int dim = concat_param.axis() - 1;
  508. fprintf(pp, " 0=%d", dim);
  509. }
  510. else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  511. {
  512. const caffe::LayerParameter& binlayer = net.layer(netidx);
  513. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  514. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  515. fprintf(pp, " 0=%d", convolution_param.num_output());
  516. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  517. {
  518. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  519. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  520. }
  521. else
  522. {
  523. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  524. }
  525. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  526. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  527. {
  528. fprintf(pp, " 3=%d", convolution_param.stride_w());
  529. fprintf(pp, " 13=%d", convolution_param.stride_h());
  530. }
  531. else
  532. {
  533. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  534. }
  535. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  536. {
  537. fprintf(pp, " 4=%d", convolution_param.pad_w());
  538. fprintf(pp, " 14=%d", convolution_param.pad_h());
  539. }
  540. else
  541. {
  542. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  543. }
  544. fprintf(pp, " 5=%d", convolution_param.bias_term());
  545. fprintf(pp, " 6=%d", weight_blob.data_size());
  546. int num_group = 1;
  547. if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
  548. {
  549. num_group = convolution_param.num_output();
  550. }
  551. else
  552. {
  553. num_group = convolution_param.group();
  554. }
  555. if (num_group != 1)
  556. {
  557. fprintf(pp, " 7=%d", num_group);
  558. }
  559. bool int8_scale_term = false;
  560. std::vector<float> weight_int8scale;
  561. std::vector<float> blob_int8scale;
  562. if (int8scale_table_path)
  563. {
  564. char key[256];
  565. sprintf(key, "%s_param_0", layer.name().c_str());
  566. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  567. {
  568. weight_int8scale = weight_int8scale_table[std::string(key)];
  569. }
  570. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  571. {
  572. blob_int8scale = blob_int8scale_table[layer.name()];
  573. }
  574. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  575. if (int8_scale_term)
  576. {
  577. if ((int)weight_int8scale.size() == num_group && (int)blob_int8scale.size() == num_group)
  578. {
  579. fprintf(pp, " 8=1");
  580. }
  581. else
  582. {
  583. fprintf(pp, " 8=2");
  584. }
  585. }
  586. }
  587. for (int j = 0; j < binlayer.blobs_size(); j++)
  588. {
  589. int quantize_tag = 0;
  590. const caffe::BlobProto& blob = binlayer.blobs(j);
  591. std::vector<float> quantize_table;
  592. std::vector<unsigned char> quantize_index;
  593. std::vector<unsigned short> float16_weights;
  594. std::vector<signed char> int8_weights;
  595. // we will not quantize the bias values
  596. if (j == 0)
  597. {
  598. if (int8_scale_term)
  599. {
  600. if (quantize_level == 0)
  601. {
  602. quantize_tag = 0x0002C056;
  603. }
  604. else if (quantize_level == 256)
  605. {
  606. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  607. }
  608. }
  609. else if (quantize_level == 256)
  610. {
  611. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  612. }
  613. else if (quantize_level == 65536)
  614. {
  615. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  616. }
  617. // write quantize tag first
  618. fwrite(&quantize_tag, sizeof(int), 1, bp);
  619. if (quantize_tag)
  620. {
  621. int p0 = ftell(bp);
  622. if (int8_scale_term)
  623. {
  624. if (quantize_level == 0)
  625. {
  626. // write original data and int8scale
  627. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  628. }
  629. else if (quantize_level == 256)
  630. {
  631. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  632. }
  633. }
  634. else if (quantize_level == 256)
  635. {
  636. // write quantize table and index
  637. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  638. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  639. }
  640. else if (quantize_level == 65536)
  641. {
  642. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  643. }
  644. // padding to 32bit align
  645. int nwrite = ftell(bp) - p0;
  646. int nalign = alignSize(nwrite, 4);
  647. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  648. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  649. }
  650. else
  651. {
  652. // write original data
  653. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  654. }
  655. }
  656. else
  657. {
  658. // write original data
  659. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  660. }
  661. }
  662. if (int8_scale_term)
  663. {
  664. // write int8_scale data
  665. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  666. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  667. }
  668. }
  669. else if (layer.type() == "Crop")
  670. {
  671. const caffe::CropParameter& crop_param = layer.crop_param();
  672. int num_offset = crop_param.offset_size();
  673. if (num_offset == 2)
  674. {
  675. int woffset = crop_param.offset(1);
  676. int hoffset = crop_param.offset(0);
  677. fprintf(pp, " 0=%d", woffset);
  678. fprintf(pp, " 1=%d", hoffset);
  679. }
  680. else if (num_offset == 3)
  681. {
  682. int woffset = crop_param.offset(2);
  683. int hoffset = crop_param.offset(1);
  684. int coffset = crop_param.offset(0);
  685. fprintf(pp, " 0=%d", woffset);
  686. fprintf(pp, " 1=%d", hoffset);
  687. fprintf(pp, " 2=%d", coffset);
  688. }
  689. }
  690. else if (layer.type() == "Deconvolution")
  691. {
  692. const caffe::LayerParameter& binlayer = net.layer(netidx);
  693. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  694. const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
  695. fprintf(pp, " 0=%d", convolution_param.num_output());
  696. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  697. {
  698. fprintf(pp, " 1=%d", convolution_param.kernel_w());
  699. fprintf(pp, " 11=%d", convolution_param.kernel_h());
  700. }
  701. else
  702. {
  703. fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
  704. }
  705. fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
  706. if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
  707. {
  708. fprintf(pp, " 3=%d", convolution_param.stride_w());
  709. fprintf(pp, " 13=%d", convolution_param.stride_h());
  710. }
  711. else
  712. {
  713. fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
  714. }
  715. if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
  716. {
  717. fprintf(pp, " 4=%d", convolution_param.pad_w());
  718. fprintf(pp, " 14=%d", convolution_param.pad_h());
  719. }
  720. else
  721. {
  722. fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
  723. }
  724. fprintf(pp, " 5=%d", convolution_param.bias_term());
  725. fprintf(pp, " 6=%d", weight_blob.data_size());
  726. int group = convolution_param.group();
  727. if (group != 1)
  728. {
  729. fprintf(pp, " 7=%d", group);
  730. }
  731. int quantized_weight = 0;
  732. fwrite(&quantized_weight, sizeof(int), 1, bp);
  733. int maxk = 0;
  734. if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
  735. {
  736. maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
  737. }
  738. else
  739. {
  740. maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
  741. }
  742. for (int g=0; g<group; g++)
  743. {
  744. // reorder weight from inch-outch to outch-inch
  745. int num_output = convolution_param.num_output() / group;
  746. int num_input = weight_blob.data_size() / maxk / num_output / group;
  747. const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
  748. for (int k=0; k<num_output; k++)
  749. {
  750. for (int j=0; j<num_input; j++)
  751. {
  752. fwrite(weight_data_ptr + (j*num_output + k) * maxk, sizeof(float), maxk, bp);
  753. }
  754. }
  755. }
  756. for (int j=1; j<binlayer.blobs_size(); j++)
  757. {
  758. const caffe::BlobProto& blob = binlayer.blobs(j);
  759. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  760. }
  761. }
  762. else if (layer.type() == "DetectionOutput")
  763. {
  764. const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
  765. const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
  766. fprintf(pp, " 0=%d", detection_output_param.num_classes());
  767. fprintf(pp, " 1=%f", nms_param.nms_threshold());
  768. fprintf(pp, " 2=%d", nms_param.top_k());
  769. fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
  770. fprintf(pp, " 4=%f", detection_output_param.confidence_threshold());
  771. }
  772. else if (layer.type() == "Dropout")
  773. {
  774. const caffe::DropoutParameter& dropout_param = layer.dropout_param();
  775. if (dropout_param.has_scale_train() && !dropout_param.scale_train())
  776. {
  777. float scale = 1.f - dropout_param.dropout_ratio();
  778. fprintf(pp, " 0=%f", scale);
  779. }
  780. }
  781. else if (layer.type() == "Eltwise")
  782. {
  783. const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
  784. int coeff_size = eltwise_param.coeff_size();
  785. fprintf(pp, " 0=%d", (int)eltwise_param.operation());
  786. fprintf(pp, " -23301=%d", coeff_size);
  787. for (int j=0; j<coeff_size; j++)
  788. {
  789. fprintf(pp, ",%f", eltwise_param.coeff(j));
  790. }
  791. }
  792. else if (layer.type() == "ELU")
  793. {
  794. const caffe::ELUParameter& elu_param = layer.elu_param();
  795. fprintf(pp, " 0=%f", elu_param.alpha());
  796. }
  797. else if (layer.type() == "Embed")
  798. {
  799. const caffe::LayerParameter& binlayer = net.layer(netidx);
  800. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  801. const caffe::EmbedParameter& embed_param = layer.embed_param();
  802. fprintf(pp, " 0=%d", embed_param.num_output());
  803. fprintf(pp, " 1=%d", embed_param.input_dim());
  804. fprintf(pp, " 2=%d", embed_param.bias_term());
  805. fprintf(pp, " 3=%d", weight_blob.data_size());
  806. for (int j=0; j<binlayer.blobs_size(); j++)
  807. {
  808. int quantize_tag = 0;
  809. const caffe::BlobProto& blob = binlayer.blobs(j);
  810. std::vector<float> quantize_table;
  811. std::vector<unsigned char> quantize_index;
  812. std::vector<unsigned short> float16_weights;
  813. // we will not quantize the bias values
  814. if (j == 0 && quantize_level != 0)
  815. {
  816. if (quantize_level == 256)
  817. {
  818. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  819. }
  820. else if (quantize_level == 65536)
  821. {
  822. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  823. }
  824. }
  825. // write quantize tag first
  826. if (j == 0)
  827. fwrite(&quantize_tag, sizeof(int), 1, bp);
  828. if (quantize_tag)
  829. {
  830. int p0 = ftell(bp);
  831. if (quantize_level == 256)
  832. {
  833. // write quantize table and index
  834. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  835. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  836. }
  837. else if (quantize_level == 65536)
  838. {
  839. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  840. }
  841. // padding to 32bit align
  842. int nwrite = ftell(bp) - p0;
  843. int nalign = alignSize(nwrite, 4);
  844. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  845. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  846. }
  847. else
  848. {
  849. // write original data
  850. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  851. }
  852. }
  853. }
  854. else if (layer.type() == "InnerProduct")
  855. {
  856. const caffe::LayerParameter& binlayer = net.layer(netidx);
  857. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  858. const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
  859. fprintf(pp, " 0=%d", inner_product_param.num_output());
  860. fprintf(pp, " 1=%d", inner_product_param.bias_term());
  861. fprintf(pp, " 2=%d", weight_blob.data_size());
  862. bool int8_scale_term = false;
  863. std::vector<float> weight_int8scale;
  864. std::vector<float> blob_int8scale;
  865. if (int8scale_table_path)
  866. {
  867. char key[256];
  868. sprintf(key, "%s_param_0", layer.name().c_str());
  869. if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
  870. {
  871. weight_int8scale = weight_int8scale_table[std::string(key)];
  872. }
  873. if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
  874. {
  875. blob_int8scale = blob_int8scale_table[layer.name()];
  876. }
  877. int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
  878. if (int8_scale_term)
  879. {
  880. fprintf(pp, " 8=1");
  881. }
  882. }
  883. for (int j=0; j<binlayer.blobs_size(); j++)
  884. {
  885. int quantize_tag = 0;
  886. const caffe::BlobProto& blob = binlayer.blobs(j);
  887. std::vector<float> quantize_table;
  888. std::vector<unsigned char> quantize_index;
  889. std::vector<unsigned short> float16_weights;
  890. std::vector<signed char> int8_weights;
  891. // we will not quantize the bias values
  892. if (j == 0)
  893. {
  894. if (int8_scale_term)
  895. {
  896. if (quantize_level == 0)
  897. {
  898. quantize_tag = 0x0002C056;
  899. }
  900. else if (quantize_level == 256)
  901. {
  902. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
  903. }
  904. }
  905. else if (quantize_level == 256)
  906. {
  907. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  908. }
  909. else if (quantize_level == 65536)
  910. {
  911. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  912. }
  913. // write quantize tag first
  914. fwrite(&quantize_tag, sizeof(int), 1, bp);
  915. if (quantize_tag)
  916. {
  917. int p0 = ftell(bp);
  918. if (int8_scale_term)
  919. {
  920. if (quantize_level == 0)
  921. {
  922. // write original data and int8scale
  923. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  924. }
  925. else if (quantize_level == 256)
  926. {
  927. fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
  928. }
  929. }
  930. else if (quantize_level == 256)
  931. {
  932. // write quantize table and index
  933. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  934. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  935. }
  936. else if (quantize_level == 65536)
  937. {
  938. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  939. }
  940. // padding to 32bit align
  941. int nwrite = ftell(bp) - p0;
  942. int nalign = alignSize(nwrite, 4);
  943. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  944. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  945. }
  946. else
  947. {
  948. // write original data
  949. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  950. }
  951. }
  952. else
  953. {
  954. // write original data
  955. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  956. }
  957. }
  958. if (int8_scale_term)
  959. {
  960. // write int8_scale data
  961. fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
  962. fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
  963. }
  964. }
  965. else if (layer.type() == "Input")
  966. {
  967. const caffe::InputParameter& input_param = layer.input_param();
  968. const caffe::BlobShape& bs = input_param.shape(0);
  969. if (bs.dim_size() == 4)
  970. {
  971. fprintf(pp, " 0=%ld", bs.dim(3));
  972. fprintf(pp, " 1=%ld", bs.dim(2));
  973. fprintf(pp, " 2=%ld", bs.dim(1));
  974. }
  975. else if (bs.dim_size() == 3)
  976. {
  977. fprintf(pp, " 0=%ld", bs.dim(2));
  978. fprintf(pp, " 1=%ld", bs.dim(1));
  979. fprintf(pp, " 2=-233");
  980. }
  981. else if (bs.dim_size() == 2)
  982. {
  983. fprintf(pp, " 0=%ld", bs.dim(1));
  984. fprintf(pp, " 1=-233");
  985. fprintf(pp, " 2=-233");
  986. }
  987. }
  988. else if (layer.type() == "Interp")
  989. {
  990. const caffe::InterpParameter& interp_param = layer.interp_param();
  991. fprintf(pp, " 0=%d", 2);
  992. fprintf(pp, " 1=%f", (float)interp_param.zoom_factor());
  993. fprintf(pp, " 2=%f", (float)interp_param.zoom_factor());
  994. fprintf(pp, " 3=%d", interp_param.height());
  995. fprintf(pp, " 4=%d", interp_param.width());
  996. }
  997. else if (layer.type() == "LRN")
  998. {
  999. const caffe::LRNParameter& lrn_param = layer.lrn_param();
  1000. fprintf(pp, " 0=%d", lrn_param.norm_region());
  1001. fprintf(pp, " 1=%d", lrn_param.local_size());
  1002. fprintf(pp, " 2=%f", lrn_param.alpha());
  1003. fprintf(pp, " 3=%f", lrn_param.beta());
  1004. }
  1005. else if (layer.type() == "LSTM")
  1006. {
  1007. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1008. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1009. const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
  1010. fprintf(pp, " 0=%d", recurrent_param.num_output());
  1011. fprintf(pp, " 1=%d", weight_blob.data_size());
  1012. for (int j=0; j<binlayer.blobs_size(); j++)
  1013. {
  1014. int quantize_tag = 0;
  1015. const caffe::BlobProto& blob = binlayer.blobs(j);
  1016. std::vector<float> quantize_table;
  1017. std::vector<unsigned char> quantize_index;
  1018. std::vector<unsigned short> float16_weights;
  1019. if (quantize_level != 0)
  1020. {
  1021. if (quantize_level == 256)
  1022. {
  1023. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
  1024. }
  1025. else if (quantize_level == 65536)
  1026. {
  1027. quantize_tag = quantize_weight((float *)blob.data().data(), blob.data_size(), float16_weights);
  1028. }
  1029. }
  1030. // write quantize tag first
  1031. fwrite(&quantize_tag, sizeof(int), 1, bp);
  1032. if (quantize_tag)
  1033. {
  1034. int p0 = ftell(bp);
  1035. if (quantize_level == 256)
  1036. {
  1037. // write quantize table and index
  1038. fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
  1039. fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
  1040. }
  1041. else if (quantize_level == 65536)
  1042. {
  1043. fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
  1044. }
  1045. // padding to 32bit align
  1046. int nwrite = ftell(bp) - p0;
  1047. int nalign = alignSize(nwrite, 4);
  1048. unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
  1049. fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
  1050. }
  1051. else
  1052. {
  1053. // write original data
  1054. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1055. }
  1056. }
  1057. }
  1058. else if (layer.type() == "MemoryData")
  1059. {
  1060. const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
  1061. fprintf(pp, " 0=%d", memory_data_param.width());
  1062. fprintf(pp, " 1=%d", memory_data_param.height());
  1063. fprintf(pp, " 2=%d", memory_data_param.channels());
  1064. }
  1065. else if (layer.type() == "MVN")
  1066. {
  1067. const caffe::MVNParameter& mvn_param = layer.mvn_param();
  1068. fprintf(pp, " 0=%d", mvn_param.normalize_variance());
  1069. fprintf(pp, " 1=%d", mvn_param.across_channels());
  1070. fprintf(pp, " 2=%f", mvn_param.eps());
  1071. }
  1072. else if (layer.type() == "Normalize")
  1073. {
  1074. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1075. const caffe::BlobProto& scale_blob = binlayer.blobs(0);
  1076. const caffe::NormalizeParameter& norm_param = layer.norm_param();
  1077. fprintf(pp, " 0=%d", norm_param.across_spatial());
  1078. fprintf(pp, " 1=%d", norm_param.channel_shared());
  1079. fprintf(pp, " 2=%f", norm_param.eps());
  1080. fprintf(pp, " 3=%d", scale_blob.data_size());
  1081. fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
  1082. }
  1083. else if (layer.type() == "Permute")
  1084. {
  1085. const caffe::PermuteParameter& permute_param = layer.permute_param();
  1086. int order_size = permute_param.order_size();
  1087. int order_type = 0;
  1088. if (order_size == 0)
  1089. order_type = 0;
  1090. if (order_size == 1)
  1091. {
  1092. int order0 = permute_param.order(0);
  1093. if (order0 == 0)
  1094. order_type = 0;
  1095. // permute with N not supported
  1096. }
  1097. if (order_size == 2)
  1098. {
  1099. int order0 = permute_param.order(0);
  1100. int order1 = permute_param.order(1);
  1101. if (order0 == 0)
  1102. {
  1103. if (order1 == 1) // 0 1 2 3
  1104. order_type = 0;
  1105. else if (order1 == 2) // 0 2 1 3
  1106. order_type = 2;
  1107. else if (order1 == 3) // 0 3 1 2
  1108. order_type = 4;
  1109. }
  1110. // permute with N not supported
  1111. }
  1112. if (order_size == 3 || order_size == 4)
  1113. {
  1114. int order0 = permute_param.order(0);
  1115. int order1 = permute_param.order(1);
  1116. int order2 = permute_param.order(2);
  1117. if (order0 == 0)
  1118. {
  1119. if (order1 == 1)
  1120. {
  1121. if (order2 == 2) // 0 1 2 3
  1122. order_type = 0;
  1123. if (order2 == 3) // 0 1 3 2
  1124. order_type = 1;
  1125. }
  1126. else if (order1 == 2)
  1127. {
  1128. if (order2 == 1) // 0 2 1 3
  1129. order_type = 2;
  1130. if (order2 == 3) // 0 2 3 1
  1131. order_type = 3;
  1132. }
  1133. else if (order1 == 3)
  1134. {
  1135. if (order2 == 1) // 0 3 1 2
  1136. order_type = 4;
  1137. if (order2 == 2) // 0 3 2 1
  1138. order_type = 5;
  1139. }
  1140. }
  1141. // permute with N not supported
  1142. }
  1143. fprintf(pp, " 0=%d", order_type);
  1144. }
  1145. else if (layer.type() == "Pooling")
  1146. {
  1147. const caffe::PoolingParameter& pooling_param = layer.pooling_param();
  1148. fprintf(pp, " 0=%d", pooling_param.pool());
  1149. if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
  1150. {
  1151. fprintf(pp, " 1=%d", pooling_param.kernel_w());
  1152. fprintf(pp, " 11=%d", pooling_param.kernel_h());
  1153. }
  1154. else
  1155. {
  1156. fprintf(pp, " 1=%d", pooling_param.kernel_size());
  1157. }
  1158. if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
  1159. {
  1160. fprintf(pp, " 2=%d", pooling_param.stride_w());
  1161. fprintf(pp, " 12=%d", pooling_param.stride_h());
  1162. }
  1163. else
  1164. {
  1165. fprintf(pp, " 2=%d", pooling_param.stride());
  1166. }
  1167. if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
  1168. {
  1169. fprintf(pp, " 3=%d", pooling_param.pad_w());
  1170. fprintf(pp, " 13=%d", pooling_param.pad_h());
  1171. }
  1172. else
  1173. {
  1174. fprintf(pp, " 3=%d", pooling_param.pad());
  1175. }
  1176. fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
  1177. }
  1178. else if (layer.type() == "Power")
  1179. {
  1180. const caffe::PowerParameter& power_param = layer.power_param();
  1181. fprintf(pp, " 0=%f", power_param.power());
  1182. fprintf(pp, " 1=%f", power_param.scale());
  1183. fprintf(pp, " 2=%f", power_param.shift());
  1184. }
  1185. else if (layer.type() == "PReLU")
  1186. {
  1187. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1188. const caffe::BlobProto& slope_blob = binlayer.blobs(0);
  1189. fprintf(pp, " 0=%d", slope_blob.data_size());
  1190. fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
  1191. }
  1192. else if (layer.type() == "PriorBox")
  1193. {
  1194. const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
  1195. int num_aspect_ratio = prior_box_param.aspect_ratio_size();
  1196. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  1197. {
  1198. float ar = prior_box_param.aspect_ratio(j);
  1199. if (fabs(ar - 1.) < 1e-6) {
  1200. num_aspect_ratio--;
  1201. }
  1202. }
  1203. float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
  1204. if (prior_box_param.variance_size() == 4)
  1205. {
  1206. variances[0] = prior_box_param.variance(0);
  1207. variances[1] = prior_box_param.variance(1);
  1208. variances[2] = prior_box_param.variance(2);
  1209. variances[3] = prior_box_param.variance(3);
  1210. }
  1211. else if (prior_box_param.variance_size() == 1)
  1212. {
  1213. variances[0] = prior_box_param.variance(0);
  1214. variances[1] = prior_box_param.variance(0);
  1215. variances[2] = prior_box_param.variance(0);
  1216. variances[3] = prior_box_param.variance(0);
  1217. }
  1218. int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
  1219. int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
  1220. int image_width = -233;
  1221. int image_height = -233;
  1222. if (prior_box_param.has_img_size())
  1223. {
  1224. image_width = prior_box_param.img_size();
  1225. image_height = prior_box_param.img_size();
  1226. }
  1227. else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
  1228. {
  1229. image_width = prior_box_param.img_w();
  1230. image_height = prior_box_param.img_h();
  1231. }
  1232. float step_width = -233;
  1233. float step_height = -233;
  1234. if (prior_box_param.has_step())
  1235. {
  1236. step_width = prior_box_param.step();
  1237. step_height = prior_box_param.step();
  1238. }
  1239. else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
  1240. {
  1241. step_width = prior_box_param.step_w();
  1242. step_height = prior_box_param.step_h();
  1243. }
  1244. fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
  1245. for (int j=0; j<prior_box_param.min_size_size(); j++)
  1246. {
  1247. fprintf(pp, ",%f", prior_box_param.min_size(j));
  1248. }
  1249. fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
  1250. for (int j=0; j<prior_box_param.max_size_size(); j++)
  1251. {
  1252. fprintf(pp, ",%f", prior_box_param.max_size(j));
  1253. }
  1254. fprintf(pp, " -23302=%d", num_aspect_ratio);
  1255. for (int j=0; j<prior_box_param.aspect_ratio_size(); j++)
  1256. {
  1257. float ar = prior_box_param.aspect_ratio(j);
  1258. if (fabs(ar - 1.) < 1e-6) {
  1259. continue;
  1260. }
  1261. fprintf(pp, ",%f", ar);
  1262. }
  1263. fprintf(pp, " 3=%f", variances[0]);
  1264. fprintf(pp, " 4=%f", variances[1]);
  1265. fprintf(pp, " 5=%f", variances[2]);
  1266. fprintf(pp, " 6=%f", variances[3]);
  1267. fprintf(pp, " 7=%d", flip);
  1268. fprintf(pp, " 8=%d", clip);
  1269. fprintf(pp, " 9=%d", image_width);
  1270. fprintf(pp, " 10=%d", image_height);
  1271. fprintf(pp, " 11=%f", step_width);
  1272. fprintf(pp, " 12=%f", step_height);
  1273. fprintf(pp, " 13=%f", prior_box_param.offset());
  1274. }
  1275. else if (layer.type() == "PSROIPooling")
  1276. {
  1277. const caffe::PSROIPoolingParameter& psroi_pooling_param = layer.psroi_pooling_param();
  1278. fprintf(pp, " 0=%d", psroi_pooling_param.group_size());
  1279. fprintf(pp, " 1=%d", psroi_pooling_param.group_size());
  1280. fprintf(pp, " 2=%f", psroi_pooling_param.spatial_scale());
  1281. fprintf(pp, " 3=%d", psroi_pooling_param.output_dim());
  1282. }
  1283. else if (layer.type() == "Python")
  1284. {
  1285. const caffe::PythonParameter& python_param = layer.python_param();
  1286. std::string python_layer_name = python_param.layer();
  1287. if (python_layer_name == "ProposalLayer")
  1288. {
  1289. int feat_stride = 16;
  1290. sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
  1291. int base_size = 16;
  1292. // float ratio;
  1293. // float scale;
  1294. int pre_nms_topN = 6000;
  1295. int after_nms_topN = 300;
  1296. float nms_thresh = 0.7;
  1297. int min_size = 16;
  1298. fprintf(pp, " 0=%d", feat_stride);
  1299. fprintf(pp, " 1=%d", base_size);
  1300. fprintf(pp, " 2=%d", pre_nms_topN);
  1301. fprintf(pp, " 3=%d", after_nms_topN);
  1302. fprintf(pp, " 4=%f", nms_thresh);
  1303. fprintf(pp, " 5=%d", min_size);
  1304. }
  1305. }
  1306. else if (layer.type() == "ReLU")
  1307. {
  1308. const caffe::ReLUParameter& relu_param = layer.relu_param();
  1309. if (relu_param.has_negative_slope())
  1310. {
  1311. fprintf(pp, " 0=%f", relu_param.negative_slope());
  1312. }
  1313. }
  1314. else if (layer.type() == "ReLU6")
  1315. {
  1316. float min = 0.f;
  1317. float max = 6.f;
  1318. fprintf(pp, " 0=%f", min);
  1319. fprintf(pp, " 1=%f", max);
  1320. }
  1321. else if (layer.type() == "Reorg")
  1322. {
  1323. const caffe::ReorgParameter& reorg_param = layer.reorg_param();
  1324. fprintf(pp, " 0=%d", reorg_param.stride());
  1325. }
  1326. else if (layer.type() == "Reshape")// -1 1 512
  1327. {
  1328. const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
  1329. const caffe::BlobShape& bs = reshape_param.shape();
  1330. if (bs.dim_size() == 1)
  1331. {
  1332. fprintf(pp, " 0=%ld 1=-233 2=-233", bs.dim(0));
  1333. }
  1334. else if (bs.dim_size() == 2)
  1335. {
  1336. fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(1), bs.dim(0));
  1337. }
  1338. else if (bs.dim_size() == 3)
  1339. {
  1340. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(2), bs.dim(1), bs.dim(0));
  1341. }
  1342. else // bs.dim_size() == 4
  1343. {
  1344. fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(3), bs.dim(2), bs.dim(1));
  1345. }
  1346. fprintf(pp, " 3=0");// permute
  1347. }
  1348. else if (layer.type() == "ROIAlign")
  1349. {
  1350. const caffe::ROIAlignParameter& roi_align_param = layer.roi_align_param();
  1351. fprintf(pp, " 0=%d", roi_align_param.pooled_w());
  1352. fprintf(pp, " 1=%d", roi_align_param.pooled_h());
  1353. fprintf(pp, " 2=%f", roi_align_param.spatial_scale());
  1354. }
  1355. else if (layer.type() == "ROIPooling")
  1356. {
  1357. const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
  1358. fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
  1359. fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
  1360. fprintf(pp, " 2=%f", roi_pooling_param.spatial_scale());
  1361. }
  1362. else if (layer.type() == "Scale")
  1363. {
  1364. const caffe::LayerParameter& binlayer = net.layer(netidx);
  1365. const caffe::ScaleParameter& scale_param = layer.scale_param();
  1366. bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
  1367. if (scale_weight)
  1368. {
  1369. const caffe::BlobProto& weight_blob = binlayer.blobs(0);
  1370. fprintf(pp, " 0=%d", (int)weight_blob.data_size());
  1371. }
  1372. else
  1373. {
  1374. fprintf(pp, " 0=-233");
  1375. }
  1376. fprintf(pp, " 1=%d", scale_param.bias_term());
  1377. for (int j=0; j<binlayer.blobs_size(); j++)
  1378. {
  1379. const caffe::BlobProto& blob = binlayer.blobs(j);
  1380. fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
  1381. }
  1382. }
  1383. else if (layer.type() == "ShuffleChannel")
  1384. {
  1385. const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
  1386. fprintf(pp, " 0=%d", shuffle_channel_param.group());
  1387. }
  1388. else if (layer.type() == "Slice")
  1389. {
  1390. const caffe::SliceParameter& slice_param = layer.slice_param();
  1391. if (slice_param.slice_point_size() == 0)
  1392. {
  1393. int num_slice = layer.top_size();
  1394. fprintf(pp, " -23300=%d", num_slice);
  1395. for (int j=0; j<num_slice; j++)
  1396. {
  1397. fprintf(pp, ",-233");
  1398. }
  1399. }
  1400. else
  1401. {
  1402. int num_slice = slice_param.slice_point_size() + 1;
  1403. fprintf(pp, " -23300=%d", num_slice);
  1404. int prev_offset = 0;
  1405. for (int j=0; j<slice_param.slice_point_size(); j++)
  1406. {
  1407. int offset = slice_param.slice_point(j);
  1408. fprintf(pp, ",%d", offset - prev_offset);
  1409. prev_offset = offset;
  1410. }
  1411. fprintf(pp, ",-233");
  1412. }
  1413. int dim = slice_param.axis() - 1;
  1414. fprintf(pp, " 1=%d", dim);
  1415. }
  1416. else if (layer.type() == "Softmax")
  1417. {
  1418. const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
  1419. int dim = softmax_param.axis() - 1;
  1420. fprintf(pp, " 0=%d", dim);
  1421. }
  1422. else if (layer.type() == "Threshold")
  1423. {
  1424. const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
  1425. fprintf(pp, " 0=%f", threshold_param.threshold());
  1426. }
  1427. else if (layer.type() == "YoloDetectionOutput")
  1428. {
  1429. const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();
  1430. fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
  1431. fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
  1432. fprintf(pp, " 2=%f", yolo_detection_output_param.confidence_threshold());
  1433. fprintf(pp, " 3=%f", yolo_detection_output_param.nms_threshold());
  1434. int num_bias = yolo_detection_output_param.biases_size();
  1435. fprintf(pp, " -23304=%d", num_bias);
  1436. for (int j=0; j<num_bias; j++)
  1437. {
  1438. fprintf(pp, ",%f", yolo_detection_output_param.biases(j));
  1439. }
  1440. }
  1441. else if (layer.type() == "Yolov3DetectionOutput")
  1442. {
  1443. const caffe::Yolov3DetectionOutputParameter& yolov3_detection_output_param = layer.yolov3_detection_output_param();
  1444. fprintf(pp, " 0=%d", yolov3_detection_output_param.num_classes());
  1445. fprintf(pp, " 1=%d", yolov3_detection_output_param.num_box());
  1446. fprintf(pp, " 2=%f", yolov3_detection_output_param.confidence_threshold());
  1447. fprintf(pp, " 3=%f", yolov3_detection_output_param.nms_threshold());
  1448. int num_bias = yolov3_detection_output_param.biases_size();
  1449. fprintf(pp, " -23304=%d", num_bias);
  1450. for (int j = 0; j<num_bias; j++)
  1451. {
  1452. fprintf(pp, ",%f", yolov3_detection_output_param.biases(j));
  1453. }
  1454. int num_mask = yolov3_detection_output_param.mask_size();
  1455. fprintf(pp, " -23305=%d", num_mask);
  1456. for (int j = 0; j<num_mask; j++)
  1457. {
  1458. fprintf(pp, ",%f", (float)yolov3_detection_output_param.mask(j));
  1459. }
  1460. int num_anchors = yolov3_detection_output_param.anchors_scale_size();
  1461. fprintf(pp, " -23306=%d", num_anchors);
  1462. for (int j = 0; j<num_anchors; j++)
  1463. {
  1464. fprintf(pp, ",%f", (float)yolov3_detection_output_param.anchors_scale(j));
  1465. }
  1466. fprintf(pp, " 7=%d", yolov3_detection_output_param.mask_group_num());
  1467. }
  1468. fprintf(pp, "\n");
  1469. // add split layer if top reference larger than one
  1470. if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
  1471. {
  1472. std::string blob_name = blob_name_decorated[layer.top(0)];
  1473. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1474. {
  1475. int refcount = bottom_reference[blob_name];
  1476. if (refcount > 1)
  1477. {
  1478. char splitname[256];
  1479. sprintf(splitname, "splitncnn_%d", internal_split);
  1480. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1481. fprintf(pp, " %s", blob_name.c_str());
  1482. for (int j=0; j<refcount; j++)
  1483. {
  1484. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1485. }
  1486. fprintf(pp, "\n");
  1487. internal_split++;
  1488. }
  1489. }
  1490. }
  1491. else
  1492. {
  1493. for (int j=0; j<layer.top_size(); j++)
  1494. {
  1495. std::string blob_name = layer.top(j);
  1496. if (bottom_reference.find(blob_name) != bottom_reference.end())
  1497. {
  1498. int refcount = bottom_reference[blob_name];
  1499. if (refcount > 1)
  1500. {
  1501. char splitname[256];
  1502. sprintf(splitname, "splitncnn_%d", internal_split);
  1503. fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
  1504. fprintf(pp, " %s", blob_name.c_str());
  1505. for (int j=0; j<refcount; j++)
  1506. {
  1507. fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
  1508. }
  1509. fprintf(pp, "\n");
  1510. internal_split++;
  1511. }
  1512. }
  1513. }
  1514. }
  1515. }
  1516. fclose(pp);
  1517. fclose(bp);
  1518. return 0;
  1519. }