You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ncnn2table.cpp 31 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989
  1. // BUG1989 is pleased to support the open source community by supporting ncnn available.
  2. //
  3. // author:BUG1989 (https://github.com/BUG1989/) Long-term support.
  4. // author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
  5. //
  6. // Copyright (C) 2019 BUG1989. All rights reserved.
  7. //
  8. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  9. // in compliance with the License. You may obtain a copy of the License at
  10. //
  11. // https://opensource.org/licenses/BSD-3-Clause
  12. //
  13. // Unless required by applicable law or agreed to in writing, software distributed
  14. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  15. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  16. // specific language governing permissions and limitations under the License.
  17. #ifdef _MSC_VER
  18. #define _CRT_SECURE_NO_DEPRECATE
  19. #endif
  #include <algorithm>
  #include <cassert>
  #include <cmath>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <iostream>
  #include <limits>
  #include <map>
  #include <string>
  #include <vector>

  #include <opencv2/opencv.hpp>

  // ncnn public header
  #include "benchmark.h"
  #include "cpu.h"
  #include "net.h"

  // ncnn private header
  #include "layer/convolution.h"
  #include "layer/convolutiondepthwise.h"
  #include "layer/innerproduct.h"
  // Inference options shared by the tool: main() fills them in and
  // post_training_quantize() copies them into the network (net.opt).
  36. static ncnn::Option g_default_option;
  // Allocator that main() installs as g_default_option.blob_allocator.
  37. static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
  // Allocator that main() installs as g_default_option.workspace_allocator.
  38. static ncnn::PoolAllocator g_workspace_pool_allocator;
  39. // Get the file names from direct path
  40. int parse_images_dir(const std::string& base_path, std::vector<std::string>& file_path)
  41. {
  42. file_path.clear();
  43. const cv::String base_path_str(base_path);
  44. std::vector<cv::String> image_list;
  45. cv::glob(base_path_str, image_list, true);
  46. for (size_t i = 0; i < image_list.size(); i++)
  47. {
  48. const cv::String& image_path = image_list[i];
  49. file_path.push_back(image_path);
  50. }
  51. return 0;
  52. }
  53. class QuantNet : public ncnn::Net
  54. {
  55. public:
  56. QuantNet();
  57. std::vector<ncnn::Blob>& blobs;
  58. std::vector<ncnn::Layer*>& layers;
  59. int get_conv_names();
  60. int get_conv_bottom_blob_names();
  61. int get_conv_weight_blob_scales();
  62. int get_input_names();
  63. public:
  64. std::vector<std::string> conv_names;
  65. std::map<std::string, std::string> conv_bottom_blob_names;
  66. std::map<std::string, std::vector<float> > weight_scales;
  67. std::vector<std::string> input_names;
  68. };
  69. QuantNet::QuantNet()
  70. : blobs(mutable_blobs()), layers(mutable_layers())
  71. {
  72. }
  73. int QuantNet::get_input_names()
  74. {
  75. for (size_t i = 0; i < layers.size(); i++)
  76. {
  77. const ncnn::Layer* layer = layers[i];
  78. if (layer->type == "Input")
  79. {
  80. for (size_t j = 0; j < layer->tops.size(); j++)
  81. {
  82. int blob_index = layer->tops[j];
  83. std::string name = blobs[blob_index].name;
  84. input_names.push_back(name);
  85. }
  86. }
  87. }
  88. return 0;
  89. }
  90. int QuantNet::get_conv_names()
  91. {
  92. for (size_t i = 0; i < layers.size(); i++)
  93. {
  94. const ncnn::Layer* layer = layers[i];
  95. if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
  96. {
  97. std::string name = layer->name;
  98. conv_names.push_back(name);
  99. }
  100. }
  101. return 0;
  102. }
  103. int QuantNet::get_conv_bottom_blob_names()
  104. {
  105. // find conv bottom name or index
  106. for (size_t i = 0; i < layers.size(); i++)
  107. {
  108. const ncnn::Layer* layer = layers[i];
  109. if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
  110. {
  111. const std::string& name = layer->name;
  112. const std::string& bottom_blob_name = blobs[layer->bottoms[0]].name;
  113. conv_bottom_blob_names[name] = bottom_blob_name;
  114. }
  115. }
  116. return 0;
  117. }
  118. int QuantNet::get_conv_weight_blob_scales()
  119. {
  120. for (size_t i = 0; i < layers.size(); i++)
  121. {
  122. const ncnn::Layer* layer = layers[i];
  123. if (layer->type == "Convolution")
  124. {
  125. const ncnn::Convolution* convolution = static_cast<const ncnn::Convolution*>(layer);
  126. std::string name = layer->name;
  127. const int weight_data_size_output = convolution->weight_data_size / convolution->num_output;
  128. std::vector<float> scales;
  129. // int8 winograd F43 needs weight data to use 6bit quantization
  130. bool quant_6bit = false;
  131. int kernel_w = convolution->kernel_w;
  132. int kernel_h = convolution->kernel_h;
  133. int dilation_w = convolution->dilation_w;
  134. int dilation_h = convolution->dilation_h;
  135. int stride_w = convolution->stride_w;
  136. int stride_h = convolution->stride_h;
  137. if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
  138. quant_6bit = true;
  139. for (int n = 0; n < convolution->num_output; n++)
  140. {
  141. const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  142. const float* data_n = weight_data_n;
  143. float max_value = std::numeric_limits<float>::min();
  144. for (int k = 0; k < weight_data_size_output; k++)
  145. {
  146. max_value = std::max(max_value, std::fabs(data_n[k]));
  147. }
  148. if (quant_6bit)
  149. {
  150. scales.push_back(31 / max_value);
  151. }
  152. else
  153. {
  154. scales.push_back(127 / max_value);
  155. }
  156. }
  157. weight_scales[name] = scales;
  158. }
  159. if (layer->type == "ConvolutionDepthWise")
  160. {
  161. const ncnn::ConvolutionDepthWise* convolutiondepthwise = static_cast<const ncnn::ConvolutionDepthWise*>(layer);
  162. std::string name = layer->name;
  163. const int weight_data_size_output = convolutiondepthwise->weight_data_size / convolutiondepthwise->group;
  164. std::vector<float> scales;
  165. for (int n = 0; n < convolutiondepthwise->group; n++)
  166. {
  167. const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  168. const float* data_n = weight_data_n;
  169. float max_value = std::numeric_limits<float>::min();
  170. for (int k = 0; k < weight_data_size_output; k++)
  171. {
  172. max_value = std::max(max_value, std::fabs(data_n[k]));
  173. }
  174. scales.push_back(127 / max_value);
  175. }
  176. weight_scales[name] = scales;
  177. }
  178. if (layer->type == "InnerProduct")
  179. {
  180. const ncnn::InnerProduct* innerproduct = static_cast<const ncnn::InnerProduct*>(layer);
  181. std::string name = layer->name;
  182. const int weight_data_size_output = innerproduct->weight_data_size / innerproduct->num_output;
  183. std::vector<float> scales;
  184. for (int n = 0; n < innerproduct->num_output; n++)
  185. {
  186. const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  187. const float* data_n = weight_data_n;
  188. float max_value = std::numeric_limits<float>::min();
  189. for (int k = 0; k < weight_data_size_output; k++)
  190. max_value = std::max(max_value, std::fabs(data_n[k]));
  191. scales.push_back(127 / max_value);
  192. }
  193. weight_scales[name] = scales;
  194. }
  195. }
  196. return 0;
  197. }
  198. class QuantizeData
  199. {
  200. public:
  201. QuantizeData(const std::string& layer_name, const int& num);
  202. int initial_blob_max(ncnn::Mat data);
  203. int initial_histogram_interval();
  204. int initial_histogram_value();
  205. int normalize_histogram();
  206. int update_histogram(ncnn::Mat data);
  207. float compute_kl_divergence(const std::vector<float>& dist_a, const std::vector<float>& dist_b) const;
  208. int threshold_distribution(const std::vector<float>& distribution, const int target_bin = 128) const;
  209. float get_data_blob_scale();
  210. public:
  211. std::string name;
  212. float max_value;
  213. int num_bins;
  214. float histogram_interval;
  215. std::vector<float> histogram;
  216. float threshold;
  217. int threshold_bin;
  218. float scale;
  219. };
  220. QuantizeData::QuantizeData(const std::string& layer_name, const int& num)
  221. {
  222. name = layer_name;
  223. max_value = 0.f;
  224. num_bins = num;
  225. histogram_interval = 0.f;
  226. histogram.resize(num_bins);
  227. initial_histogram_value();
  228. threshold = 0.f;
  229. threshold_bin = 0;
  230. scale = 1.0f;
  231. }
  232. int QuantizeData::initial_blob_max(ncnn::Mat data)
  233. {
  234. const int channel_num = data.c;
  235. const int size = data.w * data.h;
  236. for (int q = 0; q < channel_num; q++)
  237. {
  238. const float* data_n = data.channel(q);
  239. for (int i = 0; i < size; i++)
  240. {
  241. max_value = std::max(max_value, std::fabs(data_n[i]));
  242. }
  243. }
  244. return 0;
  245. }
  246. int QuantizeData::initial_histogram_interval()
  247. {
  248. histogram_interval = max_value / static_cast<float>(num_bins);
  249. return 0;
  250. }
  251. int QuantizeData::initial_histogram_value()
  252. {
  253. for (size_t i = 0; i < histogram.size(); i++)
  254. {
  255. histogram[i] = 0.00001f;
  256. }
  257. return 0;
  258. }
  259. int QuantizeData::normalize_histogram()
  260. {
  261. const size_t length = histogram.size();
  262. float sum = 0;
  263. for (size_t i = 0; i < length; i++)
  264. sum += histogram[i];
  265. for (size_t i = 0; i < length; i++)
  266. histogram[i] /= sum;
  267. return 0;
  268. }
  269. int QuantizeData::update_histogram(ncnn::Mat data)
  270. {
  271. const int channel_num = data.c;
  272. const int size = data.w * data.h;
  273. for (int q = 0; q < channel_num; q++)
  274. {
  275. const float* data_n = data.channel(q);
  276. for (int i = 0; i < size; i++)
  277. {
  278. if (data_n[i] == 0)
  279. continue;
  280. const int index = std::min(static_cast<int>(std::abs(data_n[i]) / histogram_interval), 2047);
  281. histogram[index]++;
  282. }
  283. }
  284. return 0;
  285. }
  286. float QuantizeData::compute_kl_divergence(const std::vector<float>& dist_a, const std::vector<float>& dist_b) const
  287. {
  288. const size_t length = dist_a.size();
  289. assert(dist_b.size() == length);
  290. float result = 0;
  291. for (size_t i = 0; i < length; i++)
  292. {
  293. if (dist_a[i] != 0)
  294. {
  295. if (dist_b[i] == 0)
  296. {
  297. result += 1;
  298. }
  299. else
  300. {
  301. result += dist_a[i] * log(dist_a[i] / dist_b[i]);
  302. }
  303. }
  304. }
  305. return result;
  306. }
  // Choose the truncation point of a histogram by comparing, for every
  // candidate, the truncated histogram with a coarsened copy of itself.
  //
  // distribution : histogram to analyse.
  // target_bin   : number of coarse bins the first `threshold` bins are merged into.
  //
  // For each candidate threshold in [target_bin, distribution.size()):
  //   1. t_distribution  = the first `threshold` bins, with the sum of all bins
  //      from `threshold` onward added to its last bin;
  //   2. quantize_distribution = the first `threshold` bins merged into
  //      target_bin coarse bins (edge bins contribute fractionally);
  //   3. expand_distribution = each coarse bin spread back evenly over the
  //      non-zero source bins it came from;
  //   4. the KL divergence of (1) against (3) is computed.
  // Returns the candidate with the smallest divergence; target_bin is returned
  // when no candidate scores below the initial value of 1000 (or when there
  // are no candidates at all).
  307. int QuantizeData::threshold_distribution(const std::vector<float>& distribution, const int target_bin) const
  308. {
  309. int target_threshold = target_bin;
  310. float min_kl_divergence = 1000;
  311. const int length = static_cast<int>(distribution.size());
  312. std::vector<float> quantize_distribution(target_bin);
  // Sum of every bin at or beyond the first candidate; kept up to date inside
  // the main loop so it always equals the sum of bins [threshold, length).
  313. float threshold_sum = 0;
  314. for (int threshold = target_bin; threshold < length; threshold++)
  315. {
  316. threshold_sum += distribution[threshold];
  317. }
  318. for (int threshold = target_bin; threshold < length; threshold++)
  319. {
  // Truncated copy; everything cut off is folded into the last kept bin.
  320. std::vector<float> t_distribution(distribution.begin(), distribution.begin() + threshold);
  321. t_distribution[threshold - 1] += threshold_sum;
  322. threshold_sum -= distribution[threshold];
  323. // get P
  324. fill(quantize_distribution.begin(), quantize_distribution.end(), 0.0f);
  // Number of source bins covered by one coarse bin (may be fractional).
  325. const float num_per_bin = static_cast<float>(threshold) / static_cast<float>(target_bin);
  326. for (int i = 0; i < target_bin; i++)
  327. {
  // Coarse bin i covers the source range [start, end).
  328. const float start = static_cast<float>(i) * num_per_bin;
  329. const float end = start + num_per_bin;
  // Partial source bin at the left edge, weighted by the covered fraction.
  330. const int left_upper = static_cast<int>(ceil(start));
  331. if (static_cast<float>(left_upper) > start)
  332. {
  333. const float left_scale = static_cast<float>(left_upper) - start;
  334. quantize_distribution[i] += left_scale * distribution[left_upper - 1];
  335. }
  // Partial source bin at the right edge, weighted by the covered fraction.
  336. const int right_lower = static_cast<int>(floor(end));
  337. if (static_cast<float>(right_lower) < end)
  338. {
  339. const float right_scale = end - static_cast<float>(right_lower);
  340. quantize_distribution[i] += right_scale * distribution[right_lower];
  341. }
  // Source bins that lie completely inside the coarse bin.
  342. for (int j = left_upper; j < right_lower; j++)
  343. {
  344. quantize_distribution[i] += distribution[j];
  345. }
  346. }
  347. // get Q
  348. std::vector<float> expand_distribution(threshold, 0);
  349. for (int i = 0; i < target_bin; i++)
  350. {
  351. const float start = static_cast<float>(i) * num_per_bin;
  352. const float end = start + num_per_bin;
  // (Fractional) number of non-zero source bins inside coarse bin i.
  353. float count = 0;
  354. const int left_upper = static_cast<int>(ceil(start));
  355. float left_scale = 0;
  356. if (static_cast<float>(left_upper) > start)
  357. {
  358. left_scale = static_cast<float>(left_upper) - start;
  359. if (distribution[left_upper - 1] != 0)
  360. {
  361. count += left_scale;
  362. }
  363. }
  364. const int right_lower = static_cast<int>(floor(end));
  365. float right_scale = 0;
  366. if (static_cast<float>(right_lower) < end)
  367. {
  368. right_scale = end - static_cast<float>(right_lower);
  369. if (distribution[right_lower] != 0)
  370. {
  371. count += right_scale;
  372. }
  373. }
  374. for (int j = left_upper; j < right_lower; j++)
  375. {
  376. if (distribution[j] != 0)
  377. {
  378. count++;
  379. }
  380. }
  // Share of the coarse bin given to each non-zero source bin. When count is 0
  // every source bin in the range is zero, so the value is never written below.
  381. const float expand_value = quantize_distribution[i] / count;
  382. if (static_cast<float>(left_upper) > start)
  383. {
  384. if (distribution[left_upper - 1] != 0)
  385. {
  386. expand_distribution[left_upper - 1] += expand_value * left_scale;
  387. }
  388. }
  389. if (static_cast<float>(right_lower) < end)
  390. {
  391. if (distribution[right_lower] != 0)
  392. {
  393. expand_distribution[right_lower] += expand_value * right_scale;
  394. }
  395. }
  396. for (int j = left_upper; j < right_lower; j++)
  397. {
  398. if (distribution[j] != 0)
  399. {
  400. expand_distribution[j] += expand_value;
  401. }
  402. }
  403. }
  404. // kl
  405. const float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);
  406. // the best num of bin
  407. if (kl_divergence < min_kl_divergence)
  408. {
  409. min_kl_divergence = kl_divergence;
  410. target_threshold = threshold;
  411. }
  412. }
  413. return target_threshold;
  414. }
  415. float QuantizeData::get_data_blob_scale()
  416. {
  417. normalize_histogram();
  418. threshold_bin = threshold_distribution(histogram);
  419. threshold = (static_cast<float>(threshold_bin) + 0.5f) * histogram_interval;
  420. scale = 127 / threshold;
  421. return scale;
  422. }
  // Image pre-processing settings that main() fills in and
  // post_training_quantize() applies to every calibration image.
  423. struct PreParam
  424. {
  // Three values passed as the mean argument of substract_mean_normalize().
  425. float mean[3];
  // Three values passed as the norm argument of substract_mean_normalize().
  426. float norm[3];
  // Target width handed to ncnn::Mat::from_pixels_resize().
  427. int width;
  // Target height handed to ncnn::Mat::from_pixels_resize().
  428. int height;
  // true selects PIXEL_BGR2RGB, false selects PIXEL_BGR when converting the image.
  429. bool swapRB;
  430. };
  431. static int post_training_quantize(const std::vector<std::string>& image_list, const std::string& param_path, const std::string& bin_path, const std::string& table_path, struct PreParam& per_param)
  432. {
  433. size_t size = image_list.size();
  434. QuantNet net;
  435. net.opt = g_default_option;
  436. net.load_param(param_path.c_str());
  437. net.load_model(bin_path.c_str());
  438. float mean_vals[3];
  439. float norm_vals[3];
  440. int width = per_param.width;
  441. int height = per_param.height;
  442. bool swapRB = per_param.swapRB;
  443. mean_vals[0] = per_param.mean[0];
  444. mean_vals[1] = per_param.mean[1];
  445. mean_vals[2] = per_param.mean[2];
  446. norm_vals[0] = per_param.norm[0];
  447. norm_vals[1] = per_param.norm[1];
  448. norm_vals[2] = per_param.norm[2];
  449. g_blob_pool_allocator.clear();
  450. g_workspace_pool_allocator.clear();
  451. net.get_input_names();
  452. net.get_conv_names();
  453. net.get_conv_bottom_blob_names();
  454. net.get_conv_weight_blob_scales();
  455. if (net.input_names.empty())
  456. {
  457. fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n");
  458. return -1;
  459. }
  460. FILE* fp = fopen(table_path.c_str(), "w");
  461. // save quantization scale of weight
  462. printf("====> Quantize the parameters.\n");
  463. for (size_t i = 0; i < net.conv_names.size(); i++)
  464. {
  465. std::string layer_name = net.conv_names[i];
  466. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  467. std::vector<float> weight_scale_n = net.weight_scales[layer_name];
  468. fprintf(fp, "%s_param_0 ", layer_name.c_str());
  469. for (size_t j = 0; j < weight_scale_n.size(); j++)
  470. {
  471. fprintf(fp, "%f ", weight_scale_n[j]);
  472. }
  473. fprintf(fp, "\n");
  474. }
  475. // initial quantization data
  476. std::vector<QuantizeData> quantize_datas;
  477. for (size_t i = 0; i < net.conv_names.size(); i++)
  478. {
  479. std::string layer_name = net.conv_names[i];
  480. QuantizeData quantize_data(layer_name, 2048);
  481. quantize_datas.push_back(quantize_data);
  482. }
  483. // step 1 count the max value
  484. printf("====> Quantize the activation.\n");
  485. printf(" ====> step 1 : find the max value.\n");
  486. for (size_t i = 0; i < image_list.size(); i++)
  487. {
  488. std::string img_name = image_list[i];
  489. if ((i + 1) % 100 == 0)
  490. {
  491. fprintf(stderr, " %d/%d\n", static_cast<int>(i + 1), static_cast<int>(size));
  492. }
  493. #if OpenCV_VERSION_MAJOR > 2
  494. cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
  495. #else
  496. cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
  497. #endif
  498. if (bgr.empty())
  499. {
  500. fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
  501. return -1;
  502. }
  503. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
  504. in.substract_mean_normalize(mean_vals, norm_vals);
  505. ncnn::Extractor ex = net.create_extractor();
  506. ex.input(net.input_names[0].c_str(), in);
  507. for (size_t j = 0; j < net.conv_names.size(); j++)
  508. {
  509. std::string layer_name = net.conv_names[j];
  510. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  511. ncnn::Mat out;
  512. ex.extract(blob_name.c_str(), out);
  513. for (size_t k = 0; k < quantize_datas.size(); k++)
  514. {
  515. if (quantize_datas[k].name == layer_name)
  516. {
  517. quantize_datas[k].initial_blob_max(out);
  518. break;
  519. }
  520. }
  521. }
  522. }
  523. // step 2 histogram_interval
  524. printf(" ====> step 2 : generate the histogram_interval.\n");
  525. for (size_t i = 0; i < net.conv_names.size(); i++)
  526. {
  527. std::string layer_name = net.conv_names[i];
  528. for (size_t k = 0; k < quantize_datas.size(); k++)
  529. {
  530. if (quantize_datas[k].name == layer_name)
  531. {
  532. quantize_datas[k].initial_histogram_interval();
  533. fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_datas[k].name.c_str(), quantize_datas[k].max_value, quantize_datas[k].histogram_interval);
  534. break;
  535. }
  536. }
  537. }
  538. // step 3 histogram
  539. printf(" ====> step 3 : generate the histogram.\n");
  540. for (size_t i = 0; i < image_list.size(); i++)
  541. {
  542. std::string img_name = image_list[i];
  543. if ((i + 1) % 100 == 0)
  544. fprintf(stderr, " %d/%d\n", (int)(i + 1), (int)size);
  545. #if OpenCV_VERSION_MAJOR > 2
  546. cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
  547. #else
  548. cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
  549. #endif
  550. if (bgr.empty())
  551. {
  552. fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
  553. return -1;
  554. }
  555. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
  556. in.substract_mean_normalize(mean_vals, norm_vals);
  557. ncnn::Extractor ex = net.create_extractor();
  558. ex.input(net.input_names[0].c_str(), in);
  559. for (size_t j = 0; j < net.conv_names.size(); j++)
  560. {
  561. std::string layer_name = net.conv_names[j];
  562. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  563. ncnn::Mat out;
  564. ex.extract(blob_name.c_str(), out);
  565. for (size_t k = 0; k < quantize_datas.size(); k++)
  566. {
  567. if (quantize_datas[k].name == layer_name)
  568. {
  569. quantize_datas[k].update_histogram(out);
  570. break;
  571. }
  572. }
  573. }
  574. }
  575. // step4 kld
  576. printf(" ====> step 4 : using kld to find the best threshold value.\n");
  577. for (size_t i = 0; i < net.conv_names.size(); i++)
  578. {
  579. std::string layer_name = net.conv_names[i];
  580. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  581. fprintf(stderr, "%-20s ", layer_name.c_str());
  582. for (size_t k = 0; k < quantize_datas.size(); k++)
  583. {
  584. if (quantize_datas[k].name == layer_name)
  585. {
  586. quantize_datas[k].get_data_blob_scale();
  587. fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n",
  588. quantize_datas[k].threshold_bin,
  589. quantize_datas[k].threshold,
  590. quantize_datas[k].histogram_interval,
  591. quantize_datas[k].scale);
  592. fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_datas[k].scale);
  593. break;
  594. }
  595. }
  596. }
  597. fclose(fp);
  598. printf("====> Save the calibration table done.\n");
  599. return 0;
  600. }
  // usage
  // Write one example command line, followed by a newline and a flush, to
  // standard output.
  void showUsage()
  {
      static const char* const example_command =
          "example: ./ncnn2table"
          " --param=squeezenet-fp32.param"
          " --bin=squeezenet-fp32.bin"
          " --images=images/"
          " --output=squeezenet.table"
          " --mean=104.0,117.0,123.0"
          " --norm=1.0,1.0,1.0"
          " --size=224,224"
          " --swapRB"
          " --thread=2";
      std::cout << example_command << std::endl;
  }
  // Convert one token to float. The whole token must be a number; leading
  // whitespace (skipped by strtod) and trailing blanks/tabs are tolerated.
  // Returns false for an empty token or one that is not a number, so that such
  // input is rejected instead of silently becoming 0.
  static bool parse_float_token(const std::string& token, float& out)
  {
      const char* const begin = token.c_str();
      char* end = 0;
      const double parsed = std::strtod(begin, &end);
      if (end == begin)
          return false;

      while (*end == ' ' || *end == '\t')
          ++end;
      if (*end != '\0')
          return false;

      out = static_cast<float>(parsed);
      return true;
  }

  // Parse a "a,b,c" string into three floats.
  //
  // values_string : exactly three numbers separated by two commas.
  // value         : the three numbers are appended in order on success and the
  //                 vector is left untouched on failure.
  //
  // Returns 0 on success. Returns -1, after printing the reason to stderr, when
  // the string has no comma, does not have exactly two commas, starts or ends
  // with a comma, has two adjacent commas, or holds a token that is not a number.
  static int find_all_value_in_string(const std::string& values_string, std::vector<float>& value)
  {
      std::vector<size_t> masks_pos;
      for (size_t i = 0; i < values_string.size(); i++)
      {
          if (',' == values_string[i])
          {
              masks_pos.push_back(i);
          }
      }

      // check
      if (masks_pos.empty())
      {
          fprintf(stderr, "ERROR: Cannot find any ',' in string, please check.\n");
          return -1;
      }
      if (2 != masks_pos.size())
      {
          fprintf(stderr, "ERROR: Expected exactly 3 values separated by ',', please check.\n");
          return -1;
      }
      if (masks_pos.front() == 0)
      {
          fprintf(stderr, "ERROR: Char ',' in first of string, please check.\n");
          return -1;
      }
      // The last comma must not be the last character of the string.
      if (masks_pos.back() + 1 == values_string.size())
      {
          fprintf(stderr, "ERROR: Char ',' in last of string, please check.\n");
          return -1;
      }
      if (masks_pos[1] - masks_pos[0] == 1)
      {
          fprintf(stderr, "ERROR: Neighbouring char ',' was found.\n");
          return -1;
      }

      std::string tokens[3];
      tokens[0] = values_string.substr(0, masks_pos[0]);
      tokens[1] = values_string.substr(masks_pos[0] + 1, masks_pos[1] - masks_pos[0] - 1);
      tokens[2] = values_string.substr(masks_pos[1] + 1);

      // Parse everything first so that `value` is only modified on success.
      float parsed[3];
      for (int c = 0; c < 3; c++)
      {
          if (!parse_float_token(tokens[c], parsed[c]))
          {
              fprintf(stderr, "ERROR: Cannot read a number from \"%s\", please check.\n", tokens[c].c_str());
              return -1;
          }
      }

      value.push_back(parsed[0]);
      value.push_back(parsed[1]);
      value.push_back(parsed[2]);
      return 0;
  }
  // Compiled only when CV_MAJOR_VERSION < 3: a thin subclass of
  // cv::CommandLineParser that offers the has() / printMessage() members
  // main() calls on its parser object.
  // NOTE(review): presumably needed because the 2.x parser does not expose
  // these publicly under these names - confirm against the OpenCV 2.x headers.
  656. #if CV_MAJOR_VERSION < 3
  657. class NcnnQuantCommandLineParser : public cv::CommandLineParser
  658. {
  659. public:
  // Forwards all arguments unchanged to the cv::CommandLineParser constructor.
  660. NcnnQuantCommandLineParser(int argc, const char* const argv[], const char* key_map)
  661. : cv::CommandLineParser(argc, argv, key_map)
  662. {
  663. }
  // Public forwarder to cv::CommandLineParser::has.
  664. bool has(const std::string& keys)
  665. {
  666. return cv::CommandLineParser::has(keys);
  667. }
  // Provides the printMessage() name by calling cv::CommandLineParser::printParams().
  668. void printMessage()
  669. {
  670. cv::CommandLineParser::printParams();
  671. }
  672. };
  673. #endif
  674. int main(int argc, char** argv)
  675. {
  676. std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl;
  677. const char* key_map = "{help h usage ? | | print this message }"
  678. "{param p | | path to ncnn.param file }"
  679. "{bin b | | path to ncnn.bin file }"
  680. "{images i | | path to calibration images folder }"
  681. "{output o | | path to output calibration table file }"
  682. "{mean m | | value of mean (mean value, default is 104.0,117.0,123.0) }"
  683. "{norm n | | value of normalize (scale value, default is 1.0,1.0,1.0) }"
  684. "{size s | | the size of input image(using the resize the original image,default is w=224,h=224) }"
  685. "{swapRB c | | flag which indicates that swap first and last channels in 3-channel image is necessary }"
  686. "{thread t | 4 | count of processing threads }";
  687. #if CV_MAJOR_VERSION < 3
  688. NcnnQuantCommandLineParser parser(argc, argv, key_map);
  689. #else
  690. cv::CommandLineParser parser(argc, argv, key_map);
  691. #endif
  692. if (parser.has("help"))
  693. {
  694. parser.printMessage();
  695. showUsage();
  696. return 0;
  697. }
  698. if (!parser.has("param") || !parser.has("bin") || !parser.has("images") || !parser.has("output") || !parser.has("mean") || !parser.has("norm"))
  699. {
  700. std::cout << "Inputs is does not include all needed param, pleas check..." << std::endl;
  701. parser.printMessage();
  702. showUsage();
  703. return 0;
  704. }
  705. const std::string image_folder_path = parser.get<cv::String>("images");
  706. const std::string ncnn_param_file_path = parser.get<cv::String>("param");
  707. const std::string ncnn_bin_file_path = parser.get<cv::String>("bin");
  708. const std::string saved_table_file_path = parser.get<cv::String>("output");
  709. // check the input param
  710. if (image_folder_path.empty() || ncnn_param_file_path.empty() || ncnn_bin_file_path.empty() || saved_table_file_path.empty())
  711. {
  712. fprintf(stderr, "One or more path may be empty, please check and try again.\n");
  713. return 0;
  714. }
  715. const int num_threads = parser.get<int>("thread");
  716. struct PreParam pre_param;
  717. pre_param.mean[0] = 104.f;
  718. pre_param.mean[1] = 117.f;
  719. pre_param.mean[2] = 103.f;
  720. pre_param.norm[0] = 1.f;
  721. pre_param.norm[1] = 1.f;
  722. pre_param.norm[2] = 1.f;
  723. pre_param.width = 224;
  724. pre_param.height = 224;
  725. pre_param.swapRB = false;
  726. if (parser.has("mean"))
  727. {
  728. const std::string mean_str = parser.get<std::string>("mean");
  729. std::vector<float> mean_values;
  730. const int ret = find_all_value_in_string(mean_str, mean_values);
  731. if (0 != ret && 3 != mean_values.size())
  732. {
  733. fprintf(stderr, "ERROR: Searching mean value from --mean was failed.\n");
  734. return -1;
  735. }
  736. pre_param.mean[0] = mean_values[0];
  737. pre_param.mean[1] = mean_values[1];
  738. pre_param.mean[2] = mean_values[2];
  739. }
  740. if (parser.has("norm"))
  741. {
  742. const std::string norm_str = parser.get<std::string>("norm");
  743. std::vector<float> norm_values;
  744. const int ret = find_all_value_in_string(norm_str, norm_values);
  745. if (0 != ret && 3 != norm_values.size())
  746. {
  747. fprintf(stderr, "ERROR: Searching mean value from --mean was failed, please check --mean param.\n");
  748. return -1;
  749. }
  750. pre_param.norm[0] = norm_values[0];
  751. pre_param.norm[1] = norm_values[1];
  752. pre_param.norm[2] = norm_values[2];
  753. }
  754. if (parser.has("size"))
  755. {
  756. cv::String size_str = parser.get<std::string>("size");
  757. size_t sep_pos = size_str.find_first_of(',');
  758. if (cv::String::npos != sep_pos && sep_pos < size_str.size())
  759. {
  760. cv::String width_value_str;
  761. cv::String height_value_str;
  762. width_value_str = size_str.substr(0, sep_pos);
  763. height_value_str = size_str.substr(sep_pos + 1, size_str.size() - sep_pos - 1);
  764. pre_param.width = static_cast<int>(std::atoi(std::string(width_value_str).c_str()));
  765. pre_param.height = static_cast<int>(std::atoi(std::string(height_value_str).c_str()));
  766. }
  767. else
  768. {
  769. fprintf(stderr, "ERROR: Searching size value from --size was failed, please check --size param.\n");
  770. return -1;
  771. }
  772. }
  773. if (parser.has("swapRB"))
  774. {
  775. pre_param.swapRB = true;
  776. }
  777. g_blob_pool_allocator.set_size_compare_ratio(0.0f);
  778. g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
  779. // default option
  780. g_default_option.lightmode = true;
  781. g_default_option.num_threads = num_threads;
  782. g_default_option.blob_allocator = &g_blob_pool_allocator;
  783. g_default_option.workspace_allocator = &g_workspace_pool_allocator;
  784. g_default_option.use_winograd_convolution = true;
  785. g_default_option.use_sgemm_convolution = true;
  786. g_default_option.use_int8_inference = true;
  787. g_default_option.use_fp16_packed = true;
  788. g_default_option.use_fp16_storage = true;
  789. g_default_option.use_fp16_arithmetic = true;
  790. g_default_option.use_int8_storage = true;
  791. g_default_option.use_int8_arithmetic = true;
  792. ncnn::set_cpu_powersave(2);
  793. ncnn::set_omp_dynamic(0);
  794. ncnn::set_omp_num_threads(num_threads);
  795. std::vector<std::string> image_file_path_list;
  796. // parse the image file.
  797. parse_images_dir(image_folder_path, image_file_path_list);
  798. // get the calibration table file, and save it.
  799. const int ret = post_training_quantize(image_file_path_list, ncnn_param_file_path, ncnn_bin_file_path, saved_table_file_path, pre_param);
  800. if (!ret)
  801. {
  802. fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/...233...\n");
  803. }
  804. return 0;
  805. }