You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ncnn2table.cpp 30 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956
  1. // BUG1989 is pleased to support the open source community by supporting ncnn available.
  2. //
  3. // author:BUG1989 (https://github.com/BUG1989/) Long-term support.
  4. // author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
  5. //
  6. // Copyright (C) 2019 BUG1989. All rights reserved.
  7. //
  8. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  9. // in compliance with the License. You may obtain a copy of the License at
  10. //
  11. // https://opensource.org/licenses/BSD-3-Clause
  12. //
  13. // Unless required by applicable law or agreed to in writing, software distributed
  14. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  15. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  16. // specific language governing permissions and limitations under the License.
  17. #ifdef _MSC_VER
  18. #define _CRT_SECURE_NO_DEPRECATE
  19. #endif
  #include <algorithm>
  #include <cassert>
  #include <cmath>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <iostream>
  #include <limits>
  #include <map>
  #include <string>
  #include <vector>

  #include <opencv2/opencv.hpp>

  // ncnn public header
  #include "benchmark.h"
  #include "cpu.h"
  #include "net.h"
  // ncnn private header
  #include "layer/convolution.h"
  #include "layer/convolutiondepthwise.h"
  #include "layer/innerproduct.h"
  // Inference options shared by the tool: main() fills them in and
  // post_training_quantize() copies them into the net's `opt`.
  36. static ncnn::Option g_default_option;
  // Allocator that main() installs as g_default_option.blob_allocator.
  37. static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
  // Allocator that main() installs as g_default_option.workspace_allocator.
  38. static ncnn::PoolAllocator g_workspace_pool_allocator;
  39. // Get the file names from direct path
  40. int parse_images_dir(const std::string& base_path, std::vector<std::string>& file_path)
  41. {
  42. file_path.clear();
  43. const cv::String base_path_str(base_path);
  44. std::vector<cv::String> image_list;
  45. cv::glob(base_path_str, image_list, true);
  46. for (size_t i = 0; i < image_list.size(); i++)
  47. {
  48. const cv::String& image_path = image_list[i];
  49. file_path.push_back(image_path);
  50. }
  51. return 0;
  52. }
  53. class QuantNet : public ncnn::Net
  54. {
  55. public:
  56. int get_conv_names();
  57. int get_conv_bottom_blob_names();
  58. int get_conv_weight_blob_scales();
  59. int get_input_names();
  60. public:
  61. std::vector<std::string> conv_names;
  62. std::map<std::string, std::string> conv_bottom_blob_names;
  63. std::map<std::string, std::vector<float> > weight_scales;
  64. std::vector<std::string> input_names;
  65. };
  66. int QuantNet::get_input_names()
  67. {
  68. for (size_t i = 0; i < layers.size(); i++)
  69. {
  70. const ncnn::Layer* layer = layers[i];
  71. if (layer->type == "Input")
  72. {
  73. for (size_t j = 0; j < layer->tops.size(); j++)
  74. {
  75. int blob_index = layer->tops[j];
  76. std::string name = blobs[blob_index].name;
  77. input_names.push_back(name);
  78. }
  79. }
  80. }
  81. return 0;
  82. }
  83. int QuantNet::get_conv_names()
  84. {
  85. for (size_t i = 0; i < layers.size(); i++)
  86. {
  87. const ncnn::Layer* layer = layers[i];
  88. if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
  89. {
  90. std::string name = layer->name;
  91. conv_names.push_back(name);
  92. }
  93. }
  94. return 0;
  95. }
  96. int QuantNet::get_conv_bottom_blob_names()
  97. {
  98. // find conv bottom name or index
  99. for (size_t i = 0; i < layers.size(); i++)
  100. {
  101. const ncnn::Layer* layer = layers[i];
  102. if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
  103. {
  104. const std::string& name = layer->name;
  105. const std::string& bottom_blob_name = blobs[layer->bottoms[0]].name;
  106. conv_bottom_blob_names[name] = bottom_blob_name;
  107. }
  108. }
  109. return 0;
  110. }
  111. int QuantNet::get_conv_weight_blob_scales()
  112. {
  113. for (size_t i = 0; i < layers.size(); i++)
  114. {
  115. const ncnn::Layer* layer = layers[i];
  116. if (layer->type == "Convolution")
  117. {
  118. const ncnn::Convolution* convolution = static_cast<const ncnn::Convolution*>(layer);
  119. std::string name = layer->name;
  120. const int weight_data_size_output = convolution->weight_data_size / convolution->num_output;
  121. std::vector<float> scales;
  122. // int8 winograd F43 needs weight data to use 6bit quantization
  123. bool quant_6bit = false;
  124. int kernel_w = convolution->kernel_w;
  125. int kernel_h = convolution->kernel_h;
  126. int dilation_w = convolution->dilation_w;
  127. int dilation_h = convolution->dilation_h;
  128. int stride_w = convolution->stride_w;
  129. int stride_h = convolution->stride_h;
  130. if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
  131. quant_6bit = true;
  132. for (int n = 0; n < convolution->num_output; n++)
  133. {
  134. const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  135. const float* data_n = weight_data_n;
  136. float max_value = std::numeric_limits<float>::min();
  137. for (int k = 0; k < weight_data_size_output; k++)
  138. {
  139. max_value = std::max(max_value, std::fabs(data_n[k]));
  140. }
  141. if (quant_6bit)
  142. {
  143. scales.push_back(31 / max_value);
  144. }
  145. else
  146. {
  147. scales.push_back(127 / max_value);
  148. }
  149. }
  150. weight_scales[name] = scales;
  151. }
  152. if (layer->type == "ConvolutionDepthWise")
  153. {
  154. const ncnn::ConvolutionDepthWise* convolutiondepthwise = static_cast<const ncnn::ConvolutionDepthWise*>(layer);
  155. std::string name = layer->name;
  156. const int weight_data_size_output = convolutiondepthwise->weight_data_size / convolutiondepthwise->group;
  157. std::vector<float> scales;
  158. for (int n = 0; n < convolutiondepthwise->group; n++)
  159. {
  160. const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  161. const float* data_n = weight_data_n;
  162. float max_value = std::numeric_limits<float>::min();
  163. for (int k = 0; k < weight_data_size_output; k++)
  164. {
  165. max_value = std::max(max_value, std::fabs(data_n[k]));
  166. }
  167. scales.push_back(127 / max_value);
  168. }
  169. weight_scales[name] = scales;
  170. }
  171. if (layer->type == "InnerProduct")
  172. {
  173. const ncnn::InnerProduct* innerproduct = static_cast<const ncnn::InnerProduct*>(layer);
  174. std::string name = layer->name;
  175. const int weight_data_size_output = innerproduct->weight_data_size / innerproduct->num_output;
  176. std::vector<float> scales;
  177. for (int n = 0; n < innerproduct->num_output; n++)
  178. {
  179. const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  180. const float* data_n = weight_data_n;
  181. float max_value = std::numeric_limits<float>::min();
  182. for (int k = 0; k < weight_data_size_output; k++)
  183. max_value = std::max(max_value, std::fabs(data_n[k]));
  184. scales.push_back(127 / max_value);
  185. }
  186. weight_scales[name] = scales;
  187. }
  188. }
  189. return 0;
  190. }
  191. class QuantizeData
  192. {
  193. public:
  194. QuantizeData(const std::string& layer_name, const int& num);
  195. int initial_blob_max(ncnn::Mat data);
  196. int initial_histogram_interval();
  197. int initial_histogram_value();
  198. int normalize_histogram();
  199. int update_histogram(ncnn::Mat data);
  200. float compute_kl_divergence(const std::vector<float>& dist_a, const std::vector<float>& dist_b) const;
  201. int threshold_distribution(const std::vector<float>& distribution, const int target_bin = 128) const;
  202. float get_data_blob_scale();
  203. public:
  204. std::string name;
  205. float max_value;
  206. int num_bins;
  207. float histogram_interval;
  208. std::vector<float> histogram;
  209. float threshold;
  210. int threshold_bin;
  211. float scale;
  212. };
  213. QuantizeData::QuantizeData(const std::string& layer_name, const int& num)
  214. {
  215. name = layer_name;
  216. max_value = 0.f;
  217. num_bins = num;
  218. histogram_interval = 0.f;
  219. histogram.resize(num_bins);
  220. initial_histogram_value();
  221. threshold = 0.f;
  222. threshold_bin = 0;
  223. scale = 1.0f;
  224. }
  225. int QuantizeData::initial_blob_max(ncnn::Mat data)
  226. {
  227. const int channel_num = data.c;
  228. const int size = data.w * data.h;
  229. for (int q = 0; q < channel_num; q++)
  230. {
  231. const float* data_n = data.channel(q);
  232. for (int i = 0; i < size; i++)
  233. {
  234. max_value = std::max(max_value, std::fabs(data_n[i]));
  235. }
  236. }
  237. return 0;
  238. }
  239. int QuantizeData::initial_histogram_interval()
  240. {
  241. histogram_interval = max_value / static_cast<float>(num_bins);
  242. return 0;
  243. }
  244. int QuantizeData::initial_histogram_value()
  245. {
  246. for (size_t i = 0; i < histogram.size(); i++)
  247. {
  248. histogram[i] = 0.00001f;
  249. }
  250. return 0;
  251. }
  252. int QuantizeData::normalize_histogram()
  253. {
  254. const size_t length = histogram.size();
  255. float sum = 0;
  256. for (size_t i = 0; i < length; i++)
  257. sum += histogram[i];
  258. for (size_t i = 0; i < length; i++)
  259. histogram[i] /= sum;
  260. return 0;
  261. }
  262. int QuantizeData::update_histogram(ncnn::Mat data)
  263. {
  264. const int channel_num = data.c;
  265. const int size = data.w * data.h;
  266. for (int q = 0; q < channel_num; q++)
  267. {
  268. const float* data_n = data.channel(q);
  269. for (int i = 0; i < size; i++)
  270. {
  271. if (data_n[i] == 0)
  272. continue;
  273. const int index = std::min(static_cast<int>(std::abs(data_n[i]) / histogram_interval), 2047);
  274. histogram[index]++;
  275. }
  276. }
  277. return 0;
  278. }
  279. float QuantizeData::compute_kl_divergence(const std::vector<float>& dist_a, const std::vector<float>& dist_b) const
  280. {
  281. const size_t length = dist_a.size();
  282. assert(dist_b.size() == length);
  283. float result = 0;
  284. for (size_t i = 0; i < length; i++)
  285. {
  286. if (dist_a[i] != 0)
  287. {
  288. if (dist_b[i] == 0)
  289. {
  290. result += 1;
  291. }
  292. else
  293. {
  294. result += dist_a[i] * log(dist_a[i] / dist_b[i]);
  295. }
  296. }
  297. }
  298. return result;
  299. }
  // Search for the histogram clipping point whose clipped distribution is
  // closest (by compute_kl_divergence) to its own `target_bin`-level
  // approximation.
  //
  // For every candidate `threshold` in [target_bin, distribution.size()):
  //   1. t_distribution = the first `threshold` bins, with the mass of every
  //      bin at or above `threshold` added to its last bin;
  //   2. quantize_distribution = the first `threshold` bins of `distribution`
  //      merged into `target_bin` buckets; a bin straddling a bucket edge
  //      contributes in proportion to its overlap;
  //   3. expand_distribution = the buckets spread back over `threshold` bins,
  //      each bucket shared (by overlap) among its non-zero source bins only;
  //   4. the candidate is scored with
  //      compute_kl_divergence(t_distribution, expand_distribution).
  //
  // Returns the candidate with the smallest score. If no score is below the
  // starting value 1000, `target_bin` itself is returned.
  // NOTE(review): step 2 reads `distribution`, not `t_distribution`, so the
  // folded tail mass is not part of the buckets - confirm this is intended.
  300. int QuantizeData::threshold_distribution(const std::vector<float>& distribution, const int target_bin) const
  301. {
  302. int target_threshold = target_bin;
  303. float min_kl_divergence = 1000;
  304. const int length = static_cast<int>(distribution.size());
  305. std::vector<float> quantize_distribution(target_bin);
  // Mass of all bins from target_bin to the end; kept up to date below so that
  // at each iteration it equals the mass of the bins at or above `threshold`.
  306. float threshold_sum = 0;
  307. for (int threshold = target_bin; threshold < length; threshold++)
  308. {
  309. threshold_sum += distribution[threshold];
  310. }
  311. for (int threshold = target_bin; threshold < length; threshold++)
  312. {
  // Reference distribution: clipped histogram with the tail folded into the last bin.
  313. std::vector<float> t_distribution(distribution.begin(), distribution.begin() + threshold);
  314. t_distribution[threshold - 1] += threshold_sum;
  315. threshold_sum -= distribution[threshold];
  316. // get P
  317. fill(quantize_distribution.begin(), quantize_distribution.end(), 0.0f);
  // Number of source bins (possibly fractional) covered by one bucket.
  318. const float num_per_bin = static_cast<float>(threshold) / static_cast<float>(target_bin);
  319. for (int i = 0; i < target_bin; i++)
  320. {
  // Bucket i covers source positions [start, end).
  321. const float start = static_cast<float>(i) * num_per_bin;
  322. const float end = start + num_per_bin;
  // Partially covered bin on the left edge.
  323. const int left_upper = static_cast<int>(ceil(start));
  324. if (static_cast<float>(left_upper) > start)
  325. {
  326. const float left_scale = static_cast<float>(left_upper) - start;
  327. quantize_distribution[i] += left_scale * distribution[left_upper - 1];
  328. }
  // Partially covered bin on the right edge.
  329. const int right_lower = static_cast<int>(floor(end));
  330. if (static_cast<float>(right_lower) < end)
  331. {
  332. const float right_scale = end - static_cast<float>(right_lower);
  333. quantize_distribution[i] += right_scale * distribution[right_lower];
  334. }
  // Fully covered bins in between.
  335. for (int j = left_upper; j < right_lower; j++)
  336. {
  337. quantize_distribution[i] += distribution[j];
  338. }
  339. }
  340. // get Q
  341. std::vector<float> expand_distribution(threshold, 0);
  342. for (int i = 0; i < target_bin; i++)
  343. {
  344. const float start = static_cast<float>(i) * num_per_bin;
  345. const float end = start + num_per_bin;
  // `count` accumulates the (fractional) number of non-zero source bins in bucket i.
  346. float count = 0;
  347. const int left_upper = static_cast<int>(ceil(start));
  348. float left_scale = 0;
  349. if (static_cast<float>(left_upper) > start)
  350. {
  351. left_scale = static_cast<float>(left_upper) - start;
  352. if (distribution[left_upper - 1] != 0)
  353. {
  354. count += left_scale;
  355. }
  356. }
  357. const int right_lower = static_cast<int>(floor(end));
  358. float right_scale = 0;
  359. if (static_cast<float>(right_lower) < end)
  360. {
  361. right_scale = end - static_cast<float>(right_lower);
  362. if (distribution[right_lower] != 0)
  363. {
  364. count += right_scale;
  365. }
  366. }
  367. for (int j = left_upper; j < right_lower; j++)
  368. {
  369. if (distribution[j] != 0)
  370. {
  371. count++;
  372. }
  373. }
  // Share of the bucket given to one whole non-zero source bin. When count is 0
  // no source bin is non-zero, so none of the branches below uses this value.
  374. const float expand_value = quantize_distribution[i] / count;
  375. if (static_cast<float>(left_upper) > start)
  376. {
  377. if (distribution[left_upper - 1] != 0)
  378. {
  379. expand_distribution[left_upper - 1] += expand_value * left_scale;
  380. }
  381. }
  382. if (static_cast<float>(right_lower) < end)
  383. {
  384. if (distribution[right_lower] != 0)
  385. {
  386. expand_distribution[right_lower] += expand_value * right_scale;
  387. }
  388. }
  389. for (int j = left_upper; j < right_lower; j++)
  390. {
  391. if (distribution[j] != 0)
  392. {
  393. expand_distribution[j] += expand_value;
  394. }
  395. }
  396. }
  397. // kl
  398. const float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);
  399. // the best num of bin
  400. if (kl_divergence < min_kl_divergence)
  401. {
  402. min_kl_divergence = kl_divergence;
  403. target_threshold = threshold;
  404. }
  405. }
  406. return target_threshold;
  407. }
  408. float QuantizeData::get_data_blob_scale()
  409. {
  410. normalize_histogram();
  411. threshold_bin = threshold_distribution(histogram);
  412. threshold = (static_cast<float>(threshold_bin) + 0.5f) * histogram_interval;
  413. scale = 127 / threshold;
  414. return scale;
  415. }
  // Preprocessing settings applied to every calibration image before it is
  // fed to the network (see post_training_quantize()).
  416. struct PreParam
  417. {
  // Per-channel values passed to substract_mean_normalize() as the mean argument.
  418. float mean[3];
  // Per-channel values passed to substract_mean_normalize() as the norm argument.
  419. float norm[3];
  // Target width and height given to ncnn::Mat::from_pixels_resize().
  420. int width;
  421. int height;
  // true: convert with PIXEL_BGR2RGB; false: keep PIXEL_BGR.
  422. bool swapRB;
  423. };
  424. static int post_training_quantize(const std::vector<std::string>& image_list, const std::string& param_path, const std::string& bin_path, const std::string& table_path, struct PreParam& per_param)
  425. {
  426. size_t size = image_list.size();
  427. QuantNet net;
  428. net.opt = g_default_option;
  429. net.load_param(param_path.c_str());
  430. net.load_model(bin_path.c_str());
  431. float mean_vals[3];
  432. float norm_vals[3];
  433. int width = per_param.width;
  434. int height = per_param.height;
  435. bool swapRB = per_param.swapRB;
  436. mean_vals[0] = per_param.mean[0];
  437. mean_vals[1] = per_param.mean[1];
  438. mean_vals[2] = per_param.mean[2];
  439. norm_vals[0] = per_param.norm[0];
  440. norm_vals[1] = per_param.norm[1];
  441. norm_vals[2] = per_param.norm[2];
  442. g_blob_pool_allocator.clear();
  443. g_workspace_pool_allocator.clear();
  444. net.get_input_names();
  445. net.get_conv_names();
  446. net.get_conv_bottom_blob_names();
  447. net.get_conv_weight_blob_scales();
  448. if (net.input_names.empty())
  449. {
  450. fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n");
  451. return -1;
  452. }
  453. FILE* fp = fopen(table_path.c_str(), "w");
  454. // save quantization scale of weight
  455. printf("====> Quantize the parameters.\n");
  456. for (size_t i = 0; i < net.conv_names.size(); i++)
  457. {
  458. std::string layer_name = net.conv_names[i];
  459. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  460. std::vector<float> weight_scale_n = net.weight_scales[layer_name];
  461. fprintf(fp, "%s_param_0 ", layer_name.c_str());
  462. for (size_t j = 0; j < weight_scale_n.size(); j++)
  463. {
  464. fprintf(fp, "%f ", weight_scale_n[j]);
  465. }
  466. fprintf(fp, "\n");
  467. }
  468. // initial quantization data
  469. std::vector<QuantizeData> quantize_datas;
  470. for (size_t i = 0; i < net.conv_names.size(); i++)
  471. {
  472. std::string layer_name = net.conv_names[i];
  473. QuantizeData quantize_data(layer_name, 2048);
  474. quantize_datas.push_back(quantize_data);
  475. }
  476. // step 1 count the max value
  477. printf("====> Quantize the activation.\n");
  478. printf(" ====> step 1 : find the max value.\n");
  479. for (size_t i = 0; i < image_list.size(); i++)
  480. {
  481. std::string img_name = image_list[i];
  482. if ((i + 1) % 100 == 0)
  483. {
  484. fprintf(stderr, " %d/%d\n", static_cast<int>(i + 1), static_cast<int>(size));
  485. }
  486. #if OpenCV_VERSION_MAJOR > 2
  487. cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
  488. #else
  489. cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
  490. #endif
  491. if (bgr.empty())
  492. {
  493. fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
  494. return -1;
  495. }
  496. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
  497. in.substract_mean_normalize(mean_vals, norm_vals);
  498. ncnn::Extractor ex = net.create_extractor();
  499. ex.input(net.input_names[0].c_str(), in);
  500. for (size_t j = 0; j < net.conv_names.size(); j++)
  501. {
  502. std::string layer_name = net.conv_names[j];
  503. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  504. ncnn::Mat out;
  505. ex.extract(blob_name.c_str(), out);
  506. for (size_t k = 0; k < quantize_datas.size(); k++)
  507. {
  508. if (quantize_datas[k].name == layer_name)
  509. {
  510. quantize_datas[k].initial_blob_max(out);
  511. break;
  512. }
  513. }
  514. }
  515. }
  516. // step 2 histogram_interval
  517. printf(" ====> step 2 : generate the histogram_interval.\n");
  518. for (size_t i = 0; i < net.conv_names.size(); i++)
  519. {
  520. std::string layer_name = net.conv_names[i];
  521. for (size_t k = 0; k < quantize_datas.size(); k++)
  522. {
  523. if (quantize_datas[k].name == layer_name)
  524. {
  525. quantize_datas[k].initial_histogram_interval();
  526. fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_datas[k].name.c_str(), quantize_datas[k].max_value, quantize_datas[k].histogram_interval);
  527. break;
  528. }
  529. }
  530. }
  531. // step 3 histogram
  532. printf(" ====> step 3 : generate the histogram.\n");
  533. for (size_t i = 0; i < image_list.size(); i++)
  534. {
  535. std::string img_name = image_list[i];
  536. if ((i + 1) % 100 == 0)
  537. fprintf(stderr, " %d/%d\n", (int)(i + 1), (int)size);
  538. #if OpenCV_VERSION_MAJOR > 2
  539. cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
  540. #else
  541. cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
  542. #endif
  543. if (bgr.empty())
  544. {
  545. fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
  546. return -1;
  547. }
  548. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
  549. in.substract_mean_normalize(mean_vals, norm_vals);
  550. ncnn::Extractor ex = net.create_extractor();
  551. ex.input(net.input_names[0].c_str(), in);
  552. for (size_t j = 0; j < net.conv_names.size(); j++)
  553. {
  554. std::string layer_name = net.conv_names[j];
  555. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  556. ncnn::Mat out;
  557. ex.extract(blob_name.c_str(), out);
  558. for (size_t k = 0; k < quantize_datas.size(); k++)
  559. {
  560. if (quantize_datas[k].name == layer_name)
  561. {
  562. quantize_datas[k].update_histogram(out);
  563. break;
  564. }
  565. }
  566. }
  567. }
  568. // step4 kld
  569. printf(" ====> step 4 : using kld to find the best threshold value.\n");
  570. for (size_t i = 0; i < net.conv_names.size(); i++)
  571. {
  572. std::string layer_name = net.conv_names[i];
  573. std::string blob_name = net.conv_bottom_blob_names[layer_name];
  574. fprintf(stderr, "%-20s ", layer_name.c_str());
  575. for (size_t k = 0; k < quantize_datas.size(); k++)
  576. {
  577. if (quantize_datas[k].name == layer_name)
  578. {
  579. quantize_datas[k].get_data_blob_scale();
  580. fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n",
  581. quantize_datas[k].threshold_bin,
  582. quantize_datas[k].threshold,
  583. quantize_datas[k].histogram_interval,
  584. quantize_datas[k].scale);
  585. fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_datas[k].scale);
  586. break;
  587. }
  588. }
  589. }
  590. fclose(fp);
  591. printf("====> Save the calibration table done.\n");
  592. return 0;
  593. }
  594. // usage
  // Print one example command line to stdout.
  void showUsage()
  {
      static const char* const example_command = "example: ./ncnn2table --param=squeezenet-fp32.param --bin=squeezenet-fp32.bin --images=images/ --output=squeezenet.table --mean=104.0,117.0,123.0 --norm=1.0,1.0,1.0 --size=224,224 --swapRB --thread=2";
      std::cout << example_command << std::endl;
  }
  // Parse a string of the form "a,b,c" into three floats appended to `value`.
  //
  // The string must contain exactly two commas, must not start or end with a
  // comma, and the two commas must not be adjacent. Each field is converted
  // with std::atof, so a non-numeric field becomes 0.
  //
  // Returns 0 on success. On a format error a message is written to stderr,
  // `value` is left untouched and -1 is returned.
  static int find_all_value_in_string(const std::string& values_string, std::vector<float>& value)
  {
      std::vector<size_t> comma_pos;
      for (size_t i = 0; i < values_string.size(); i++)
      {
          if (',' == values_string[i])
          {
              comma_pos.push_back(i);
          }
      }

      // check
      if (comma_pos.empty())
      {
          fprintf(stderr, "ERROR: Cannot find any ',' in string, please check.\n");
          return -1;
      }
      if (2 != comma_pos.size())
      {
          fprintf(stderr, "ERROR: Expected exactly 3 values separated by ',', please check.\n");
          return -1;
      }
      if (comma_pos.front() == 0)
      {
          fprintf(stderr, "ERROR: Char ',' in fist of string, please check.\n");
          return -1;
      }
      // A trailing comma is one sitting on the last character of the string.
      if (comma_pos.back() + 1 == values_string.size())
      {
          fprintf(stderr, "ERROR: Char ',' in last of string, please check.\n");
          return -1;
      }
      if (comma_pos[1] - comma_pos[0] <= 1)
      {
          fprintf(stderr, "ERROR: Neighbouring char ',' was found.\n");
          return -1;
      }

      const std::string ch0_val_str = values_string.substr(0, comma_pos[0]);
      const std::string ch1_val_str = values_string.substr(comma_pos[0] + 1, comma_pos[1] - comma_pos[0] - 1);
      const std::string ch2_val_str = values_string.substr(comma_pos[1] + 1);

      value.push_back(static_cast<float>(std::atof(ch0_val_str.c_str())));
      value.push_back(static_cast<float>(std::atof(ch1_val_str.c_str())));
      value.push_back(static_cast<float>(std::atof(ch2_val_str.c_str())));
      return 0;
  }
  649. int main(int argc, char** argv)
  650. {
  651. std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl;
  652. const char* key_map = "{help h usage ? | | print this message }"
  653. "{param p | | path to ncnn.param file }"
  654. "{bin b | | path to ncnn.bin file }"
  655. "{images i | | path to calibration images folder }"
  656. "{output o | | path to output calibration table file }"
  657. "{mean m | | value of mean (mean value, default is 104.0,117.0,123.0) }"
  658. "{norm n | | value of normalize (scale value, default is 1.0,1.0,1.0) }"
  659. "{size s | | the size of input image(using the resize the original image,default is w=224,h=224) }"
  660. "{swapRB c | | flag which indicates that swap first and last channels in 3-channel image is necessary }"
  661. "{thread t | 4 | count of processing threads }";
  662. cv::CommandLineParser parser(argc, argv, key_map);
  663. if (parser.has("help"))
  664. {
  665. parser.printMessage();
  666. showUsage();
  667. return 0;
  668. }
  669. if (!parser.has("param") || !parser.has("bin") || !parser.has("images") || !parser.has("output") || !parser.has("mean") || !parser.has("norm"))
  670. {
  671. std::cout << "Inputs is does not include all needed param, pleas check..." << std::endl;
  672. parser.printMessage();
  673. showUsage();
  674. return 0;
  675. }
  676. const std::string image_folder_path = parser.get<cv::String>("images");
  677. const std::string ncnn_param_file_path = parser.get<cv::String>("param");
  678. const std::string ncnn_bin_file_path = parser.get<cv::String>("bin");
  679. const std::string saved_table_file_path = parser.get<cv::String>("output");
  680. // check the input param
  681. if (image_folder_path.empty() || ncnn_param_file_path.empty() || ncnn_bin_file_path.empty() || saved_table_file_path.empty())
  682. {
  683. fprintf(stderr, "One or more path may be empty, please check and try again.\n");
  684. return 0;
  685. }
  686. const int num_threads = parser.get<int>("thread");
  687. struct PreParam pre_param;
  688. pre_param.mean[0] = 104.f;
  689. pre_param.mean[1] = 117.f;
  690. pre_param.mean[2] = 103.f;
  691. pre_param.norm[0] = 1.f;
  692. pre_param.norm[1] = 1.f;
  693. pre_param.norm[2] = 1.f;
  694. pre_param.width = 224;
  695. pre_param.height = 224;
  696. pre_param.swapRB = false;
  697. if (parser.has("mean"))
  698. {
  699. const std::string mean_str = parser.get<std::string>("mean");
  700. std::vector<float> mean_values;
  701. const int ret = find_all_value_in_string(mean_str, mean_values);
  702. if (0 != ret && 3 != mean_values.size())
  703. {
  704. fprintf(stderr, "ERROR: Searching mean value from --mean was failed.\n");
  705. return -1;
  706. }
  707. pre_param.mean[0] = mean_values[0];
  708. pre_param.mean[1] = mean_values[1];
  709. pre_param.mean[2] = mean_values[2];
  710. }
  711. if (parser.has("norm"))
  712. {
  713. const std::string norm_str = parser.get<std::string>("norm");
  714. std::vector<float> norm_values;
  715. const int ret = find_all_value_in_string(norm_str, norm_values);
  716. if (0 != ret && 3 != norm_values.size())
  717. {
  718. fprintf(stderr, "ERROR: Searching mean value from --mean was failed, please check --mean param.\n");
  719. return -1;
  720. }
  721. pre_param.norm[0] = norm_values[0];
  722. pre_param.norm[1] = norm_values[1];
  723. pre_param.norm[2] = norm_values[2];
  724. }
  725. if (parser.has("size"))
  726. {
  727. cv::String size_str = parser.get<std::string>("size");
  728. size_t sep_pos = size_str.find_first_of(',');
  729. if (cv::String::npos != sep_pos && sep_pos < size_str.size())
  730. {
  731. cv::String width_value_str;
  732. cv::String height_value_str;
  733. width_value_str = size_str.substr(0, sep_pos);
  734. height_value_str = size_str.substr(sep_pos + 1, size_str.size() - sep_pos - 1);
  735. pre_param.width = static_cast<int>(std::atoi(std::string(width_value_str).c_str()));
  736. pre_param.height = static_cast<int>(std::atoi(std::string(height_value_str).c_str()));
  737. }
  738. else
  739. {
  740. fprintf(stderr, "ERROR: Searching size value from --size was failed, please check --size param.\n");
  741. return -1;
  742. }
  743. }
  744. if (parser.has("swapRB"))
  745. {
  746. pre_param.swapRB = true;
  747. }
  748. g_blob_pool_allocator.set_size_compare_ratio(0.0f);
  749. g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
  750. // default option
  751. g_default_option.lightmode = true;
  752. g_default_option.num_threads = num_threads;
  753. g_default_option.blob_allocator = &g_blob_pool_allocator;
  754. g_default_option.workspace_allocator = &g_workspace_pool_allocator;
  755. g_default_option.use_winograd_convolution = true;
  756. g_default_option.use_sgemm_convolution = true;
  757. g_default_option.use_int8_inference = true;
  758. g_default_option.use_fp16_packed = true;
  759. g_default_option.use_fp16_storage = true;
  760. g_default_option.use_fp16_arithmetic = true;
  761. g_default_option.use_int8_storage = true;
  762. g_default_option.use_int8_arithmetic = true;
  763. ncnn::set_cpu_powersave(2);
  764. ncnn::set_omp_dynamic(0);
  765. ncnn::set_omp_num_threads(num_threads);
  766. std::vector<std::string> image_file_path_list;
  767. // parse the image file.
  768. parse_images_dir(image_folder_path, image_file_path_list);
  769. // get the calibration table file, and save it.
  770. const int ret = post_training_quantize(image_file_path_list, ncnn_param_file_path, ncnn_bin_file_path, saved_table_file_path, pre_param);
  771. if (!ret)
  772. {
  773. fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/...233...\n");
  774. }
  775. return 0;
  776. }