You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ncnn2table.cpp 51 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // author:BUG1989 (https://github.com/BUG1989/) Long-term support.
  4. // author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
  5. //
  6. // Copyright (C) 2019 BUG1989. All rights reserved.
  7. // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
  8. //
  9. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  10. // in compliance with the License. You may obtain a copy of the License at
  11. //
  12. // https://opensource.org/licenses/BSD-3-Clause
  13. //
  14. // Unless required by applicable law or agreed to in writing, software distributed
  15. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  16. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  17. // specific language governing permissions and limitations under the License.
  18. #ifdef _MSC_VER
  19. #define _CRT_SECURE_NO_DEPRECATE
  20. #endif
  21. #include <float.h>
  22. #include <limits.h>
  23. #include <math.h>
  24. #include <stdio.h>
  25. #include <stdint.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #if defined(USE_NCNN_SIMPLEOCV)
  29. #include "simpleocv.h"
  30. #elif defined(USE_LOCAL_IMREADWRITE)
  31. #include "imreadwrite.h"
  32. #else
  33. #include <opencv2/core/core.hpp>
  34. #include <opencv2/highgui/highgui.hpp>
  35. #endif
  36. #include <string>
  37. #include <vector>
  38. // ncnn public header
  39. #include "benchmark.h"
  40. #include "cpu.h"
  41. #include "net.h"
  42. // ncnn private header
  43. #include "layer/convolution.h"
  44. #include "layer/convolutiondepthwise.h"
  45. #include "layer/innerproduct.h"
  // Calibration statistics collected for one convolution input blob.
  class QuantBlobStat
  {
  public:
      // Start with zeroed scalars; both histograms start empty.
      QuantBlobStat()
          : threshold(0.f), absmax(0.f), total(0)
      {
      }

  public:
      // clipping threshold the activation scale is derived from (127 / threshold)
      float threshold;
      // largest absolute activation value observed
      float absmax;

      // ACIQ: element count of the blob
      int total;

      // KL: raw bin counts and their normalized form
      std::vector<uint64_t> histogram;
      std::vector<float> histogram_normed;
  };
  64. class QuantNet : public ncnn::Net
  65. {
  66. public:
  67. QuantNet();
  68. std::vector<ncnn::Blob>& blobs;
  69. std::vector<ncnn::Layer*>& layers;
  70. public:
  71. std::vector<std::vector<std::string> > listspaths;
  72. std::vector<std::vector<float> > means;
  73. std::vector<std::vector<float> > norms;
  74. std::vector<std::vector<int> > shapes;
  75. std::vector<int> type_to_pixels;
  76. int quantize_num_threads;
  77. public:
  78. int init();
  79. void print_quant_info() const;
  80. int save_table(const char* tablepath);
  81. int quantize_KL();
  82. int quantize_ACIQ();
  83. int quantize_EQ();
  84. public:
  85. std::vector<int> input_blobs;
  86. std::vector<int> conv_layers;
  87. std::vector<int> conv_bottom_blobs;
  88. std::vector<int> conv_top_blobs;
  89. // result
  90. std::vector<QuantBlobStat> quant_blob_stats;
  91. std::vector<ncnn::Mat> weight_scales;
  92. std::vector<ncnn::Mat> bottom_blob_scales;
  93. };
  94. QuantNet::QuantNet()
  95. : blobs(mutable_blobs()), layers(mutable_layers())
  96. {
  97. quantize_num_threads = ncnn::get_cpu_count();
  98. }
  99. int QuantNet::init()
  100. {
  101. // find all input layers
  102. for (int i = 0; i < (int)layers.size(); i++)
  103. {
  104. const ncnn::Layer* layer = layers[i];
  105. if (layer->type == "Input")
  106. {
  107. input_blobs.push_back(layer->tops[0]);
  108. }
  109. }
  110. // find all conv layers
  111. for (int i = 0; i < (int)layers.size(); i++)
  112. {
  113. const ncnn::Layer* layer = layers[i];
  114. if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
  115. {
  116. conv_layers.push_back(i);
  117. conv_bottom_blobs.push_back(layer->bottoms[0]);
  118. conv_top_blobs.push_back(layer->tops[0]);
  119. }
  120. }
  121. const int conv_layer_count = (int)conv_layers.size();
  122. const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
  123. quant_blob_stats.resize(conv_bottom_blob_count);
  124. weight_scales.resize(conv_layer_count);
  125. bottom_blob_scales.resize(conv_bottom_blob_count);
  126. return 0;
  127. }
  128. int QuantNet::save_table(const char* tablepath)
  129. {
  130. FILE* fp = fopen(tablepath, "wb");
  131. if (!fp)
  132. {
  133. fprintf(stderr, "fopen %s failed\n", tablepath);
  134. return -1;
  135. }
  136. const int conv_layer_count = (int)conv_layers.size();
  137. const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
  138. for (int i = 0; i < conv_layer_count; i++)
  139. {
  140. const ncnn::Mat& weight_scale = weight_scales[i];
  141. fprintf(fp, "%s_param_0 ", layers[conv_layers[i]]->name.c_str());
  142. for (int j = 0; j < weight_scale.w; j++)
  143. {
  144. fprintf(fp, "%f ", weight_scale[j]);
  145. }
  146. fprintf(fp, "\n");
  147. }
  148. for (int i = 0; i < conv_bottom_blob_count; i++)
  149. {
  150. const ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
  151. fprintf(fp, "%s ", layers[conv_layers[i]]->name.c_str());
  152. for (int j = 0; j < bottom_blob_scale.w; j++)
  153. {
  154. fprintf(fp, "%f ", bottom_blob_scale[j]);
  155. }
  156. fprintf(fp, "\n");
  157. }
  158. fclose(fp);
  159. fprintf(stderr, "ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\\(^0^)/...233...\n");
  160. return 0;
  161. }
  162. void QuantNet::print_quant_info() const
  163. {
  164. for (int i = 0; i < (int)conv_bottom_blobs.size(); i++)
  165. {
  166. const QuantBlobStat& stat = quant_blob_stats[i];
  167. float scale = 127 / stat.threshold;
  168. fprintf(stderr, "%-40s : max = %-15f threshold = %-15f scale = %-15f\n", layers[conv_layers[i]]->name.c_str(), stat.absmax, stat.threshold, scale);
  169. }
  170. }
  // Sum of a[i] * log(a[i] / b[i]) over every index of a.
  //
  // b is indexed with the same indices as a, so it must be at least as long.
  // Neither input is checked for zero entries.
  static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
  {
      float divergence = 0;

      const size_t count = a.size();
      for (size_t idx = 0; idx != count; ++idx)
      {
          const float p = a[idx];
          const float q = b[idx];
          divergence += p * log(p / q);
      }

      return divergence;
  }
  181. int QuantNet::quantize_KL()
  182. {
  183. const int input_blob_count = (int)input_blobs.size();
  184. const int conv_layer_count = (int)conv_layers.size();
  185. const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
  186. const int image_count = (int)listspaths[0].size();
  187. const int num_histogram_bins = 2048;
  188. // initialize conv weight scales
  189. #pragma omp parallel for num_threads(quantize_num_threads)
  190. for (int i = 0; i < conv_layer_count; i++)
  191. {
  192. const ncnn::Layer* layer = layers[conv_layers[i]];
  193. if (layer->type == "Convolution")
  194. {
  195. const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
  196. const int num_output = convolution->num_output;
  197. const int kernel_w = convolution->kernel_w;
  198. const int kernel_h = convolution->kernel_h;
  199. const int dilation_w = convolution->dilation_w;
  200. const int dilation_h = convolution->dilation_h;
  201. const int stride_w = convolution->stride_w;
  202. const int stride_h = convolution->stride_h;
  203. const int weight_data_size_output = convolution->weight_data_size / num_output;
  204. // int8 winograd F43 needs weight data to use 6bit quantization
  205. // TODO proper condition for winograd 3x3 int8
  206. bool quant_6bit = false;
  207. if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
  208. quant_6bit = true;
  209. weight_scales[i].create(num_output);
  210. for (int n = 0; n < num_output; n++)
  211. {
  212. const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  213. float absmax = 0.f;
  214. for (int k = 0; k < weight_data_size_output; k++)
  215. {
  216. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  217. }
  218. if (quant_6bit)
  219. {
  220. weight_scales[i][n] = 31 / absmax;
  221. }
  222. else
  223. {
  224. weight_scales[i][n] = 127 / absmax;
  225. }
  226. }
  227. }
  228. if (layer->type == "ConvolutionDepthWise")
  229. {
  230. const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
  231. const int group = convolutiondepthwise->group;
  232. const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
  233. std::vector<float> scales;
  234. weight_scales[i].create(group);
  235. for (int n = 0; n < group; n++)
  236. {
  237. const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  238. float absmax = 0.f;
  239. for (int k = 0; k < weight_data_size_output; k++)
  240. {
  241. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  242. }
  243. weight_scales[i][n] = 127 / absmax;
  244. }
  245. }
  246. if (layer->type == "InnerProduct")
  247. {
  248. const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
  249. const int num_output = innerproduct->num_output;
  250. const int weight_data_size_output = innerproduct->weight_data_size / num_output;
  251. weight_scales[i].create(num_output);
  252. for (int n = 0; n < num_output; n++)
  253. {
  254. const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  255. float absmax = 0.f;
  256. for (int k = 0; k < weight_data_size_output; k++)
  257. {
  258. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  259. }
  260. weight_scales[i][n] = 127 / absmax;
  261. }
  262. }
  263. }
  264. // count the absmax
  265. #pragma omp parallel for num_threads(quantize_num_threads)
  266. for (int i = 0; i < image_count; i++)
  267. {
  268. ncnn::Extractor ex = create_extractor();
  269. for (int j = 0; j < input_blob_count; j++)
  270. {
  271. const std::string& imagepath = listspaths[j][i];
  272. const std::vector<int>& shape = shapes[j];
  273. const int type_to_pixel = type_to_pixels[j];
  274. const std::vector<float>& mean_vals = means[j];
  275. const std::vector<float>& norm_vals = norms[j];
  276. int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
  277. if (type_to_pixel != pixel_convert_type)
  278. {
  279. pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
  280. }
  281. const int target_w = shape[0];
  282. const int target_h = shape[1];
  283. cv::Mat bgr = cv::imread(imagepath, 1);
  284. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
  285. in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
  286. ex.input(input_blobs[j], in);
  287. }
  288. for (int j = 0; j < conv_bottom_blob_count; j++)
  289. {
  290. ncnn::Mat out;
  291. ex.extract(conv_bottom_blobs[j], out);
  292. // count absmax
  293. {
  294. float absmax = 0.f;
  295. const int outc = out.c;
  296. const int outsize = out.w * out.h;
  297. for (int p = 0; p < outc; p++)
  298. {
  299. const float* ptr = out.channel(p);
  300. for (int k = 0; k < outsize; k++)
  301. {
  302. absmax = std::max(absmax, (float)fabs(ptr[k]));
  303. }
  304. }
  305. #pragma omp critical
  306. {
  307. QuantBlobStat& stat = quant_blob_stats[j];
  308. stat.absmax = std::max(stat.absmax, absmax);
  309. }
  310. }
  311. }
  312. }
  313. // initialize histogram
  314. #pragma omp parallel for num_threads(quantize_num_threads)
  315. for (int i = 0; i < conv_bottom_blob_count; i++)
  316. {
  317. QuantBlobStat& stat = quant_blob_stats[i];
  318. stat.histogram.resize(num_histogram_bins, 0);
  319. stat.histogram_normed.resize(num_histogram_bins, 0);
  320. }
  321. // build histogram
  322. #pragma omp parallel for num_threads(quantize_num_threads)
  323. for (int i = 0; i < image_count; i++)
  324. {
  325. ncnn::Extractor ex = create_extractor();
  326. for (int j = 0; j < input_blob_count; j++)
  327. {
  328. const std::string& imagepath = listspaths[j][i];
  329. const std::vector<int>& shape = shapes[j];
  330. const int type_to_pixel = type_to_pixels[j];
  331. const std::vector<float>& mean_vals = means[j];
  332. const std::vector<float>& norm_vals = norms[j];
  333. int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
  334. if (type_to_pixel != pixel_convert_type)
  335. {
  336. pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
  337. }
  338. const int target_w = shape[0];
  339. const int target_h = shape[1];
  340. cv::Mat bgr = cv::imread(imagepath, 1);
  341. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
  342. in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
  343. ex.input(input_blobs[j], in);
  344. }
  345. for (int j = 0; j < conv_bottom_blob_count; j++)
  346. {
  347. ncnn::Mat out;
  348. ex.extract(conv_bottom_blobs[j], out);
  349. // count histogram bin
  350. {
  351. const float absmax = quant_blob_stats[j].absmax;
  352. std::vector<uint64_t> histogram(num_histogram_bins, 0);
  353. const int outc = out.c;
  354. const int outsize = out.w * out.h;
  355. for (int p = 0; p < outc; p++)
  356. {
  357. const float* ptr = out.channel(p);
  358. for (int k = 0; k < outsize; k++)
  359. {
  360. if (ptr[k] == 0.f)
  361. continue;
  362. const int index = std::min((int)(fabs(ptr[k]) / absmax * num_histogram_bins), (num_histogram_bins - 1));
  363. histogram[index] += 1;
  364. }
  365. }
  366. #pragma omp critical
  367. {
  368. QuantBlobStat& stat = quant_blob_stats[j];
  369. for (int k = 0; k < num_histogram_bins; k++)
  370. {
  371. stat.histogram[k] += histogram[k];
  372. }
  373. }
  374. }
  375. }
  376. }
  377. // using kld to find the best threshold value
  378. #pragma omp parallel for num_threads(quantize_num_threads)
  379. for (int i = 0; i < conv_bottom_blob_count; i++)
  380. {
  381. QuantBlobStat& stat = quant_blob_stats[i];
  382. // normalize histogram bin
  383. {
  384. uint64_t sum = 0;
  385. for (int j = 0; j < num_histogram_bins; j++)
  386. {
  387. sum += stat.histogram[j];
  388. }
  389. for (int j = 0; j < num_histogram_bins; j++)
  390. {
  391. stat.histogram_normed[j] = (float)(stat.histogram[j] / (double)sum);
  392. }
  393. }
  394. const int target_bin = 128;
  395. int target_threshold = target_bin;
  396. float min_kl_divergence = FLT_MAX;
  397. for (int threshold = target_bin; threshold < num_histogram_bins; threshold++)
  398. {
  399. const float kl_eps = 0.0001f;
  400. std::vector<float> clip_distribution(threshold, kl_eps);
  401. {
  402. for (int j = 0; j < threshold; j++)
  403. {
  404. clip_distribution[j] += stat.histogram_normed[j];
  405. }
  406. for (int j = threshold; j < num_histogram_bins; j++)
  407. {
  408. clip_distribution[threshold - 1] += stat.histogram_normed[j];
  409. }
  410. }
  411. const float num_per_bin = (float)threshold / target_bin;
  412. std::vector<float> quantize_distribution(target_bin, 0.f);
  413. {
  414. {
  415. const float end = num_per_bin;
  416. const int right_lower = (int)floor(end);
  417. const float right_scale = end - right_lower;
  418. if (right_scale > 0)
  419. {
  420. quantize_distribution[0] += right_scale * stat.histogram_normed[right_lower];
  421. }
  422. for (int k = 0; k < right_lower; k++)
  423. {
  424. quantize_distribution[0] += stat.histogram_normed[k];
  425. }
  426. quantize_distribution[0] /= right_lower + right_scale;
  427. }
  428. for (int j = 1; j < target_bin - 1; j++)
  429. {
  430. const float start = j * num_per_bin;
  431. const float end = (j + 1) * num_per_bin;
  432. const int left_upper = (int)ceil(start);
  433. const float left_scale = left_upper - start;
  434. const int right_lower = (int)floor(end);
  435. const float right_scale = end - right_lower;
  436. if (left_scale > 0)
  437. {
  438. quantize_distribution[j] += left_scale * stat.histogram_normed[left_upper - 1];
  439. }
  440. if (right_scale > 0)
  441. {
  442. quantize_distribution[j] += right_scale * stat.histogram_normed[right_lower];
  443. }
  444. for (int k = left_upper; k < right_lower; k++)
  445. {
  446. quantize_distribution[j] += stat.histogram_normed[k];
  447. }
  448. quantize_distribution[j] /= right_lower - left_upper + left_scale + right_scale;
  449. }
  450. {
  451. const float start = threshold - num_per_bin;
  452. const int left_upper = (int)ceil(start);
  453. const float left_scale = left_upper - start;
  454. if (left_scale > 0)
  455. {
  456. quantize_distribution[target_bin - 1] += left_scale * stat.histogram_normed[left_upper - 1];
  457. }
  458. for (int k = left_upper; k < threshold; k++)
  459. {
  460. quantize_distribution[target_bin - 1] += stat.histogram_normed[k];
  461. }
  462. quantize_distribution[target_bin - 1] /= threshold - left_upper + left_scale;
  463. }
  464. }
  465. std::vector<float> expand_distribution(threshold, kl_eps);
  466. {
  467. {
  468. const float end = num_per_bin;
  469. const int right_lower = (int)floor(end);
  470. const float right_scale = end - right_lower;
  471. if (right_scale > 0)
  472. {
  473. expand_distribution[right_lower] += right_scale * quantize_distribution[0];
  474. }
  475. for (int k = 0; k < right_lower; k++)
  476. {
  477. expand_distribution[k] += quantize_distribution[0];
  478. }
  479. }
  480. for (int j = 1; j < target_bin - 1; j++)
  481. {
  482. const float start = j * num_per_bin;
  483. const float end = (j + 1) * num_per_bin;
  484. const int left_upper = (int)ceil(start);
  485. const float left_scale = left_upper - start;
  486. const int right_lower = (int)floor(end);
  487. const float right_scale = end - right_lower;
  488. if (left_scale > 0)
  489. {
  490. expand_distribution[left_upper - 1] += left_scale * quantize_distribution[j];
  491. }
  492. if (right_scale > 0)
  493. {
  494. expand_distribution[right_lower] += right_scale * quantize_distribution[j];
  495. }
  496. for (int k = left_upper; k < right_lower; k++)
  497. {
  498. expand_distribution[k] += quantize_distribution[j];
  499. }
  500. }
  501. {
  502. const float start = threshold - num_per_bin;
  503. const int left_upper = (int)ceil(start);
  504. const float left_scale = left_upper - start;
  505. if (left_scale > 0)
  506. {
  507. expand_distribution[left_upper - 1] += left_scale * quantize_distribution[target_bin - 1];
  508. }
  509. for (int k = left_upper; k < threshold; k++)
  510. {
  511. expand_distribution[k] += quantize_distribution[target_bin - 1];
  512. }
  513. }
  514. }
  515. // kl
  516. const float kl_divergence = compute_kl_divergence(clip_distribution, expand_distribution);
  517. // the best num of bin
  518. if (kl_divergence < min_kl_divergence)
  519. {
  520. min_kl_divergence = kl_divergence;
  521. target_threshold = threshold;
  522. }
  523. }
  524. stat.threshold = (target_threshold + 0.5f) * stat.absmax / num_histogram_bins;
  525. float scale = 127 / stat.threshold;
  526. bottom_blob_scales[i].create(1);
  527. bottom_blob_scales[i][0] = scale;
  528. }
  529. return 0;
  530. }
  // Clipping threshold under a Gaussian assumption (ACIQ).
  //
  // absmax   - largest absolute value observed in the tensor
  // N        - number of elements in the tensor
  // num_bits - quantization bit width; values outside 1..8 are clamped to that
  //            range because the alpha table only has eight entries
  //
  // The standard deviation is estimated as
  //   (absmax * 2 * gaussian_const) / sqrt(2 * log(N))
  // and multiplied by the alpha for the requested bit width.
  //
  // For N <= 1 that estimate is undefined (log(N) <= 0 would give an infinite
  // or NaN result), so absmax itself is returned, i.e. no clipping.
  static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits = 8)
  {
      const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};

      // keep the table lookup in bounds
      if (num_bits < 1)
          num_bits = 1;
      if (num_bits > 8)
          num_bits = 8;

      // too few samples to estimate a deviation
      if (N <= 1)
          return absmax;

      const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));

      const double sigma = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));

      return (float)(alpha_gaussian[num_bits - 1] * sigma);
  }
  538. int QuantNet::quantize_ACIQ()
  539. {
  540. const int input_blob_count = (int)input_blobs.size();
  541. const int conv_layer_count = (int)conv_layers.size();
  542. const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
  543. const int image_count = (int)listspaths[0].size();
  544. // initialize conv weight scales
  545. #pragma omp parallel for num_threads(quantize_num_threads)
  546. for (int i = 0; i < conv_layer_count; i++)
  547. {
  548. const ncnn::Layer* layer = layers[conv_layers[i]];
  549. if (layer->type == "Convolution")
  550. {
  551. const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
  552. const int num_output = convolution->num_output;
  553. const int kernel_w = convolution->kernel_w;
  554. const int kernel_h = convolution->kernel_h;
  555. const int dilation_w = convolution->dilation_w;
  556. const int dilation_h = convolution->dilation_h;
  557. const int stride_w = convolution->stride_w;
  558. const int stride_h = convolution->stride_h;
  559. const int weight_data_size_output = convolution->weight_data_size / num_output;
  560. // int8 winograd F43 needs weight data to use 6bit quantization
  561. // TODO proper condition for winograd 3x3 int8
  562. bool quant_6bit = false;
  563. if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
  564. quant_6bit = true;
  565. weight_scales[i].create(num_output);
  566. for (int n = 0; n < num_output; n++)
  567. {
  568. const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  569. float absmax = 0.f;
  570. for (int k = 0; k < weight_data_size_output; k++)
  571. {
  572. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  573. }
  574. if (quant_6bit)
  575. {
  576. const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output, 6);
  577. weight_scales[i][n] = 31 / threshold;
  578. }
  579. else
  580. {
  581. const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
  582. weight_scales[i][n] = 127 / threshold;
  583. }
  584. }
  585. }
  586. if (layer->type == "ConvolutionDepthWise")
  587. {
  588. const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
  589. const int group = convolutiondepthwise->group;
  590. const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
  591. std::vector<float> scales;
  592. weight_scales[i].create(group);
  593. for (int n = 0; n < group; n++)
  594. {
  595. const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  596. float absmax = 0.f;
  597. for (int k = 0; k < weight_data_size_output; k++)
  598. {
  599. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  600. }
  601. const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
  602. weight_scales[i][n] = 127 / threshold;
  603. }
  604. }
  605. if (layer->type == "InnerProduct")
  606. {
  607. const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
  608. const int num_output = innerproduct->num_output;
  609. const int weight_data_size_output = innerproduct->weight_data_size / num_output;
  610. weight_scales[i].create(num_output);
  611. for (int n = 0; n < num_output; n++)
  612. {
  613. const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
  614. float absmax = 0.f;
  615. for (int k = 0; k < weight_data_size_output; k++)
  616. {
  617. absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
  618. }
  619. const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
  620. weight_scales[i][n] = 127 / threshold;
  621. }
  622. }
  623. }
  624. // count the absmax abssum
  625. #pragma omp parallel for num_threads(quantize_num_threads)
  626. for (int i = 0; i < image_count; i++)
  627. {
  628. ncnn::Extractor ex = create_extractor();
  629. for (int j = 0; j < input_blob_count; j++)
  630. {
  631. const std::string& imagepath = listspaths[j][i];
  632. const std::vector<int>& shape = shapes[j];
  633. const int type_to_pixel = type_to_pixels[j];
  634. const std::vector<float>& mean_vals = means[j];
  635. const std::vector<float>& norm_vals = norms[j];
  636. int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
  637. if (type_to_pixel != pixel_convert_type)
  638. {
  639. pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
  640. }
  641. const int target_w = shape[0];
  642. const int target_h = shape[1];
  643. cv::Mat bgr = cv::imread(imagepath, 1);
  644. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
  645. in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
  646. ex.input(input_blobs[j], in);
  647. }
  648. for (int j = 0; j < conv_bottom_blob_count; j++)
  649. {
  650. ncnn::Mat out;
  651. ex.extract(conv_bottom_blobs[j], out);
  652. // count absmax
  653. {
  654. float absmax = 0.f;
  655. const int outc = out.c;
  656. const int outsize = out.w * out.h;
  657. for (int p = 0; p < outc; p++)
  658. {
  659. const float* ptr = out.channel(p);
  660. for (int k = 0; k < outsize; k++)
  661. {
  662. absmax = std::max(absmax, (float)fabs(ptr[k]));
  663. }
  664. }
  665. #pragma omp critical
  666. {
  667. QuantBlobStat& stat = quant_blob_stats[j];
  668. stat.absmax = std::max(stat.absmax, absmax);
  669. stat.total = outc * outsize;
  670. }
  671. }
  672. }
  673. }
  674. // alpha gaussian
  675. #pragma omp parallel for num_threads(quantize_num_threads)
  676. for (int i = 0; i < conv_bottom_blob_count; i++)
  677. {
  678. QuantBlobStat& stat = quant_blob_stats[i];
  679. stat.threshold = compute_aciq_gaussian_clip(stat.absmax, stat.total);
  680. float scale = 127 / stat.threshold;
  681. bottom_blob_scales[i].create(1);
  682. bottom_blob_scales[i][0] = scale;
  683. }
  684. return 0;
  685. }
  686. static float cosine_similarity(const ncnn::Mat& a, const ncnn::Mat& b)
  687. {
  688. const int chanenls = a.c;
  689. const int size = a.w * a.h;
  690. float sa = 0;
  691. float sb = 0;
  692. float sum = 0;
  693. for (int p = 0; p < chanenls; p++)
  694. {
  695. const float* pa = a.channel(p);
  696. const float* pb = b.channel(p);
  697. for (int i = 0; i < size; i++)
  698. {
  699. sa += pa[i] * pa[i];
  700. sb += pb[i] * pb[i];
  701. sum += pa[i] * pb[i];
  702. }
  703. }
  704. float sim = (float)sum / sqrt(sa) / sqrt(sb);
  705. return sim;
  706. }
  707. static int get_layer_param(const ncnn::Layer* layer, ncnn::ParamDict& pd)
  708. {
  709. if (layer->type == "Convolution")
  710. {
  711. ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
  712. pd.set(0, convolution->num_output);
  713. pd.set(1, convolution->kernel_w);
  714. pd.set(11, convolution->kernel_h);
  715. pd.set(2, convolution->dilation_w);
  716. pd.set(12, convolution->dilation_h);
  717. pd.set(3, convolution->stride_w);
  718. pd.set(13, convolution->stride_h);
  719. pd.set(4, convolution->pad_left);
  720. pd.set(15, convolution->pad_right);
  721. pd.set(14, convolution->pad_top);
  722. pd.set(16, convolution->pad_bottom);
  723. pd.set(18, convolution->pad_value);
  724. pd.set(5, convolution->bias_term);
  725. pd.set(6, convolution->weight_data_size);
  726. pd.set(8, convolution->int8_scale_term);
  727. pd.set(9, convolution->activation_type);
  728. pd.set(10, convolution->activation_params);
  729. }
  730. else if (layer->type == "ConvolutionDepthWise")
  731. {
  732. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
  733. pd.set(0, convolutiondepthwise->num_output);
  734. pd.set(1, convolutiondepthwise->kernel_w);
  735. pd.set(11, convolutiondepthwise->kernel_h);
  736. pd.set(2, convolutiondepthwise->dilation_w);
  737. pd.set(12, convolutiondepthwise->dilation_h);
  738. pd.set(3, convolutiondepthwise->stride_w);
  739. pd.set(13, convolutiondepthwise->stride_h);
  740. pd.set(4, convolutiondepthwise->pad_left);
  741. pd.set(15, convolutiondepthwise->pad_right);
  742. pd.set(14, convolutiondepthwise->pad_top);
  743. pd.set(16, convolutiondepthwise->pad_bottom);
  744. pd.set(18, convolutiondepthwise->pad_value);
  745. pd.set(5, convolutiondepthwise->bias_term);
  746. pd.set(6, convolutiondepthwise->weight_data_size);
  747. pd.set(7, convolutiondepthwise->group);
  748. pd.set(8, convolutiondepthwise->int8_scale_term);
  749. pd.set(9, convolutiondepthwise->activation_type);
  750. pd.set(10, convolutiondepthwise->activation_params);
  751. }
  752. else if (layer->type == "InnerProduct")
  753. {
  754. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
  755. pd.set(0, innerproduct->num_output);
  756. pd.set(1, innerproduct->bias_term);
  757. pd.set(2, innerproduct->weight_data_size);
  758. pd.set(8, innerproduct->int8_scale_term);
  759. pd.set(9, innerproduct->activation_type);
  760. pd.set(10, innerproduct->activation_params);
  761. }
  762. else
  763. {
  764. fprintf(stderr, "unexpected layer type %s in get_layer_param\n", layer->type.c_str());
  765. return -1;
  766. }
  767. return 0;
  768. }
  769. static int get_layer_weights(const ncnn::Layer* layer, std::vector<ncnn::Mat>& weights)
  770. {
  771. if (layer->type == "Convolution")
  772. {
  773. ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
  774. weights.push_back(convolution->weight_data);
  775. if (convolution->bias_term)
  776. weights.push_back(convolution->bias_data);
  777. }
  778. else if (layer->type == "ConvolutionDepthWise")
  779. {
  780. ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
  781. weights.push_back(convolutiondepthwise->weight_data);
  782. if (convolutiondepthwise->bias_term)
  783. weights.push_back(convolutiondepthwise->bias_data);
  784. }
  785. else if (layer->type == "InnerProduct")
  786. {
  787. ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
  788. weights.push_back(innerproduct->weight_data);
  789. if (innerproduct->bias_term)
  790. weights.push_back(innerproduct->bias_data);
  791. }
  792. else
  793. {
  794. fprintf(stderr, "unexpected layer type %s in get_layer_weights\n", layer->type.c_str());
  795. return -1;
  796. }
  797. return 0;
  798. }
  799. int QuantNet::quantize_EQ()
  800. {
  801. // find the initial scale via KL
  802. quantize_KL();
  803. print_quant_info();
  804. const int input_blob_count = (int)input_blobs.size();
  805. const int conv_layer_count = (int)conv_layers.size();
  806. const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
  807. // max 50 images for EQ
  808. const int image_count = std::min((int)listspaths[0].size(), 50);
  809. const float scale_range_lower = 0.5f;
  810. const float scale_range_upper = 2.0f;
  811. const int search_steps = 100;
  812. for (int i = 0; i < conv_layer_count; i++)
  813. {
  814. ncnn::Mat& weight_scale = weight_scales[i];
  815. ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
  816. const ncnn::Layer* layer = layers[conv_layers[i]];
  817. // search weight scale
  818. for (int j = 0; j < weight_scale.w; j++)
  819. {
  820. const float scale = weight_scale[j];
  821. const float scale_lower = scale * scale_range_lower;
  822. const float scale_upper = scale * scale_range_upper;
  823. const float scale_step = (scale_upper - scale_lower) / search_steps;
  824. std::vector<double> avgsims(search_steps, 0.0);
  825. #pragma omp parallel for num_threads(quantize_num_threads)
  826. for (int ii = 0; ii < image_count; ii++)
  827. {
  828. ncnn::Extractor ex = create_extractor();
  829. for (int jj = 0; jj < input_blob_count; jj++)
  830. {
  831. const std::string& imagepath = listspaths[jj][ii];
  832. const std::vector<int>& shape = shapes[jj];
  833. const int type_to_pixel = type_to_pixels[jj];
  834. const std::vector<float>& mean_vals = means[jj];
  835. const std::vector<float>& norm_vals = norms[jj];
  836. int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
  837. if (type_to_pixel != pixel_convert_type)
  838. {
  839. pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
  840. }
  841. const int target_w = shape[0];
  842. const int target_h = shape[1];
  843. cv::Mat bgr = cv::imread(imagepath, 1);
  844. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
  845. in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
  846. ex.input(input_blobs[jj], in);
  847. }
  848. ncnn::Mat in;
  849. ex.extract(conv_bottom_blobs[i], in);
  850. ncnn::Mat out;
  851. ex.extract(conv_top_blobs[i], out);
  852. ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
  853. ncnn::ParamDict pd;
  854. get_layer_param(layer, pd);
  855. pd.set(8, 1); //int8_scale_term
  856. layer_int8->load_param(pd);
  857. std::vector<float> sims(search_steps);
  858. for (int k = 0; k < search_steps; k++)
  859. {
  860. ncnn::Mat new_weight_scale = weight_scale.clone();
  861. new_weight_scale[j] = scale_lower + k * scale_step;
  862. std::vector<ncnn::Mat> weights;
  863. get_layer_weights(layer, weights);
  864. weights.push_back(new_weight_scale);
  865. weights.push_back(bottom_blob_scale);
  866. layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
  867. ncnn::Option opt_int8;
  868. opt_int8.use_packing_layout = false;
  869. layer_int8->create_pipeline(opt_int8);
  870. ncnn::Mat out_int8;
  871. layer_int8->forward(in, out_int8, opt_int8);
  872. layer_int8->destroy_pipeline(opt_int8);
  873. sims[k] = cosine_similarity(out, out_int8);
  874. }
  875. delete layer_int8;
  876. #pragma omp critical
  877. {
  878. for (int k = 0; k < search_steps; k++)
  879. {
  880. avgsims[k] += sims[k];
  881. }
  882. }
  883. }
  884. double max_avgsim = 0.0;
  885. float new_scale = scale;
  886. // find the scale with min cosine distance
  887. for (int k = 0; k < search_steps; k++)
  888. {
  889. if (max_avgsim < avgsims[k])
  890. {
  891. max_avgsim = avgsims[k];
  892. new_scale = scale_lower + k * scale_step;
  893. }
  894. }
  895. fprintf(stderr, "%s w %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
  896. weight_scale[j] = new_scale;
  897. }
  898. // search bottom blob scale
  899. for (int j = 0; j < bottom_blob_scale.w; j++)
  900. {
  901. const float scale = bottom_blob_scale[j];
  902. const float scale_lower = scale * scale_range_lower;
  903. const float scale_upper = scale * scale_range_upper;
  904. const float scale_step = (scale_upper - scale_lower) / search_steps;
  905. std::vector<double> avgsims(search_steps, 0.0);
  906. #pragma omp parallel for num_threads(quantize_num_threads)
  907. for (int ii = 0; ii < image_count; ii++)
  908. {
  909. ncnn::Extractor ex = create_extractor();
  910. for (int jj = 0; jj < input_blob_count; jj++)
  911. {
  912. const std::string& imagepath = listspaths[jj][ii];
  913. const std::vector<int>& shape = shapes[jj];
  914. const int type_to_pixel = type_to_pixels[jj];
  915. const std::vector<float>& mean_vals = means[jj];
  916. const std::vector<float>& norm_vals = norms[jj];
  917. int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
  918. if (type_to_pixel != pixel_convert_type)
  919. {
  920. pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
  921. }
  922. const int target_w = shape[0];
  923. const int target_h = shape[1];
  924. cv::Mat bgr = cv::imread(imagepath, 1);
  925. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
  926. in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
  927. ex.input(input_blobs[jj], in);
  928. }
  929. ncnn::Mat in;
  930. ex.extract(conv_bottom_blobs[i], in);
  931. ncnn::Mat out;
  932. ex.extract(conv_top_blobs[i], out);
  933. ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
  934. ncnn::ParamDict pd;
  935. get_layer_param(layer, pd);
  936. pd.set(8, 1); //int8_scale_term
  937. layer_int8->load_param(pd);
  938. std::vector<float> sims(search_steps);
  939. for (int k = 0; k < search_steps; k++)
  940. {
  941. ncnn::Mat new_bottom_blob_scale = bottom_blob_scale.clone();
  942. new_bottom_blob_scale[j] = scale_lower + k * scale_step;
  943. std::vector<ncnn::Mat> weights;
  944. get_layer_weights(layer, weights);
  945. weights.push_back(weight_scale);
  946. weights.push_back(new_bottom_blob_scale);
  947. layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
  948. ncnn::Option opt_int8;
  949. opt_int8.use_packing_layout = false;
  950. layer_int8->create_pipeline(opt_int8);
  951. ncnn::Mat out_int8;
  952. layer_int8->forward(in, out_int8, opt_int8);
  953. layer_int8->destroy_pipeline(opt_int8);
  954. sims[k] = cosine_similarity(out, out_int8);
  955. }
  956. delete layer_int8;
  957. #pragma omp critical
  958. {
  959. for (int k = 0; k < search_steps; k++)
  960. {
  961. avgsims[k] += sims[k];
  962. }
  963. }
  964. }
  965. double max_avgsim = 0.0;
  966. float new_scale = scale;
  967. // find the scale with min cosine distance
  968. for (int k = 0; k < search_steps; k++)
  969. {
  970. if (max_avgsim < avgsims[k])
  971. {
  972. max_avgsim = avgsims[k];
  973. new_scale = scale_lower + k * scale_step;
  974. }
  975. }
  976. fprintf(stderr, "%s b %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
  977. bottom_blob_scale[j] = new_scale;
  978. }
  979. // update quant info
  980. QuantBlobStat& stat = quant_blob_stats[i];
  981. stat.threshold = 127 / bottom_blob_scale[0];
  982. }
  983. return 0;
  984. }
  // Parse "list0.txt,list1.txt,..." into one vector of image paths per list.
  //
  // s is split in place on ',' (strtok), so it must be writable. Each list
  // file is read line by line and the first whitespace-delimited token of
  // every non-blank line is taken as a path; anything after it on the line is
  // ignored. Paths and lines may be of any length.
  //
  // If a list file cannot be opened the error is logged and parsing stops;
  // the lists read so far are returned.
  static std::vector<std::vector<std::string> > parse_comma_path_list(char* s)
  {
      std::vector<std::vector<std::string> > aps;

      char* pch = strtok(s, ",");
      while (pch != NULL)
      {
          FILE* fp = fopen(pch, "rb");
          if (!fp)
          {
              fprintf(stderr, "fopen %s failed\n", pch);
              break;
          }

          std::vector<std::string> paths;

          // one filepath per line
          std::string token;
          bool token_closed = false; // first token of this line is complete, skip the rest
          for (;;)
          {
              const int c = fgetc(fp);

              if (c == EOF || c == '\n')
              {
                  if (!token.empty())
                      paths.push_back(token);

                  token.clear();
                  token_closed = false;

                  if (c == EOF)
                      break;

                  continue;
              }

              if (token_closed)
                  continue;

              if (isspace(c))
              {
                  // leading blanks are skipped, a blank after text ends the token
                  if (!token.empty())
                      token_closed = true;

                  continue;
              }

              token += (char)c;
          }

          fclose(fp);

          aps.push_back(paths);

          pch = strtok(NULL, ",");
      }

      return aps;
  }
  // Convert a decimal string to float without using locale-dependent helpers.
  //
  // Accepted form: [+|-] digits [. digits] [(e|E) [+|-] digits]. Parsing stops
  // at the first character that does not fit; whatever was read up to there
  // determines the result (an empty number yields 0).
  //
  // All digit runs are accumulated in double, so long mantissas such as
  // "0.0039215686274" or "12345678901" keep their value instead of wrapping
  // around a 32-bit integer.
  static float vstr_to_float(const char vstr[16])
  {
      const char* p = vstr;

      // sign
      const bool positive = *p != '-';
      if (*p == '+' || *p == '-')
      {
          p++;
      }

      // digits before decimal point or exponent
      double v = 0.0;
      while (isdigit((unsigned char)*p))
      {
          v = v * 10.0 + (*p - '0');
          p++;
      }

      // digits after decimal point
      if (*p == '.')
      {
          p++;

          double frac = 0.0;
          double pow10 = 1.0;
          while (isdigit((unsigned char)*p))
          {
              frac = frac * 10.0 + (*p - '0');
              pow10 *= 10.0;
              p++;
          }

          v += frac / pow10;
      }

      // exponent
      if (*p == 'e' || *p == 'E')
      {
          p++;

          // sign of exponent
          const bool grow = *p != '-';
          if (*p == '+' || *p == '-')
          {
              p++;
          }

          // digits of exponent; anything beyond four digits already saturates
          // a double, so further digits are consumed but not accumulated
          unsigned int expon = 0;
          while (isdigit((unsigned char)*p))
          {
              if (expon < 1000)
              {
                  expon = expon * 10 + (*p - '0');
              }
              p++;
          }

          double scale = 1.0;
          while (expon >= 8)
          {
              scale *= 1e8;
              expon -= 8;
          }
          while (expon > 0)
          {
              scale *= 10.0;
              expon -= 1;
          }

          v = grow ? v * scale : v / scale;
      }

      return positive ? (float)v : (float)-v;
  }
  1082. static std::vector<std::vector<float> > parse_comma_float_array_list(char* s)
  1083. {
  1084. std::vector<std::vector<float> > aaf;
  1085. char* pch = strtok(s, "[]");
  1086. while (pch != NULL)
  1087. {
  1088. // parse a,b,c
  1089. char vstr[16];
  1090. int nconsumed = 0;
  1091. int nscan = sscanf(pch, "%15[^,]%n", vstr, &nconsumed);
  1092. if (nscan == 1)
  1093. {
  1094. // ok we get array
  1095. pch += nconsumed;
  1096. std::vector<float> af;
  1097. float v = vstr_to_float(vstr);
  1098. af.push_back(v);
  1099. nscan = sscanf(pch, ",%15[^,]%n", vstr, &nconsumed);
  1100. while (nscan == 1)
  1101. {
  1102. pch += nconsumed;
  1103. float v = vstr_to_float(vstr);
  1104. af.push_back(v);
  1105. nscan = sscanf(pch, ",%15[^,]%n", vstr, &nconsumed);
  1106. }
  1107. // array end
  1108. aaf.push_back(af);
  1109. }
  1110. pch = strtok(NULL, "[]");
  1111. }
  1112. return aaf;
  1113. }
  // Parse "[a,b,c],[d,e],..." into one vector of ints per bracket group.
  //
  // s is split in place on '[' and ']' (strtok), so it must be writable.
  // Pieces that do not start with an integer (such as the "," between two
  // groups) produce no array; within a group, reading stops at the first
  // element that is not ",<int>".
  static std::vector<std::vector<int> > parse_comma_int_array_list(char* s)
  {
      std::vector<std::vector<int> > groups;

      for (char* cursor = strtok(s, "[]"); cursor != NULL; cursor = strtok(NULL, "[]"))
      {
          int value = 0;
          int used = 0;

          // the first element has no leading comma
          if (sscanf(cursor, "%d%n", &value, &used) != 1)
              continue;

          std::vector<int> row;
          do
          {
              row.push_back(value);
              cursor += used;
          } while (sscanf(cursor, ",%d%n", &value, &used) == 1);

          groups.push_back(row);
      }

      return groups;
  }
  1144. static std::vector<int> parse_comma_pixel_type_list(char* s)
  1145. {
  1146. std::vector<int> aps;
  1147. char* pch = strtok(s, ",");
  1148. while (pch != NULL)
  1149. {
  1150. // RAW/RGB/BGR/GRAY/RGBA/BGRA
  1151. if (strcmp(pch, "RAW") == 0)
  1152. aps.push_back(-233);
  1153. if (strcmp(pch, "RGB") == 0)
  1154. aps.push_back(ncnn::Mat::PIXEL_RGB);
  1155. if (strcmp(pch, "BGR") == 0)
  1156. aps.push_back(ncnn::Mat::PIXEL_BGR);
  1157. if (strcmp(pch, "GRAY") == 0)
  1158. aps.push_back(ncnn::Mat::PIXEL_GRAY);
  1159. if (strcmp(pch, "RGBA") == 0)
  1160. aps.push_back(ncnn::Mat::PIXEL_RGBA);
  1161. if (strcmp(pch, "BGRA") == 0)
  1162. aps.push_back(ncnn::Mat::PIXEL_BGRA);
  1163. pch = strtok(NULL, ",");
  1164. }
  1165. return aps;
  1166. }
  // Print the command line synopsis, the supported key=value options and one
  // sample invocation to stderr.
  static void show_usage()
  {
      static const char* const usage_lines[] = {
          "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n",
          " mean=[104.0,117.0,123.0],...\n",
          " norm=[1.0,1.0,1.0],...\n",
          " shape=[224,224,3],...\n",
          " pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n",
          " thread=8\n",
          " method=kl/aciq/eq\n",
          "Sample usage: ncnn2table squeezenet.param squeezenet.bin imagelist.txt squeezenet.table mean=[104.0,117.0,123.0] norm=[1.0,1.0,1.0] shape=[227,227,3] pixel=BGR method=kl\n",
      };
      const int line_count = (int)(sizeof(usage_lines) / sizeof(usage_lines[0]));

      for (int i = 0; i < line_count; i++)
      {
          fprintf(stderr, "%s", usage_lines[i]);
      }
  }
  1178. int main(int argc, char** argv)
  1179. {
  1180. if (argc < 5)
  1181. {
  1182. show_usage();
  1183. return -1;
  1184. }
  1185. for (int i = 1; i < argc; i++)
  1186. {
  1187. if (argv[i][0] == '-')
  1188. {
  1189. show_usage();
  1190. return -1;
  1191. }
  1192. }
  1193. const char* inparam = argv[1];
  1194. const char* inbin = argv[2];
  1195. char* lists = argv[3];
  1196. const char* outtable = argv[4];
  1197. ncnn::Option opt;
  1198. opt.num_threads = 1;
  1199. opt.use_fp16_packed = false;
  1200. opt.use_fp16_storage = false;
  1201. opt.use_fp16_arithmetic = false;
  1202. QuantNet net;
  1203. net.opt = opt;
  1204. net.load_param(inparam);
  1205. net.load_model(inbin);
  1206. net.init();
  1207. // load lists
  1208. net.listspaths = parse_comma_path_list(lists);
  1209. std::string method = "kl";
  1210. for (int i = 5; i < argc; i++)
  1211. {
  1212. // key=value
  1213. char* kv = argv[i];
  1214. char* eqs = strchr(kv, '=');
  1215. if (eqs == NULL)
  1216. {
  1217. fprintf(stderr, "unrecognized arg %s\n", kv);
  1218. continue;
  1219. }
  1220. // split k v
  1221. eqs[0] = '\0';
  1222. const char* key = kv;
  1223. char* value = eqs + 1;
  1224. fprintf(stderr, "%s = %s\n", key, value);
  1225. // load mean norm shape
  1226. if (memcmp(key, "mean", 4) == 0)
  1227. net.means = parse_comma_float_array_list(value);
  1228. if (memcmp(key, "norm", 4) == 0)
  1229. net.norms = parse_comma_float_array_list(value);
  1230. if (memcmp(key, "shape", 5) == 0)
  1231. net.shapes = parse_comma_int_array_list(value);
  1232. if (memcmp(key, "pixel", 5) == 0)
  1233. net.type_to_pixels = parse_comma_pixel_type_list(value);
  1234. if (memcmp(key, "thread", 6) == 0)
  1235. net.quantize_num_threads = atoi(value);
  1236. if (memcmp(key, "method", 6) == 0)
  1237. method = std::string(value);
  1238. }
  1239. // sanity check
  1240. const size_t input_blob_count = net.input_blobs.size();
  1241. if (net.listspaths.size() != input_blob_count)
  1242. {
  1243. fprintf(stderr, "expect %d lists, but got %d\n", (int)input_blob_count, (int)net.listspaths.size());
  1244. return -1;
  1245. }
  1246. if (net.means.size() != input_blob_count)
  1247. {
  1248. fprintf(stderr, "expect %d means, but got %d\n", (int)input_blob_count, (int)net.means.size());
  1249. return -1;
  1250. }
  1251. if (net.norms.size() != input_blob_count)
  1252. {
  1253. fprintf(stderr, "expect %d norms, but got %d\n", (int)input_blob_count, (int)net.norms.size());
  1254. return -1;
  1255. }
  1256. if (net.shapes.size() != input_blob_count)
  1257. {
  1258. fprintf(stderr, "expect %d shapes, but got %d\n", (int)input_blob_count, (int)net.shapes.size());
  1259. return -1;
  1260. }
  1261. if (net.type_to_pixels.size() != input_blob_count)
  1262. {
  1263. fprintf(stderr, "expect %d pixels, but got %d\n", (int)input_blob_count, (int)net.type_to_pixels.size());
  1264. return -1;
  1265. }
  1266. if (net.quantize_num_threads < 0)
  1267. {
  1268. fprintf(stderr, "malformed thread %d\n", net.quantize_num_threads);
  1269. return -1;
  1270. }
  1271. if (method == "kl")
  1272. {
  1273. net.quantize_KL();
  1274. }
  1275. else if (method == "aciq")
  1276. {
  1277. net.quantize_ACIQ();
  1278. }
  1279. else if (method == "eq")
  1280. {
  1281. net.quantize_EQ();
  1282. }
  1283. else
  1284. {
  1285. fprintf(stderr, "not implemented yet !\n");
  1286. fprintf(stderr, "unknown method %s, expect kl / aciq / eq\n", method.c_str());
  1287. return -1;
  1288. }
  1289. net.print_quant_info();
  1290. net.save_table(outtable);
  1291. return 0;
  1292. }