You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

paramdict.cpp 8.9 kB

[WIP] vulkan compute (#618) * vulkan infrastructure * vkallocator and vkmat * layer interface for vulkan compute * wip... * default vulkan device, command wrapper, upload model weight in load_model to simplify layer interface * simplify command api, vkmat holds staging buffer, relu works * initialize specialization constant, simplify command dispatch, fix staging buffer copy with different shape, convolution works * init extension functions * dynamic local size and group count * group count=1 is invalid * regard device max workgroup size limit * fix relu oooops * decouple command record and staging allocation * create result blob * add pooling shader * buffer is faster than image :) * fix pooling shader * add innerproduct shader * readonly writeonly decoration * simplify buffer creation * decouple command and layer, VK_KHR_descriptor_update_template extension makes descriptor binding update easy :D * fix vulkan building issues in visual studio (#1) * fix building issues on visual studio * ignore benchmark * cancel changes * ... ... * decouple paramdict and vulkandevice * fix staging buffer destroy in model loading * remove vkdev member in option * add padding shader * simplify vulkan layer creation, simplify convolution and pooling shader for no padding, less debug output * add convolutiondepthwise and softmax shader * specialization float type, add leakyrelu * add dropout shader * add batchnorm shader * split vulkan forward * add scale shader * push constant type can be int or float * set_optimal_local_size_xyz * add eltwise shader * concat vulkan forward * fix convolution without bias * add dummy shader for concat and split, more fix ... 
* optional VK_KHR_descriptor_update_template and VK_KHR_push_descriptor * check VK_KHR_push_descriptor for vkCmdPushDescriptorSetWithTemplateKHR * binaryop and unaryop shader * hide raw command buffer * simple vkbenchncnn benchmark * create device with transfer queue * rename command to vkcompute, add vktransfer and layer upload_model interface * external VkMat, copy and map wrt buffer offset * command copy respect offset and size * decouple weight upload and load, simplify upload weight api, use one big staging buffer for uploading weights * fix build on android * binding count can not vary :( * barrier check state, fix sub-op destruction * declare local_size_xyz constant, fix crash on radv * fix local_size_xyz, second try * more barrier and state fix * fix softmax * reconstruct buffer memory allocator, reuse blob buffer, less verbose output * find unified memory type index * weight staging buffer allocator and weight buffer allocator, respect descriptor buffer offset alignment * use VK_KHR_descriptor_update_template for faster descriptor update if available, multithread pipeline creation * find more useful vulkan extensions and enable them * fix msvc build * respect VK_KHR_dedicated_allocation for weight buffer allocation * fix android build * fix bias name conflicts with metal * decouple pipeline and layer, building shader sources into shader module, dedicated create_pipeline api, simplify pipeline recording * drop dummy shader, inplace softmax, multiple shader module works * fix unique queue family index error * flatten support vulkan * mnasnet run * find shader module by name, each entry point per shader module, fix attribute/id conflict on moltenvk * some minor changes * add some high level api * use dedicated transfer queue to upload weight model * prefer mappable buffer on unified memory * global pooling and convolution fc, reuse staging buffer * implement ring-buffer style blob allocator, add VkBufferMemory capacity * use blob allocator for workspace blob, 
it works fine :) * vulkan option off * Update layer.cpp * fix build with vulkan off * less verbose output, fix crash on vulkan_compute off * merge benchncnn tool * allocator clear api, use new weight buffer allocator per net * add default locked allocator * mapped mat ptr api, persistent mapped memory works generally :) * travis ci linux vulkan * travis ci vulkan wip ... * more gpu wip ... * more gpu wip ... * wip... * wip... * wip... ... * wip... ios vulkan build... * find glslangValidator on ios build * use dynamic moltenvk library * travis ci wip ... * ios simulator does not support metal at all * fix cpu only extractor * optimize workgroup size, first try * optimize workgroup size, second try * conv1x1s1d1 vec4 * revert build system * fix ncnn2mem build * fix ncnn2mem build
7 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include <ctype.h>
  15. #include <stdarg.h>
  16. #include <stdio.h>
  17. #include "paramdict.h"
  18. #include "platform.h"
  19. namespace ncnn {
  20. ParamDict::ParamDict()
  21. {
  22. use_winograd_convolution = 1;
  23. use_sgemm_convolution = 1;
  24. use_int8_inference = 1;
  25. use_vulkan_compute = 0;
  26. clear();
  27. }
  28. int ParamDict::get(int id, int def) const
  29. {
  30. return params[id].loaded ? params[id].i : def;
  31. }
  32. float ParamDict::get(int id, float def) const
  33. {
  34. return params[id].loaded ? params[id].f : def;
  35. }
  36. Mat ParamDict::get(int id, const Mat& def) const
  37. {
  38. return params[id].loaded ? params[id].v : def;
  39. }
  40. void ParamDict::set(int id, int i)
  41. {
  42. params[id].loaded = 1;
  43. params[id].i = i;
  44. }
  45. void ParamDict::set(int id, float f)
  46. {
  47. params[id].loaded = 1;
  48. params[id].f = f;
  49. }
  50. void ParamDict::set(int id, const Mat& v)
  51. {
  52. params[id].loaded = 1;
  53. params[id].v = v;
  54. }
  55. void ParamDict::clear()
  56. {
  57. for (int i = 0; i < NCNN_MAX_PARAM_COUNT; i++)
  58. {
  59. params[i].loaded = 0;
  60. params[i].v = Mat();
  61. }
  62. }
  63. #if NCNN_STDIO
  64. #if NCNN_STRING
  // Decide whether a scanned value token should be parsed as a float.
  //
  // Looks at up to 16 characters, stopping at the first '\0'.
  // Returns true as soon as a '.' or an 'e' / 'E' is found, false otherwise.
  static bool vstr_is_float(const char vstr[16])
  {
      for (int j = 0; j < 16; j++)
      {
          const char c = vstr[j];
          if (c == '\0')
              break;

          // compare against both cases directly: calling tolower() with a negative
          // char value (any byte >= 0x80 where char is signed) is undefined behavior
          if (c == '.' || c == 'e' || c == 'E')
              return true;
      }

      return false;
  }
  77. int ParamDict::load_param(FILE* fp)
  78. {
  79. clear();
  80. // 0=100 1=1.250000 -23303=5,0.1,0.2,0.4,0.8,1.0
  81. // parse each key=value pair
  82. int id = 0;
  83. while (fscanf(fp, "%d=", &id) == 1)
  84. {
  85. bool is_array = id <= -23300;
  86. if (is_array)
  87. {
  88. id = -id - 23300;
  89. }
  90. if (is_array)
  91. {
  92. int len = 0;
  93. int nscan = fscanf(fp, "%d", &len);
  94. if (nscan != 1)
  95. {
  96. fprintf(stderr, "ParamDict read array length fail\n");
  97. return -1;
  98. }
  99. params[id].v.create(len);
  100. for (int j = 0; j < len; j++)
  101. {
  102. char vstr[16];
  103. nscan = fscanf(fp, ",%15[^,\n ]", vstr);
  104. if (nscan != 1)
  105. {
  106. fprintf(stderr, "ParamDict read array element fail\n");
  107. return -1;
  108. }
  109. bool is_float = vstr_is_float(vstr);
  110. if (is_float)
  111. {
  112. float* ptr = params[id].v;
  113. nscan = sscanf(vstr, "%f", &ptr[j]);
  114. }
  115. else
  116. {
  117. int* ptr = params[id].v;
  118. nscan = sscanf(vstr, "%d", &ptr[j]);
  119. }
  120. if (nscan != 1)
  121. {
  122. fprintf(stderr, "ParamDict parse array element fail\n");
  123. return -1;
  124. }
  125. }
  126. }
  127. else
  128. {
  129. char vstr[16];
  130. int nscan = fscanf(fp, "%15s", vstr);
  131. if (nscan != 1)
  132. {
  133. fprintf(stderr, "ParamDict read value fail\n");
  134. return -1;
  135. }
  136. bool is_float = vstr_is_float(vstr);
  137. if (is_float)
  138. nscan = sscanf(vstr, "%f", &params[id].f);
  139. else
  140. nscan = sscanf(vstr, "%d", &params[id].i);
  141. if (nscan != 1)
  142. {
  143. fprintf(stderr, "ParamDict parse value fail\n");
  144. return -1;
  145. }
  146. }
  147. params[id].loaded = 1;
  148. }
  149. return 0;
  150. }
  // mem_sscanf(ptr, format, ...): sscanf from a memory cursor and advance it.
  //
  // "%n" is appended to the format to learn how many characters were consumed; ptr is
  // advanced by that amount. The result is the sscanf match count, or 0 when nothing was
  // consumed (i.e. the scan did not reach the trailing "%n").
  #if _MSC_VER
  // Function form for MSVC. The caller must have an int named _internal_nconsumed in
  // scope; the macro below passes its address both as the out-parameter here and as the
  // target of the appended "%n".
  static inline int mem_sscanf_with_n(int* nconsumed_out, const char*& cursor, const char* fmt, ...)
  {
      *nconsumed_out = 0;

      va_list ap;
      va_start(ap, fmt);
      const int nmatched = vsscanf(cursor, fmt, ap);
      va_end(ap);

      // vsscanf stored the consumed length through the "%n" argument
      const int nconsumed = *nconsumed_out;
      cursor += nconsumed;

      if (nconsumed > 0)
          return nmatched;
      return 0;
  }
  #define mem_sscanf(src, fmt, ...) mem_sscanf_with_n(&_internal_nconsumed, src, fmt "%n", __VA_ARGS__, &_internal_nconsumed)
  #else
  // return value from macro requires gcc extension https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html
  #define mem_sscanf(src, fmt, ...)                                                 \
      ({                                                                            \
          int _consumed = 0;                                                        \
          int _matched = sscanf(src, fmt "%n", __VA_ARGS__, &_consumed);            \
          src += _consumed;                                                         \
          _consumed > 0 ? _matched : 0;                                             \
      })
  #endif // _MSC_VER
  167. int ParamDict::load_param_mem(const char*& mem)
  168. {
  169. #if _MSC_VER
  170. int _internal_nconsumed;
  171. #endif
  172. clear();
  173. // 0=100 1=1.250000 -23303=5,0.1,0.2,0.4,0.8,1.0
  174. // parse each key=value pair
  175. int id = 0;
  176. while (mem_sscanf(mem, "%d=", &id) == 1)
  177. {
  178. bool is_array = id <= -23300;
  179. if (is_array)
  180. {
  181. id = -id - 23300;
  182. }
  183. if (is_array)
  184. {
  185. int len = 0;
  186. int nscan = mem_sscanf(mem, "%d", &len);
  187. if (nscan != 1)
  188. {
  189. fprintf(stderr, "ParamDict read array length fail\n");
  190. return -1;
  191. }
  192. params[id].v.create(len);
  193. for (int j = 0; j < len; j++)
  194. {
  195. char vstr[16];
  196. nscan = mem_sscanf(mem, ",%15[^,\n ]", vstr);
  197. if (nscan != 1)
  198. {
  199. fprintf(stderr, "ParamDict read array element fail\n");
  200. return -1;
  201. }
  202. bool is_float = vstr_is_float(vstr);
  203. if (is_float)
  204. {
  205. float* ptr = params[id].v;
  206. nscan = sscanf(vstr, "%f", &ptr[j]);
  207. }
  208. else
  209. {
  210. int* ptr = params[id].v;
  211. nscan = sscanf(vstr, "%d", &ptr[j]);
  212. }
  213. if (nscan != 1)
  214. {
  215. fprintf(stderr, "ParamDict parse array element fail\n");
  216. return -1;
  217. }
  218. }
  219. }
  220. else
  221. {
  222. char vstr[16];
  223. int nscan = mem_sscanf(mem, "%15s", vstr);
  224. if (nscan != 1)
  225. {
  226. fprintf(stderr, "ParamDict read value fail\n");
  227. return -1;
  228. }
  229. bool is_float = vstr_is_float(vstr);
  230. if (is_float)
  231. nscan = sscanf(vstr, "%f", &params[id].f);
  232. else
  233. nscan = sscanf(vstr, "%d", &params[id].i);
  234. if (nscan != 1)
  235. {
  236. fprintf(stderr, "ParamDict parse value fail\n");
  237. return -1;
  238. }
  239. }
  240. params[id].loaded = 1;
  241. }
  242. return 0;
  243. }
  244. #endif // NCNN_STRING
  245. int ParamDict::load_param_bin(FILE* fp)
  246. {
  247. clear();
  248. // binary 0
  249. // binary 100
  250. // binary 1
  251. // binary 1.250000
  252. // binary 3 | array_bit
  253. // binary 5
  254. // binary 0.1
  255. // binary 0.2
  256. // binary 0.4
  257. // binary 0.8
  258. // binary 1.0
  259. // binary -233(EOP)
  260. int id = 0;
  261. fread(&id, sizeof(int), 1, fp);
  262. while (id != -233)
  263. {
  264. bool is_array = id <= -23300;
  265. if (is_array)
  266. {
  267. id = -id - 23300;
  268. }
  269. if (is_array)
  270. {
  271. int len = 0;
  272. fread(&len, sizeof(int), 1, fp);
  273. params[id].v.create(len);
  274. float* ptr = params[id].v;
  275. fread(ptr, sizeof(float), len, fp);
  276. }
  277. else
  278. {
  279. fread(&params[id].f, sizeof(float), 1, fp);
  280. }
  281. params[id].loaded = 1;
  282. fread(&id, sizeof(int), 1, fp);
  283. }
  284. return 0;
  285. }
  286. #endif // NCNN_STDIO
  287. int ParamDict::load_param(const unsigned char*& mem)
  288. {
  289. clear();
  290. int id = *(int*)(mem);
  291. mem += 4;
  292. while (id != -233)
  293. {
  294. bool is_array = id <= -23300;
  295. if (is_array)
  296. {
  297. id = -id - 23300;
  298. }
  299. if (is_array)
  300. {
  301. int len = *(int*)(mem);
  302. mem += 4;
  303. params[id].v.create(len);
  304. memcpy(params[id].v.data, mem, len * 4);
  305. mem += len * 4;
  306. }
  307. else
  308. {
  309. params[id].f = *(float*)(mem);
  310. mem += 4;
  311. }
  312. params[id].loaded = 1;
  313. id = *(int*)(mem);
  314. mem += 4;
  315. }
  316. return 0;
  317. }
  318. } // namespace ncnn