您最多选择25个标签 标签必须以中文、字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

5 年前
5 年前
5 年前
5 年前
5 年前
5 年前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H
  17. #define MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <numeric>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "include/errorcode.h"
#include "schema/inner/model_generated.h"
#include "src/common/graph_util.h"
#include "ir/anf.h"
#include "ir/func_graph.h"
namespace mindspore {
namespace lite {
// Status code used by the helpers below; values come from include/errorcode.h.
using STATUS = int;
// Position relative to the anchor node/tensor when inserting.
enum InsertPlace { kBefore, kAfter };
// Iterator over the node list of a schema::MetaGraphT.
using NodeIter = std::vector<std::unique_ptr<schema::CNodeT>>::iterator;
// Callable that duplicates a CNodeT; used when a node must be cloned during insertion.
using OpDefCopyer = std::function<std::unique_ptr<schema::CNodeT>(schema::CNodeT *)>;

// Default CNodeT copier used by the InsertNode overloads below.
// NOTE(review): copy depth is defined in the .cc file — confirm there.
OpDefCopyer GetSimpleOpCopyer();

// --- Topology queries on a MetaGraphT ---
// Indices of the nodes producing the inputs of the given node; inputIndexIdx == -1
// presumably means "all input slots" — confirm against the implementation.
std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int inputIndexIdx = -1);
std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node,
                                    int inputIndexIdx = -1);
// Indices of the nodes consuming the outputs of the given node; outputIndexIdx == -1
// presumably means "all output slots" — confirm against the implementation.
std::vector<size_t> GetOutputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int outputIndexIdx = -1);
std::vector<size_t> GetOutputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node,
                                     int outputIndexIdx = -1);
// Nodes linked before/after the tensor with the given index.
std::vector<size_t> GetLinkedPreIdx(const schema::MetaGraphT &graphT, const size_t &tensorIdx);
std::vector<size_t> GetLinkedPostIdx(const schema::MetaGraphT &graphT, const size_t &tensorIdx);

// --- Graph mutation ---
// Detach a node from the graph.
STATUS IsolateNode(schema::MetaGraphT *subGraph, schema::CNodeT *node);
// Detach a node, identified by index / subgraph+index / pointer; removeTensor controls
// whether its now-unused tensors are also deleted.
STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool removeTensor = true);
STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t subGraphIdx, size_t nodeIdx, bool removeTensor = true);
STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, schema::CNodeT *node, bool removeTensor = true);
// Adjust a node's tensor indices to account for the deletion of tensor deleteIdx.
STATUS UpdateNodeIndex(schema::CNodeT *node, uint32_t deleteIdx);
// Remove the listed tensors; forceDelete skips the in-use check (see .cc for semantics).
STATUS RemoveTensor(schema::MetaGraphT *graphT, std::vector<uint32_t> toDeleteTensorIdxes, bool forceDelete = false);
// Attach a tensor to the node at nodeIdx, at the kBefore/kAfter position.
STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ptr<schema::TensorT> tensor,
                      InsertPlace place = kBefore);
// Replace the tensor at inTensorIdx of the node at nodeIdx with the given tensor.
STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_t inTensorIdx,
                           std::unique_ptr<schema::TensorT> tensor);
// Bit-pack tensor_input's data to bit_num bits per value.
int DoBitPack(const int &bit_num, schema::TensorT *tensor_input);

// --- Node insertion ---
// Insert toAddNode before/after an existing node (by index or iterator), wired through
// the given input/output slot. *errorCode receives the status, *insert_num the number of
// nodes actually inserted; the returned iterator continues traversal after the insertion.
NodeIter InsertNode(schema::MetaGraphT *graphT, uint32_t existNodeIdx, InsertPlace place, size_t inoutIndex,
                    std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                    const OpDefCopyer &opDefCopyer = GetSimpleOpCopyer());
NodeIter InsertNode(schema::MetaGraphT *graphT, NodeIter existNodeIter, InsertPlace place, size_t inoutIndexIdx,
                    std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                    const OpDefCopyer &opDefCopyer = GetSimpleOpCopyer());
NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, size_t inputIndexIdx,
                          std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                          const OpDefCopyer &opDefCopyer);
NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, size_t outputIndexIdx,
                         std::unique_ptr<schema::CNodeT> toAddNode, STATUS *errorCode, int *insert_num,
                         const OpDefCopyer &opDefCopyery);

// --- Misc utilities ---
// Validate modelFile against fileType (see .cc for the exact matching rule).
STATUS ValidateFileStr(const std::string &modelFile, const std::string &fileType);
// Recompute per-subgraph tensor index lists of meta_graphT.
STATUS SetSubgraphTensorIndices(schema::MetaGraphT *meta_graphT);
// Model name derived from the model file path.
std::string GetModelName(const std::string &modelFile);
// Perm attribute of a transpose cnode in the given graph.
std::vector<int> GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr<schema::CNodeT> &cnode);
// Pack a vector<bool> bit stream into a byte string (consumed by the compressors below).
std::string BoolVectorToString(const std::vector<bool> &bool_vec);
// Dtype helpers for anf/abstract graph nodes.
TypeId GetAbstractTensorDtype(const abstract::AbstractTensorPtr &tensor);
TypeId GetParameterDtype(const ParameterPtr &param_node);
STATUS UpdateFuncGraphInputsAndOutputsDtype(const FuncGraphPtr &func_graph);
  81. template <typename T>
  82. bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
  83. size_t unique_value_bit, size_t unique_value_cnt, size_t pack_repetition_size_in_byte,
  84. size_t bit_num, schema::TensorT *tensor) {
  85. auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  86. std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  87. std::vector<bool> bits(pack_repetition_size_in_byte * 8);
  88. size_t index = 0;
  89. // write unique_value_cnt: bit_num bit for unsigned
  90. for (size_t i = 0; i < bit_num; i++) {
  91. bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1);
  92. }
  93. // write the unique value set: each value has bit_num bit signed
  94. for (auto unique_value : quant_data_set) {
  95. for (size_t i = 0; i < bit_num; i++) {
  96. bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
  97. }
  98. }
  99. // write the index: each index has unique_value_bit unsigned
  100. for (auto quant_value : quant_data) {
  101. for (size_t i = 0; i < unique_value_bit; i++) {
  102. bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - i - 1)) & (0x1);
  103. }
  104. }
  105. if (index > pack_repetition_size_in_byte * 8) {
  106. MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_repetition_size_in_byte * 8;
  107. return false;
  108. }
  109. // update tensor data
  110. auto new_data_str = BoolVectorToString(bits);
  111. auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  112. if (ret != EOK) {
  113. MS_LOG(ERROR) << "memcpy error";
  114. return false;
  115. }
  116. tensor->data.resize(new_data_str.size());
  117. tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING;
  118. MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING";
  119. return true;
  120. }
  121. template <typename T>
  122. bool SparsityCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
  123. size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte,
  124. size_t nz_cnt, size_t coor_best_bit, size_t bit_num, schema::TensorT *tensor) {
  125. auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  126. std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  127. auto &quant_params = tensor->quantParams;
  128. auto elem_cnt = quant_data.size();
  129. auto channel_cnt = quant_params.size();
  130. auto elem_perchannel = elem_cnt / channel_cnt;
  131. std::vector<bool> bits(pack_sparsity_size_in_byte * 8);
  132. int index = 0;
  133. // coor_best_bit
  134. for (size_t i = 0; i < 8; i++) {
  135. bits[index++] = (coor_best_bit >> (8 - i - 1)) & 0x1;
  136. }
  137. // nz_cnt
  138. for (size_t i = 0; i < 32; i++) {
  139. bits[index++] = (nz_cnt >> (32 - i - 1)) & 0x1;
  140. }
  141. // unique_value cnt
  142. for (size_t i = 0; i < bit_num; i++) {
  143. bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1;
  144. }
  145. // unique_values
  146. for (auto unique_value : quant_data_set) {
  147. for (size_t i = 0; i < bit_num; i++) {
  148. bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
  149. }
  150. }
  151. // nz values indexing && get coor
  152. std::vector<size_t> coors(nz_cnt);
  153. int coors_index = 0;
  154. int prev_index = -1;
  155. for (int di = 0; (unsigned int)di < elem_cnt; di++) {
  156. auto cur_channel = di / elem_perchannel;
  157. auto zp = quant_params[cur_channel]->zeroPoint;
  158. auto nz_value = quant_data[di];
  159. if (nz_value != zp || (di - prev_index) >= (1 << coor_best_bit)) {
  160. MS_ASSERT(coors_index < nz_cnt);
  161. coors[coors_index++] = di - prev_index - 1;
  162. prev_index = di;
  163. for (size_t i = 0; i < unique_value_bit; i++) {
  164. bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1);
  165. }
  166. }
  167. }
  168. // write coor
  169. for (auto coor : coors) {
  170. for (size_t i = 0; i < coor_best_bit; i++) {
  171. bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1;
  172. }
  173. }
  174. if ((unsigned int)index > pack_sparsity_size_in_byte * 8) {
  175. MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_sparsity_size_in_byte * 8;
  176. return false;
  177. }
  178. auto new_data_str = BoolVectorToString(bits);
  179. auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  180. if (ret != EOK) {
  181. MS_LOG(ERROR) << "memcpy error";
  182. return false;
  183. }
  184. tensor->data.resize(new_data_str.size());
  185. tensor->weightQunatCompressType = schema::WeightQunatCompressType_SPARSE;
  186. MS_LOG(ERROR) << "set WeightQunatCompressType_SPARSITY";
  187. return true;
  188. }
  189. template <typename T>
  190. size_t CalCoorBestBit(const std::vector<T> &quant_data, size_t elem_cnt,
  191. const std::vector<std::unique_ptr<schema::QuantParamT>> &quant_params, int unique_value_bit,
  192. size_t *coor_best_bit) {
  193. size_t best_nn_cnt = 0;
  194. size_t min_len_in_bit = std::numeric_limits<size_t>::max();
  195. for (int bit = 2; bit <= 10; bit++) {
  196. // search
  197. size_t nn_cnt = 0;
  198. int prev_index = -1;
  199. auto channel_cnt = quant_params.size();
  200. auto elem_perchannel = elem_cnt / channel_cnt;
  201. for (int i = 0; (unsigned int)i < elem_cnt; i++) {
  202. auto cur_channel = i / elem_perchannel;
  203. auto zp = quant_params[cur_channel]->zeroPoint;
  204. if (quant_data[i] != zp || (i - prev_index) >= (1 << bit)) {
  205. nn_cnt++;
  206. prev_index = i;
  207. }
  208. }
  209. size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit;
  210. if (len_in_bit < min_len_in_bit) {
  211. min_len_in_bit = len_in_bit;
  212. *coor_best_bit = bit;
  213. best_nn_cnt = nn_cnt;
  214. }
  215. }
  216. return best_nn_cnt;
  217. }
  218. template <typename T>
  219. bool PackRepetition(size_t bit_num, schema::TensorT *tensor) {
  220. auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  221. std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  222. auto elem_cnt = quant_data.size();
  223. auto dims = tensor->dims;
  224. size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<>());
  225. if (elem_cnt != elem_cnt_by_dims) {
  226. MS_LOG(ERROR) << "elem_cnt: " << elem_cnt << " not equal: " << elem_cnt_by_dims;
  227. return false;
  228. }
  229. auto &quant_params = tensor->quantParams;
  230. std::set<T> quant_data_set;
  231. for (auto quant_value : quant_data) {
  232. quant_data_set.insert(quant_value);
  233. }
  234. std::map<T, size_t> unique_value_index_map;
  235. auto index = 0;
  236. for (auto value : quant_data_set) {
  237. unique_value_index_map[value] = index++;
  238. }
  239. auto unique_value_cnt = quant_data_set.size();
  240. size_t unique_value_bit = ceil(log2(unique_value_cnt));
  241. auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt;
  242. size_t pack_repetition_size_in_byte = ceil(pack_repetition_size_in_bit / 8.0);
  243. size_t origin_size_in_byte = ceil(bit_num * elem_cnt / 8.0);
  244. size_t coor_best_bit = 0;
  245. auto nz_cnt = CalCoorBestBit<T>(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit);
  246. // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. nz values coord
  247. auto pack_sparsity_size_in_bit =
  248. 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
  249. size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0);
  250. MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
  251. << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
  252. auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
  253. if (min_byte_need == origin_size_in_byte) {
  254. return false;
  255. } else if (min_byte_need == pack_repetition_size_in_byte) {
  256. MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
  257. return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
  258. pack_repetition_size_in_byte, bit_num, tensor);
  259. } else if (min_byte_need == pack_sparsity_size_in_byte) {
  260. MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
  261. return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
  262. pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor);
  263. } else {
  264. MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
  265. << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
  266. }
  267. return false;
  268. }
  269. } // namespace lite
  270. } // namespace mindspore
  271. #endif // MINDSPORE_LITE_TOOLS_COMMON_GRAPH_UTIL_H