You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tensorprint_utils.cc 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "utils/tensorprint_utils.h"
  17. #include <atomic>
  18. #include <fstream>
  19. #include <memory>
  20. #include <string>
  21. #include <vector>
  22. #include "ir/tensor.h"
  23. #include "pybind11/pybind11.h"
  24. #include "utils/ms_utils.h"
  25. #include "utils/shape_utils.h"
  26. namespace py = pybind11;
  27. namespace mindspore {
  28. #ifndef NO_DLIB
  29. static std::map<aclDataType, TypeId> print_acl_data_type_map = {
  30. {ACL_INT8, TypeId::kNumberTypeInt8}, {ACL_UINT8, TypeId::kNumberTypeUInt8},
  31. {ACL_INT16, TypeId::kNumberTypeInt16}, {ACL_UINT16, TypeId::kNumberTypeUInt16},
  32. {ACL_INT32, TypeId::kNumberTypeInt32}, {ACL_UINT32, TypeId::kNumberTypeUInt32},
  33. {ACL_INT64, TypeId::kNumberTypeInt64}, {ACL_UINT64, TypeId::kNumberTypeUInt64},
  34. {ACL_FLOAT16, TypeId::kNumberTypeFloat16}, {ACL_FLOAT, TypeId::kNumberTypeFloat32},
  35. {ACL_DOUBLE, TypeId::kNumberTypeFloat64}, {ACL_BOOL, TypeId::kNumberTypeBool}};
  36. static std::map<aclDataType, size_t> acl_data_type_size_map = {
  37. {ACL_INT8, sizeof(int8_t)}, {ACL_UINT8, sizeof(uint8_t)}, {ACL_INT16, sizeof(int16_t)},
  38. {ACL_UINT16, sizeof(uint16_t)}, {ACL_INT32, sizeof(int32_t)}, {ACL_UINT32, sizeof(uint32_t)},
  39. {ACL_INT64, sizeof(int64_t)}, {ACL_UINT64, sizeof(uint64_t)}, {ACL_FLOAT16, sizeof(float) / 2},
  40. {ACL_FLOAT, sizeof(float)}, {ACL_DOUBLE, sizeof(double)}, {ACL_BOOL, sizeof(bool)}};
  41. std::string GetParseType(const aclDataType &acl_data_type) {
  42. static const std::map<aclDataType, std::string> print_tensor_parse_map = {
  43. {ACL_INT8, "Int8"}, {ACL_UINT8, "Uint8"}, {ACL_INT16, "Int16"}, {ACL_UINT16, "Uint16"},
  44. {ACL_INT32, "Int32"}, {ACL_UINT32, "Uint32"}, {ACL_INT64, "Int64"}, {ACL_UINT64, "Uint64"},
  45. {ACL_FLOAT16, "Float16"}, {ACL_FLOAT, "Float32"}, {ACL_DOUBLE, "Float64"}, {ACL_BOOL, "Bool"}};
  46. auto type_iter = print_tensor_parse_map.find(acl_data_type);
  47. if (type_iter == print_tensor_parse_map.end()) {
  48. MS_LOG(EXCEPTION) << "type of tensor need to print is not support " << acl_data_type;
  49. }
  50. return type_iter->second;
  51. }
  52. bool PrintTensorToString(const char *str_data_ptr, mindspore::tensor::Tensor *const print_tensor,
  53. const size_t &memory_size) {
  54. MS_EXCEPTION_IF_NULL(str_data_ptr);
  55. MS_EXCEPTION_IF_NULL(print_tensor);
  56. auto *tensor_data_ptr = static_cast<uint8_t *>(print_tensor->data_c());
  57. MS_EXCEPTION_IF_NULL(tensor_data_ptr);
  58. size_t dest_size = static_cast<size_t>(print_tensor->data().nbytes());
  59. size_t target_size = memory_size;
  60. auto cp_ret = memcpy_s(tensor_data_ptr, dest_size, str_data_ptr, target_size);
  61. if (cp_ret != EOK) {
  62. MS_LOG(ERROR) << "Print op Failed to copy the memory to py::tensor " << cp_ret;
  63. return false;
  64. }
  65. return true;
  66. }
  67. template <typename T>
  68. void PrintScalarToString(const char *str_data_ptr, const aclDataType &acl_data_type, std::ostringstream *const buf) {
  69. MS_EXCEPTION_IF_NULL(str_data_ptr);
  70. MS_EXCEPTION_IF_NULL(buf);
  71. *buf << "Tensor(shape=[], dtype=" << GetParseType(acl_data_type) << ", value=";
  72. const T *data_ptr = reinterpret_cast<const T *>(str_data_ptr);
  73. if constexpr (std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value) {
  74. const int int_data = static_cast<int>(*data_ptr);
  75. *buf << int_data << ")\n";
  76. } else {
  77. *buf << *data_ptr << ")\n";
  78. }
  79. }
  80. void PrintScalarToBoolString(const char *str_data_ptr, const aclDataType &acl_data_type,
  81. std::ostringstream *const buf) {
  82. MS_EXCEPTION_IF_NULL(str_data_ptr);
  83. MS_EXCEPTION_IF_NULL(buf);
  84. const bool *data_ptr = reinterpret_cast<const bool *>(str_data_ptr);
  85. *buf << "Tensor(shape=[], dtype=" << GetParseType(acl_data_type) << ", value=";
  86. if (*data_ptr) {
  87. *buf << "True)\n";
  88. } else {
  89. *buf << "False)\n";
  90. }
  91. }
  92. void convertDataItem2Scalar(const char *str_data_ptr, const aclDataType &acl_data_type, std::ostringstream *const buf) {
  93. MS_EXCEPTION_IF_NULL(str_data_ptr);
  94. MS_EXCEPTION_IF_NULL(buf);
  95. auto type_iter = print_acl_data_type_map.find(acl_data_type);
  96. auto type_id = type_iter->second;
  97. if (type_id == TypeId::kNumberTypeBool) {
  98. PrintScalarToBoolString(str_data_ptr, acl_data_type, buf);
  99. } else if (type_id == TypeId::kNumberTypeInt8) {
  100. PrintScalarToString<int8_t>(str_data_ptr, acl_data_type, buf);
  101. } else if (type_id == TypeId::kNumberTypeUInt8) {
  102. PrintScalarToString<uint8_t>(str_data_ptr, acl_data_type, buf);
  103. } else if (type_id == TypeId::kNumberTypeInt16) {
  104. PrintScalarToString<int16_t>(str_data_ptr, acl_data_type, buf);
  105. } else if (type_id == TypeId::kNumberTypeUInt16) {
  106. PrintScalarToString<uint16_t>(str_data_ptr, acl_data_type, buf);
  107. } else if (type_id == TypeId::kNumberTypeInt32) {
  108. PrintScalarToString<int32_t>(str_data_ptr, acl_data_type, buf);
  109. } else if (type_id == TypeId::kNumberTypeUInt32) {
  110. PrintScalarToString<uint32_t>(str_data_ptr, acl_data_type, buf);
  111. } else if (type_id == TypeId::kNumberTypeInt64) {
  112. PrintScalarToString<int64_t>(str_data_ptr, acl_data_type, buf);
  113. } else if (type_id == TypeId::kNumberTypeUInt64) {
  114. PrintScalarToString<uint64_t>(str_data_ptr, acl_data_type, buf);
  115. } else if (type_id == TypeId::kNumberTypeFloat16) {
  116. PrintScalarToString<float16>(str_data_ptr, acl_data_type, buf);
  117. } else if (type_id == TypeId::kNumberTypeFloat32) {
  118. PrintScalarToString<float>(str_data_ptr, acl_data_type, buf);
  119. } else if (type_id == TypeId::kNumberTypeFloat64) {
  120. PrintScalarToString<double>(str_data_ptr, acl_data_type, buf);
  121. } else {
  122. MS_LOG(EXCEPTION) << "Cannot print scalar because of unsupported data type: " << GetParseType(acl_data_type) << ".";
  123. }
  124. }
  125. bool judgeLengthValid(const size_t str_len, const aclDataType &acl_data_type) {
  126. auto type_iter = acl_data_type_size_map.find(acl_data_type);
  127. if (type_iter == acl_data_type_size_map.end()) {
  128. MS_LOG(EXCEPTION) << "type of scalar to print is not support.";
  129. }
  130. return str_len == type_iter->second;
  131. }
  132. bool ConvertDataset2Tensor(acltdtDataset *acl_dataset) {
  133. // Acquire Python GIL
  134. py::gil_scoped_acquire gil_acquire;
  135. std::ostringstream buf;
  136. bool ret_end_sequence = false;
  137. size_t acl_dataset_size = acltdtGetDatasetSize(acl_dataset);
  138. for (size_t i = 0; i < acl_dataset_size; i++) {
  139. acltdtDataItem *item = acltdtGetDataItem(acl_dataset, i);
  140. if (acltdtGetTensorTypeFromItem(item) == ACL_TENSOR_DATA_END_OF_SEQUENCE) {
  141. ret_end_sequence = true;
  142. MS_LOG(INFO) << "end of sequence" << std::endl;
  143. break;
  144. }
  145. size_t dim_num = acltdtGetDimNumFromItem(item);
  146. void *acl_addr = acltdtGetDataAddrFromItem(item);
  147. size_t acl_data_size = acltdtGetDataSizeFromItem(item);
  148. aclDataType acl_data_type = acltdtGetDataTypeFromItem(item);
  149. char *acl_data = reinterpret_cast<char *>(acl_addr);
  150. acl_data = const_cast<char *>(reinterpret_cast<std::string *>(acl_data)->c_str());
  151. MS_EXCEPTION_IF_NULL(acl_data);
  152. ShapeVector tensorShape;
  153. tensorShape.resize(dim_num);
  154. if (acltdtGetDimsFromItem(item, tensorShape.data(), dim_num) != ACL_SUCCESS) {
  155. MS_LOG(ERROR) << "ACL failed to get dim-size from acl channel data";
  156. }
  157. if ((tensorShape.size() == 1 && tensorShape[0] == 0) || tensorShape.size() == 0) {
  158. if (!judgeLengthValid(acl_data_size, acl_data_type)) {
  159. MS_LOG(EXCEPTION) << "Print op receive data length is invalid.";
  160. }
  161. convertDataItem2Scalar(acl_data, acl_data_type, &buf);
  162. continue;
  163. }
  164. if (acl_data_type == ACL_STRING) {
  165. std::string data(reinterpret_cast<const char *>(acl_data), acl_data_size);
  166. buf << data << std::endl;
  167. } else {
  168. auto type_iter = print_acl_data_type_map.find(acl_data_type);
  169. if (type_iter == print_acl_data_type_map.end()) {
  170. MS_LOG(ERROR) << "type of tensor need to print is not support " << GetParseType(acl_data_type);
  171. continue;
  172. }
  173. auto type_id = type_iter->second;
  174. mindspore::tensor::Tensor print_tensor(type_id, tensorShape);
  175. if (PrintTensorToString(acl_data, &print_tensor, acl_data_size)) {
  176. buf << print_tensor.ToStringNoLimit() << std::endl;
  177. }
  178. }
  179. }
  180. std::cout << buf.str() << std::endl;
  181. return ret_end_sequence;
  182. }
  183. bool SaveDataset2File(acltdtDataset *acl_dataset, const std::string &print_file_path, prntpb::Print print,
  184. std::fstream *output) {
  185. bool ret_end_thread = false;
  186. for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) {
  187. acltdtDataItem *item = acltdtGetDataItem(acl_dataset, i);
  188. MS_EXCEPTION_IF_NULL(item);
  189. acltdtTensorType acl_tensor_type = acltdtGetTensorTypeFromItem(item);
  190. if (acl_tensor_type == ACL_TENSOR_DATA_END_OF_SEQUENCE) {
  191. MS_LOG(INFO) << "Acl channel received end-of-sequence for print op.";
  192. ret_end_thread = true;
  193. break;
  194. } else if (acl_tensor_type == ACL_TENSOR_DATA_ABNORMAL) {
  195. MS_LOG(INFO) << "Acl channel received abnormal for print op.";
  196. return true;
  197. } else if (acl_tensor_type == ACL_TENSOR_DATA_UNDEFINED) {
  198. MS_LOG(INFO) << "Acl channel received undefined message type for print op.";
  199. return false;
  200. }
  201. prntpb::Print_Value *value = print.add_value();
  202. size_t dim_num = acltdtGetDimNumFromItem(item);
  203. void *acl_addr = acltdtGetDataAddrFromItem(item);
  204. size_t acl_data_size = acltdtGetDataSizeFromItem(item);
  205. aclDataType acl_data_type = acltdtGetDataTypeFromItem(item);
  206. char *acl_data = reinterpret_cast<char *>(acl_addr);
  207. acl_data = const_cast<char *>(reinterpret_cast<std::string *>(acl_data)->c_str());
  208. MS_EXCEPTION_IF_NULL(acl_data);
  209. ShapeVector tensorShape;
  210. tensorShape.resize(dim_num);
  211. if (acltdtGetDimsFromItem(item, tensorShape.data(), dim_num) != ACL_SUCCESS) {
  212. MS_LOG(ERROR) << "ACL failed to get dim-size from acl channel data";
  213. }
  214. if ((tensorShape.size() == 1 && tensorShape[0] == 0) || tensorShape.size() == 0) {
  215. if (!judgeLengthValid(acl_data_size, acl_data_type)) {
  216. MS_LOG(ERROR) << "Print op receive data length is invalid.";
  217. ret_end_thread = true;
  218. }
  219. }
  220. if (acl_data_type == ACL_STRING) {
  221. std::string data(reinterpret_cast<const char *>(acl_data), acl_data_size);
  222. value->set_desc(data);
  223. } else {
  224. auto parse_type = GetParseType(acl_data_type);
  225. prntpb::TensorProto *tensor = value->mutable_tensor();
  226. if (tensorShape.size() > 1 || (tensorShape.size() == 1 && tensorShape[0] != 1)) {
  227. for (const auto &dim : tensorShape) {
  228. tensor->add_dims(static_cast<::google::protobuf::int64>(dim));
  229. }
  230. }
  231. tensor->set_tensor_type(parse_type);
  232. std::string data(reinterpret_cast<const char *>(acl_data), acl_data_size);
  233. tensor->set_tensor_content(data);
  234. }
  235. if (!print.SerializeToOstream(output)) {
  236. MS_LOG(ERROR) << "Save print file:" << print_file_path << " fail.";
  237. ret_end_thread = true;
  238. break;
  239. }
  240. print.Clear();
  241. }
  242. return ret_end_thread;
  243. }
  244. void TensorPrint::operator()() {
  245. prntpb::Print print;
  246. auto ms_context = MsContext::GetInstance();
  247. MS_EXCEPTION_IF_NULL(ms_context);
  248. std::string print_file_path = ms_context->get_param<std::string>(MS_CTX_PRINT_FILE_PATH);
  249. if (print_file_path == "") {
  250. while (true) {
  251. acltdtDataset *acl_dataset = acltdtCreateDataset();
  252. if (acl_dataset == nullptr) {
  253. MS_LOG(ERROR) << "Failed to create acl dateaset.";
  254. }
  255. if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) {
  256. MS_LOG(ERROR) << "AclHandle failed to receive tensor.";
  257. break;
  258. }
  259. if (ConvertDataset2Tensor(acl_dataset)) {
  260. break;
  261. }
  262. }
  263. } else {
  264. std::fstream output(print_file_path, std::ios::out | std::ios::trunc | std::ios::binary);
  265. while (true) {
  266. acltdtDataset *acl_dataset = acltdtCreateDataset();
  267. if (acl_dataset == nullptr) {
  268. MS_LOG(ERROR) << "Failed to create acl dateaset.";
  269. }
  270. if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) {
  271. MS_LOG(ERROR) << "Acltdt failed to receive tensor.";
  272. break;
  273. }
  274. if (SaveDataset2File(acl_dataset, print_file_path, print, &output)) {
  275. break;
  276. }
  277. }
  278. output.close();
  279. std::string path_string = print_file_path;
  280. if (chmod(common::SafeCStr(path_string), S_IRUSR) == -1) {
  281. MS_LOG(ERROR) << "Modify file:" << print_file_path << " fail.";
  282. return;
  283. }
  284. }
  285. }
  286. #endif
  287. } // namespace mindspore