You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

data_helper.h 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATA_HELPER_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATA_HELPER_H_
  18. #include <sys/stat.h>
  19. #include <fstream>
  20. #include <iostream>
  21. #include <map>
  22. #include <memory>
  23. #include <sstream>
  24. #include <string>
  25. #include <unordered_map>
  26. #include <vector>
  27. #include "include/api/dual_abi_helper.h"
  28. #include "include/api/status.h"
  29. namespace mindspore {
  30. namespace dataset {
  31. /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
  32. class DataHelper {
  33. public:
  34. /// \brief constructor
  35. DataHelper() {}
  36. /// \brief Destructor
  37. ~DataHelper() = default;
  38. /// \brief Create an Album dataset while taking in a path to a image folder
  39. /// Creates the output directory if doesn't exist
  40. /// \param[in] in_dir Image folder directory that takes in images
  41. /// \param[in] out_dir Directory containing output json files
  42. Status CreateAlbum(const std::string &in_dir, const std::string &out_dir) {
  43. return CreateAlbumIF(StringToChar(in_dir), StringToChar(out_dir));
  44. }
  45. /// \brief Update a json file field with a vector of string values
  46. /// \param in_file The input file name to read in
  47. /// \param key Key of field to write to
  48. /// \param value Value array to write to file
  49. /// \param out_file Optional input for output file path, will write to input file if not specified
  50. /// \return Status The status code returned
  51. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
  52. const std::string &out_file = "") {
  53. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), VectorStringToChar(value), StringToChar(out_file));
  54. }
  55. /// \brief Update a json file field with a vector of bool values
  56. /// \param in_file The input file name to read in
  57. /// \param key Key of field to write to
  58. /// \param value Value array to write to file
  59. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  60. /// \return Status The status code returned
  61. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<bool> &value,
  62. const std::string &out_file = "") {
  63. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  64. }
  65. /// \brief Update a json file field with a vector of int8 values
  66. /// \param in_file The input file name to read in
  67. /// \param key Key of field to write to
  68. /// \param value Value array to write to file
  69. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  70. /// \return Status The status code returned
  71. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int8_t> &value,
  72. const std::string &out_file = "") {
  73. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  74. }
  75. /// \brief Update a json file field with a vector of uint8 values
  76. /// \param in_file The input file name to read in
  77. /// \param key Key of field to write to
  78. /// \param value Value array to write to file
  79. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  80. /// \return Status The status code returned
  81. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint8_t> &value,
  82. const std::string &out_file = "") {
  83. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  84. }
  85. /// \brief Update a json file field with a vector of int16 values
  86. /// \param in_file The input file name to read in
  87. /// \param key Key of field to write to
  88. /// \param value Value array to write to file
  89. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  90. /// \return Status The status code returned
  91. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int16_t> &value,
  92. const std::string &out_file = "") {
  93. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  94. }
  95. /// \brief Update a json file field with a vector of uint16 values
  96. /// \param in_file The input file name to read in
  97. /// \param key Key of field to write to
  98. /// \param value Value array to write to file
  99. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  100. /// \return Status The status code returned
  101. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint16_t> &value,
  102. const std::string &out_file = "") {
  103. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  104. }
  105. /// \brief Update a json file field with a vector of int32 values
  106. /// \param in_file The input file name to read in
  107. /// \param key Key of field to write to
  108. /// \param value Value array to write to file
  109. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  110. /// \return Status The status code returned
  111. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int32_t> &value,
  112. const std::string &out_file = "") {
  113. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  114. }
  115. /// \brief Update a json file field with a vector of uint32 values
  116. /// \param in_file The input file name to read in
  117. /// \param key Key of field to write to
  118. /// \param value Value array to write to file
  119. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  120. /// \return Status The status code returned
  121. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint32_t> &value,
  122. const std::string &out_file = "") {
  123. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  124. }
  125. /// \brief Update a json file field with a vector of int64 values
  126. /// \param in_file The input file name to read in
  127. /// \param key Key of field to write to
  128. /// \param value Value array to write to file
  129. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  130. /// \return Status The status code returned
  131. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int64_t> &value,
  132. const std::string &out_file = "") {
  133. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  134. }
  135. /// \brief Update a json file field with a vector of uint64 values
  136. /// \param in_file The input file name to read in
  137. /// \param key Key of field to write to
  138. /// \param value Value array to write to file
  139. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  140. /// \return Status The status code returned
  141. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint64_t> &value,
  142. const std::string &out_file = "") {
  143. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  144. }
  145. /// \brief Update a json file field with a vector of float values
  146. /// \param in_file The input file name to read in
  147. /// \param key Key of field to write to
  148. /// \param value Value array to write to file
  149. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  150. /// \return Status The status code returned
  151. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<float> &value,
  152. const std::string &out_file = "") {
  153. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  154. }
  155. /// \brief Update a json file field with a vector of double values
  156. /// \param in_file The input file name to read in
  157. /// \param key Key of field to write to
  158. /// \param value Value array to write to file
  159. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  160. /// \return Status The status code returned
  161. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<double> &value,
  162. const std::string &out_file = "") {
  163. return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  164. }
  165. /// \brief Update a json file field with a string value
  166. /// \param in_file The input file name to read in
  167. /// \param key Key of field to write to
  168. /// \param value Value to write to file
  169. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  170. /// \return Status The status code returned
  171. Status UpdateValue(const std::string &in_file, const std::string &key, const std::string &value,
  172. const std::string &out_file = "") {
  173. return UpdateValueIF(StringToChar(in_file), StringToChar(key), StringToChar(value), StringToChar(out_file));
  174. }
  175. /// \brief Update a json file field with a bool value
  176. /// \param in_file The input file name to read in
  177. /// \param key Key of field to write to
  178. /// \param value Value to write to file
  179. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  180. /// \return Status The status code returned
  181. Status UpdateValue(const std::string &in_file, const std::string &key, const bool &value,
  182. const std::string &out_file = "") {
  183. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  184. }
  185. /// \brief Update a json file field with an int8 value
  186. /// \param in_file The input file name to read in
  187. /// \param key Key of field to write to
  188. /// \param value Value to write to file
  189. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  190. /// \return Status The status code returned
  191. Status UpdateValue(const std::string &in_file, const std::string &key, const int8_t &value,
  192. const std::string &out_file = "") {
  193. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  194. }
  195. /// \brief Update a json file field with an uint8 value
  196. /// \param in_file The input file name to read in
  197. /// \param key Key of field to write to
  198. /// \param value Value to write to file
  199. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  200. /// \return Status The status code returned
  201. Status UpdateValue(const std::string &in_file, const std::string &key, const uint8_t &value,
  202. const std::string &out_file = "") {
  203. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  204. }
  205. /// \brief Update a json file field with an int16 value
  206. /// \param in_file The input file name to read in
  207. /// \param key Key of field to write to
  208. /// \param value Value to write to file
  209. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  210. /// \return Status The status code returned
  211. Status UpdateValue(const std::string &in_file, const std::string &key, const int16_t &value,
  212. const std::string &out_file = "") {
  213. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  214. }
  215. /// \brief Update a json file field with an uint16 value
  216. /// \param in_file The input file name to read in
  217. /// \param key Key of field to write to
  218. /// \param value Value to write to file
  219. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  220. /// \return Status The status code returned
  221. Status UpdateValue(const std::string &in_file, const std::string &key, const uint16_t &value,
  222. const std::string &out_file = "") {
  223. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  224. }
  225. /// \brief Update a json file field with an int32 value
  226. /// \param in_file The input file name to read in
  227. /// \param key Key of field to write to
  228. /// \param value Value to write to file
  229. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  230. /// \return Status The status code returned
  231. Status UpdateValue(const std::string &in_file, const std::string &key, const int32_t &value,
  232. const std::string &out_file = "") {
  233. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  234. }
  235. /// \brief Update a json file field with an uint32 value
  236. /// \param in_file The input file name to read in
  237. /// \param key Key of field to write to
  238. /// \param value Value to write to file
  239. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  240. /// \return Status The status code returned
  241. Status UpdateValue(const std::string &in_file, const std::string &key, const uint32_t &value,
  242. const std::string &out_file = "") {
  243. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  244. }
  245. /// \brief Update a json file field with an int64 value
  246. /// \param in_file The input file name to read in
  247. /// \param key Key of field to write to
  248. /// \param value Value to write to file
  249. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  250. /// \return Status The status code returned
  251. Status UpdateValue(const std::string &in_file, const std::string &key, const int64_t &value,
  252. const std::string &out_file = "") {
  253. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  254. }
  255. /// \brief Update a json file field with an uint64 value
  256. /// \param in_file The input file name to read in
  257. /// \param key Key of field to write to
  258. /// \param value Value to write to file
  259. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  260. /// \return Status The status code returned
  261. Status UpdateValue(const std::string &in_file, const std::string &key, const uint64_t &value,
  262. const std::string &out_file = "") {
  263. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  264. }
  265. /// \brief Update a json file field with a float value
  266. /// \param in_file The input file name to read in
  267. /// \param key Key of field to write to
  268. /// \param value Value to write to file
  269. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  270. /// \return Status The status code returned
  271. Status UpdateValue(const std::string &in_file, const std::string &key, const float &value,
  272. const std::string &out_file = "") {
  273. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  274. }
  275. /// \brief Update a json file field with a double value
  276. /// \param in_file The input file name to read in
  277. /// \param key Key of field to write to
  278. /// \param value Value to write to file
  279. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  280. /// \return Status The status code returned
  281. Status UpdateValue(const std::string &in_file, const std::string &key, const double &value,
  282. const std::string &out_file = "") {
  283. return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
  284. }
  285. /// \brief Template function to write tensor to file
  286. /// \param[in] in_file File to write to
  287. /// \param[in] data Array of type T values
  288. /// \return Status The status code returned
  289. template <typename T>
  290. Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
  291. try {
  292. std::ofstream o(in_file, std::ios::binary | std::ios::out);
  293. if (!o.is_open()) {
  294. return Status(kMDUnexpectedError, "Error opening Bin file to write");
  295. }
  296. size_t length = data.size();
  297. o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T)));
  298. o.close();
  299. }
  300. // Catch any exception and convert to Status return code
  301. catch (const std::exception &err) {
  302. return Status(kMDUnexpectedError, "Write bin file failed ");
  303. }
  304. return Status::OK();
  305. }
  306. /// \brief Write pointer to bin, use pointer to avoid memcpy
  307. /// \param[in] in_file File name to write to
  308. /// \param[in] data Pointer to data
  309. /// \param[in] length Length of values to write from pointer
  310. /// \return Status The status code returned
  311. template <typename T>
  312. Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
  313. try {
  314. std::ofstream o(in_file, std::ios::binary | std::ios::out);
  315. if (!o.is_open()) {
  316. return Status(kMDUnexpectedError, "Error opening Bin file to write");
  317. }
  318. o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T)));
  319. o.close();
  320. }
  321. // Catch any exception and convert to Status return code
  322. catch (const std::exception &err) {
  323. return Status(kMDUnexpectedError, "Write bin file failed ");
  324. }
  325. return Status::OK();
  326. }
  327. /// \brief Helper function to copy content of a tensor to buffer
  328. /// \note This function iterates over the tensor in bytes, since
  329. /// \param[in] tensor_addr The memory held by a tensor
  330. /// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes()
  331. /// \param[out] addr The address to copy tensor data to
  332. /// \param[in] buffer_size The buffer size of addr
  333. /// \return The size of the tensor (bytes copied
  334. size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size);
  335. /// \brief Helper function to delete key in json file
  336. /// note This function will return okay even if key not found
  337. /// \param[in] in_file Json file to remove key from
  338. /// \param[in] key The key to remove
  339. /// \return Status The status code returned
  340. Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "") {
  341. return RemoveKeyIF(StringToChar(in_file), StringToChar(key), StringToChar(out_file));
  342. }
  343. /// \brief A print method typically used for debugging
  344. /// \param out - The output stream to write output to
  345. void Print(std::ostream &out) const;
  346. /// \brief << Stream output operator overload
  347. /// \notes This allows you to write the debug print info using stream operators
  348. /// \param out Reference to the output stream being overloaded
  349. /// \param ds Reference to the DataSchema to display
  350. /// \return The output stream must be returned
  351. friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) {
  352. dh.Print(out);
  353. return out;
  354. }
  355. private:
  356. // Helper function for dual ABI support
  357. Status CreateAlbumIF(const std::vector<char> &in_dir, const std::vector<char> &out_dir);
  358. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  359. const std::vector<std::vector<char>> &value, const std::vector<char> &out_file);
  360. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<bool> &value,
  361. const std::vector<char> &out_file);
  362. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<int8_t> &value,
  363. const std::vector<char> &out_file);
  364. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  365. const std::vector<uint8_t> &value, const std::vector<char> &out_file);
  366. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  367. const std::vector<int16_t> &value, const std::vector<char> &out_file);
  368. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  369. const std::vector<uint16_t> &value, const std::vector<char> &out_file);
  370. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  371. const std::vector<int32_t> &value, const std::vector<char> &out_file);
  372. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  373. const std::vector<uint32_t> &value, const std::vector<char> &out_file);
  374. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  375. const std::vector<int64_t> &value, const std::vector<char> &out_file);
  376. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
  377. const std::vector<uint64_t> &value, const std::vector<char> &out_file);
  378. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<float> &value,
  379. const std::vector<char> &out_file);
  380. Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<double> &value,
  381. const std::vector<char> &out_file);
  382. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &value,
  383. const std::vector<char> &out_file);
  384. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const bool &value,
  385. const std::vector<char> &out_file);
  386. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int8_t &value,
  387. const std::vector<char> &out_file);
  388. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint8_t &value,
  389. const std::vector<char> &out_file);
  390. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int16_t &value,
  391. const std::vector<char> &out_file);
  392. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint16_t &value,
  393. const std::vector<char> &out_file);
  394. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int32_t &value,
  395. const std::vector<char> &out_file);
  396. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint32_t &value,
  397. const std::vector<char> &out_file);
  398. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int64_t &value,
  399. const std::vector<char> &out_file);
  400. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint64_t &value,
  401. const std::vector<char> &out_file);
  402. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const float &value,
  403. const std::vector<char> &out_file);
  404. Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const double &value,
  405. const std::vector<char> &out_file);
  406. Status RemoveKeyIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &out_file);
  407. };
  408. } // namespace dataset
  409. } // namespace mindspore
  410. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATA_HELPER_H_