You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

data_helper.h 8.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_DATA_HELPER_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_DATA_HELPER_H_
  18. #include <fstream>
  19. #include <iostream>
  20. #include <map>
  21. #include <memory>
  22. #include <sstream>
  23. #include <string>
  24. #include <unordered_map>
  25. #include <vector>
  26. #include <nlohmann/json.hpp>
  27. #include "minddata/dataset/core/constants.h"
  28. #include "minddata/dataset/core/data_type.h"
  29. #include "minddata/dataset/core/tensor.h"
  30. #include "minddata/dataset/core/tensor_shape.h"
  31. #include "minddata/dataset/util/log_adapter.h"
  32. #include "minddata/dataset/util/path.h"
  33. #include "minddata/dataset/util/status.h"
  34. namespace mindspore {
  35. namespace dataset {
  36. /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
  37. class DataHelper {
  38. public:
  39. /// \brief constructor
  40. DataHelper() {}
  41. /// \brief Destructor
  42. ~DataHelper() = default;
  43. /// \brief Create an Album dataset while taking in a path to a image folder
  44. /// Creates the output directory if doesn't exist
  45. /// \param[in] in_dir Image folder directory that takes in images
  46. /// \param[in] out_dir Directory containing output json files
  47. Status CreateAlbum(const std::string &in_dir, const std::string &out_dir);
  48. /// \brief Update a json file field with a vector of integers
  49. /// \param in_file The input file name to read in
  50. /// \param key Key of field to write to
  51. /// \param value Value array to write to file
  52. /// \param out_file Optional input for output file path, will write to input file if not specified
  53. /// \return Status The error code return
  54. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
  55. const std::string &out_file = "");
  56. /// \brief Update a json file field with a vector of type T values
  57. /// \param in_file The input file name to read in
  58. /// \param key Key of field to write to
  59. /// \param value Value array to write to file
  60. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  61. /// \return Status The error code return
  62. template <typename T>
  63. Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<T> &value,
  64. const std::string &out_file = "") {
  65. try {
  66. Path in = Path(in_file);
  67. nlohmann::json js;
  68. if (in.Exists()) {
  69. std::ifstream in(in_file);
  70. MS_LOG(INFO) << "Filename: " << in_file << ".";
  71. in >> js;
  72. in.close();
  73. }
  74. js[key] = value;
  75. MS_LOG(INFO) << "Write outfile is: " << js << ".";
  76. if (out_file == "") {
  77. std::ofstream o(in_file, std::ofstream::trunc);
  78. o << js;
  79. o.close();
  80. } else {
  81. std::ofstream o(out_file, std::ofstream::trunc);
  82. o << js;
  83. o.close();
  84. }
  85. }
  86. // Catch any exception and convert to Status return code
  87. catch (const std::exception &err) {
  88. RETURN_STATUS_UNEXPECTED("Update json failed ");
  89. }
  90. return Status::OK();
  91. }
  92. /// \brief Update a json file field with a single value of of type T
  93. /// \param in_file The input file name to read in
  94. /// \param key Key of field to write to
  95. /// \param value Value to write to file
  96. /// \param out_file Optional parameter for output file path, will write to input file if not specified
  97. /// \return Status The error code return
  98. template <typename T>
  99. Status UpdateValue(const std::string &in_file, const std::string &key, const T &value,
  100. const std::string &out_file = "") {
  101. try {
  102. Path in = Path(in_file);
  103. nlohmann::json js;
  104. if (in.Exists()) {
  105. std::ifstream in(in_file);
  106. MS_LOG(INFO) << "Filename: " << in_file << ".";
  107. in >> js;
  108. in.close();
  109. }
  110. js[key] = value;
  111. MS_LOG(INFO) << "Write outfile is: " << js << ".";
  112. if (out_file == "") {
  113. std::ofstream o(in_file, std::ofstream::trunc);
  114. o << js;
  115. o.close();
  116. } else {
  117. std::ofstream o(out_file, std::ofstream::trunc);
  118. o << js;
  119. o.close();
  120. }
  121. }
  122. // Catch any exception and convert to Status return code
  123. catch (const std::exception &err) {
  124. RETURN_STATUS_UNEXPECTED("Update json failed ");
  125. }
  126. return Status::OK();
  127. }
  128. /// \brief Template function to write tensor to file
  129. /// \param[in] in_file File to write to
  130. /// \param[in] data Array of type T values
  131. /// \return Status The error code return
  132. template <typename T>
  133. Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
  134. try {
  135. std::ofstream o(in_file, std::ios::binary | std::ios::out);
  136. if (!o.is_open()) {
  137. RETURN_STATUS_UNEXPECTED("Error opening Bin file to write");
  138. }
  139. size_t length = data.size();
  140. o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T)));
  141. o.close();
  142. }
  143. // Catch any exception and convert to Status return code
  144. catch (const std::exception &err) {
  145. RETURN_STATUS_UNEXPECTED("Write bin file failed ");
  146. }
  147. return Status::OK();
  148. }
  149. /// \brief Write pointer to bin, use pointer to avoid memcpy
  150. /// \param[in] in_file File name to write to
  151. /// \param[in] data Pointer to data
  152. /// \param[in] length Length of values to write from pointer
  153. /// \return Status The error code return
  154. template <typename T>
  155. Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
  156. try {
  157. std::ofstream o(in_file, std::ios::binary | std::ios::out);
  158. if (!o.is_open()) {
  159. RETURN_STATUS_UNEXPECTED("Error opening Bin file to write");
  160. }
  161. o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T)));
  162. o.close();
  163. }
  164. // Catch any exception and convert to Status return code
  165. catch (const std::exception &err) {
  166. RETURN_STATUS_UNEXPECTED("Write bin file failed ");
  167. }
  168. return Status::OK();
  169. }
  170. /// \brief Helper function to copy content of a tensor to buffer
  171. /// \note This function iterates over the tensor in bytes, since
  172. /// \param[in] input The tensor to copy value from
  173. /// \param[out] addr The address to copy tensor data to
  174. /// \param[in] buffer_size The buffer size of addr
  175. /// \return The size of the tensor (bytes copied
  176. size_t DumpTensor(const std::shared_ptr<Tensor> &input, void *addr, const size_t &buffer_size);
  177. /// \brief Helper function to delete key in json file
  178. /// note This function will return okay even if key not found
  179. /// \param[in] in_file Json file to remove key from
  180. /// \param[in] key The key to remove
  181. /// \return Status The error code return
  182. Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "");
  183. /// \brief A print method typically used for debugging
  184. /// \param out - The output stream to write output to
  185. void Print(std::ostream &out) const;
  186. /// \brief << Stream output operator overload
  187. /// \notes This allows you to write the debug print info using stream operators
  188. /// \param out Reference to the output stream being overloaded
  189. /// \param ds Reference to the DataSchema to display
  190. /// \return The output stream must be returned
  191. friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) {
  192. dh.Print(out);
  193. return out;
  194. }
  195. };
  196. } // namespace dataset
  197. } // namespace mindspore
  198. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_DATA_HELPER_H_