You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deserialize_test.cc 29 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/core/global_context.h"
  18. #include "minddata/dataset/engine/serdes.h"
  19. #include "minddata/dataset/include/dataset/datasets.h"
  20. #include "minddata/dataset/include/dataset/vision.h"
  21. #include "minddata/dataset/include/dataset/transforms.h"
  22. #include "minddata/dataset/kernels/ir/data/transforms_ir.h"
  23. using namespace mindspore::dataset;
  24. using mindspore::dataset::DatasetNode;
  25. using mindspore::dataset::ShuffleMode;
  26. using mindspore::dataset::Tensor;
  27. class MindDataTestDeserialize : public UT::DatasetOpTesting {
  28. protected:
  29. };
  30. void compare_dataset(std::shared_ptr<DatasetNode> ds) {
  31. nlohmann::json out_json;
  32. ASSERT_OK(Serdes::SaveToJSON(ds, "dataset_pipeline.json", &out_json));
  33. // output the deserialized out_json to ds1 and then out_json1
  34. std::shared_ptr<DatasetNode> ds1;
  35. ASSERT_OK(Serdes::Deserialize("dataset_pipeline.json", &ds1));
  36. EXPECT_NE(ds1, nullptr);
  37. // check original and deserialized dataset are the same
  38. nlohmann::json out_json1;
  39. ASSERT_OK(Serdes::SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1));
  40. std::stringstream json_ss;
  41. json_ss << out_json;
  42. std::stringstream json_ss1;
  43. json_ss1 << out_json1;
  44. EXPECT_EQ(json_ss.str(), json_ss1.str());
  45. return;
  46. }
  47. // test mnist dataset, and special cases of tensor operations (no input or tensor operation input)
  48. TEST_F(MindDataTestDeserialize, TestDeserializeMnist) {
  49. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Minist.";
  50. std::string data_dir = "./data/dataset/testMnistData";
  51. std::string usage = "all";
  52. std::shared_ptr<SamplerObj> sampler = std::make_shared<RandomSamplerObj>(true, 100);
  53. std::shared_ptr<DatasetNode> ds = std::make_shared<MnistNode>(data_dir, usage, sampler, nullptr);
  54. std::shared_ptr<TensorOperation> operation0 = std::make_shared<vision::EqualizeOperation>();
  55. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::BoundingBoxAugmentOperation>(operation0, 0.5);
  56. std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::HorizontalFlipOperation>();
  57. std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::HwcToChwOperation>();
  58. std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::RgbaToBgrOperation>();
  59. std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::RgbaToRgbOperation>();
  60. std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::SwapRedBlueOperation>();
  61. std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy;
  62. std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> sub_policy;
  63. sub_policy.push_back(std::make_pair(operation1, 0.4));
  64. policy.push_back(sub_policy);
  65. std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::RandomSelectSubpolicyOperation>(policy);
  66. std::vector<std::shared_ptr<TensorOperation>> transforms;
  67. transforms.push_back(operation2);
  68. transforms.push_back(operation3);
  69. transforms.push_back(operation4);
  70. std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::UniformAugOperation>(transforms, 3);
  71. transforms.push_back(operation5);
  72. transforms.push_back(operation6);
  73. transforms.push_back(operation7);
  74. transforms.push_back(operation8);
  75. ds = std::make_shared<MapNode>(ds, transforms);
  76. ds = std::make_shared<BatchNode>(ds, 10, true);
  77. compare_dataset(ds);
  78. }
  79. // test celeba dataset and part of the tensor operation
  80. TEST_F(MindDataTestDeserialize, TestDeserializeCelebA) {
  81. MS_LOG(INFO) << "Doing MindDataTestDeserialize-CelebA.";
  82. std::string data_dir = "./data/dataset/testCelebAData/";
  83. std::string usage = "all";
  84. std::shared_ptr<SamplerObj> sampler = std::make_shared<DistributedSamplerObj>(1, 0, true, 2, 1, 1, true);
  85. bool decode = true;
  86. std::set<std::string> extensions = {};
  87. std::shared_ptr<DatasetCache> cache = nullptr;
  88. std::shared_ptr<DatasetNode> ds = std::make_shared<CelebANode>(data_dir, usage, sampler, decode, extensions, cache);
  89. std::vector<int32_t> size = {80, 80};
  90. std::vector<int32_t> size1 = {80, 80};
  91. std::vector<int32_t> coordinates = {5, 5};
  92. std::vector<int32_t> padding = {20, 20, 20, 20};
  93. std::vector<uint8_t> fill_value = {20, 20, 20};
  94. std::vector<uint32_t> ignore = {20, 20, 20, 20};
  95. std::vector<float> mean = {2.0, 2.0, 2.0, 2.0};
  96. std::vector<float> std = {0.5, 0.5, 0.5, 0.5};
  97. std::vector<float> translation = {0.5, 0.5};
  98. std::vector<float> shear = {0.5, 0.5};
  99. std::vector<float> sigma = {0.5, 0.5};
  100. InterpolationMode interpolation = InterpolationMode::kLinear;
  101. std::shared_ptr<TensorOperation> operation0 =
  102. std::make_shared<vision::AffineOperation>(0.0, translation, 0.5, shear, interpolation, fill_value);
  103. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::AutoContrastOperation>(0.5, ignore);
  104. std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::CenterCropOperation>(size);
  105. std::shared_ptr<TensorOperation> operation3 =
  106. std::make_shared<vision::CutMixBatchOperation>(ImageBatchFormat::kNHWC, 0.1, 0.1);
  107. std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::CutOutOperation>(1, 1);
  108. std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::DecodeOperation>(true);
  109. std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::GaussianBlurOperation>(coordinates, sigma);
  110. std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::MixUpBatchOperation>(1.0);
  111. std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::NormalizeOperation>(mean, std);
  112. std::shared_ptr<TensorOperation> operation9 = std::make_shared<vision::NormalizePadOperation>(mean, std, "float");
  113. std::shared_ptr<TensorOperation> operation10 =
  114. std::make_shared<vision::PadOperation>(padding, fill_value, BorderType::kConstant);
  115. std::shared_ptr<TensorOperation> operation11 = std::make_shared<vision::RescaleOperation>(1.0, 0.5);
  116. std::shared_ptr<TensorOperation> operation12 = std::make_shared<vision::ResizePreserveAROperation>(10, 10, 0);
  117. std::shared_ptr<TensorOperation> operation13 = std::make_shared<vision::ResizeWithBBoxOperation>(size, interpolation);
  118. std::shared_ptr<TensorOperation> operation14 = std::make_shared<vision::ResizeOperation>(size, interpolation);
  119. std::vector<std::shared_ptr<TensorOperation>> operations;
  120. operations.push_back(operation0);
  121. operations.push_back(operation1);
  122. operations.push_back(operation2);
  123. operations.push_back(operation3);
  124. operations.push_back(operation4);
  125. operations.push_back(operation5);
  126. operations.push_back(operation6);
  127. operations.push_back(operation7);
  128. operations.push_back(operation8);
  129. operations.push_back(operation9);
  130. operations.push_back(operation10);
  131. operations.push_back(operation11);
  132. operations.push_back(operation12);
  133. operations.push_back(operation13);
  134. operations.push_back(operation14);
  135. ds = std::make_shared<RepeatNode>(ds, 2);
  136. ds = std::make_shared<MapNode>(ds, operations);
  137. compare_dataset(ds);
  138. }
  139. // test cifar10 dataset and random tensor operations
  140. TEST_F(MindDataTestDeserialize, TestDeserializeCifar10) {
  141. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cifar10.";
  142. std::string data_dir = "./data/dataset/testCifar10Data";
  143. std::string usage = "all";
  144. std::shared_ptr<DatasetCache> cache = nullptr;
  145. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  146. std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar10Node>(data_dir, usage, sampler, cache);
  147. std::vector<float> center = {50.0, 50.0};
  148. std::vector<uint8_t> threshold = {5, 5};
  149. std::vector<uint8_t> fill_value = {150, 150, 150};
  150. std::vector<uint8_t> bit_range = {5, 15};
  151. std::vector<float> degrees = {0.0, 0.0};
  152. std::vector<float> scale = {0.5, 0.5};
  153. std::vector<float> ratio = {0.5, 0.5};
  154. std::vector<int32_t> size = {224, 224};
  155. std::vector<int32_t> padding = {20, 20, 20, 20};
  156. std::vector<float_t> translate_range = {0.0, 0.0, 0.0, 0.0};
  157. std::vector<float_t> scale_range = {1.0, 1.0};
  158. std::vector<float_t> shear_ranges = {0.0, 0.0, 0.0, 0.0};
  159. InterpolationMode interpolation = InterpolationMode::kLinear;
  160. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::RandomRotationOperation>(
  161. degrees, InterpolationMode::kNearestNeighbour, true, center, fill_value);
  162. std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::RandomAffineOperation>(
  163. degrees, translate_range, scale_range, shear_ranges, interpolation, fill_value);
  164. std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RandomColorOperation>(0.5, 10.5);
  165. std::shared_ptr<TensorOperation> operation4 =
  166. std::make_shared<vision::RandomCropDecodeResizeOperation>(size, scale, ratio, interpolation, 2);
  167. std::shared_ptr<TensorOperation> operation5 =
  168. std::make_shared<vision::RandomCropWithBBoxOperation>(size, padding, true, fill_value, BorderType::kConstant);
  169. std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::RandomHorizontalFlipOperation>(0.1);
  170. std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::RandomHorizontalFlipWithBBoxOperation>(0.1);
  171. std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::RandomPosterizeOperation>(bit_range);
  172. std::shared_ptr<TensorOperation> operation9 = std::make_shared<vision::RandomResizeOperation>(size);
  173. std::shared_ptr<TensorOperation> operation10 = std::make_shared<vision::RandomResizeWithBBoxOperation>(size);
  174. std::shared_ptr<TensorOperation> operation11 =
  175. std::make_shared<vision::RandomResizedCropOperation>(size, scale, ratio, interpolation, 2);
  176. std::shared_ptr<TensorOperation> operation12 =
  177. std::make_shared<vision::RandomResizedCropWithBBoxOperation>(size, scale, ratio, interpolation, 2);
  178. std::shared_ptr<TensorOperation> operation13 =
  179. std::make_shared<vision::RandomRotationOperation>(degrees, interpolation, true, center, fill_value);
  180. std::shared_ptr<TensorOperation> operation14 = std::make_shared<vision::RandomSharpnessOperation>(degrees);
  181. std::shared_ptr<TensorOperation> operation15 = std::make_shared<vision::RandomSolarizeOperation>(threshold);
  182. std::shared_ptr<TensorOperation> operation16 = std::make_shared<vision::RandomVerticalFlipOperation>(0.1);
  183. std::shared_ptr<TensorOperation> operation17 = std::make_shared<vision::RandomVerticalFlipWithBBoxOperation>(0.1);
  184. std::vector<std::shared_ptr<TensorOperation>> operations;
  185. operations.push_back(operation1);
  186. operations.push_back(operation2);
  187. operations.push_back(operation3);
  188. operations.push_back(operation4);
  189. operations.push_back(operation5);
  190. operations.push_back(operation6);
  191. operations.push_back(operation7);
  192. operations.push_back(operation8);
  193. operations.push_back(operation9);
  194. operations.push_back(operation10);
  195. operations.push_back(operation11);
  196. operations.push_back(operation12);
  197. operations.push_back(operation13);
  198. operations.push_back(operation14);
  199. operations.push_back(operation15);
  200. operations.push_back(operation16);
  201. operations.push_back(operation17);
  202. ds = std::make_shared<MapNode>(ds, operations);
  203. ds = std::make_shared<BatchNode>(ds, 1, true);
  204. ds = std::make_shared<SkipNode>(ds, 1);
  205. compare_dataset(ds);
  206. }
  207. TEST_F(MindDataTestDeserialize, TestDeserializeCifar100) {
  208. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cifar100.";
  209. std::string data_dir = "./data/dataset/testCifar100Data";
  210. std::string usage = "all";
  211. std::shared_ptr<DatasetCache> cache = nullptr;
  212. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  213. std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar100Node>(data_dir, usage, sampler, cache);
  214. ds = std::make_shared<TakeNode>(ds, 6);
  215. std::shared_ptr<TensorOperation> operation = std::make_shared<vision::HorizontalFlipOperation>();
  216. std::vector<std::shared_ptr<TensorOperation>> ops = {operation};
  217. ds = std::make_shared<MapNode>(ds, ops);
  218. std::vector<std::shared_ptr<TensorOperation>> operations;
  219. std::vector<int32_t> size = {32, 32};
  220. std::vector<int32_t> padding = {4, 4, 4, 4};
  221. bool pad_if_needed = false;
  222. std::vector<uint8_t> fill_value = {4, 4, 4};
  223. InterpolationMode interpolation = InterpolationMode::kLinear;
  224. std::shared_ptr<TensorOperation> operation1 =
  225. std::make_shared<vision::RandomCropOperation>(size, padding, pad_if_needed, fill_value, BorderType::kConstant);
  226. size = {224, 224};
  227. std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::ResizeOperation>(size, interpolation);
  228. std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RescaleOperation>(0.5, 0.0);
  229. std::vector<float> mean = {0.49, 0.48, 0.46};
  230. std::vector<float> std = {0.20, 0.199, 0.201};
  231. std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::NormalizeOperation>(mean, std);
  232. operations.push_back(operation1);
  233. operations.push_back(operation2);
  234. operations.push_back(operation3);
  235. operations.push_back(operation4);
  236. ds = std::make_shared<MapNode>(ds, operations);
  237. ds = std::make_shared<BatchNode>(ds, 3, true);
  238. ds = std::make_shared<RepeatNode>(ds, 1);
  239. compare_dataset(ds);
  240. }
  241. TEST_F(MindDataTestDeserialize, TestDeserializeCSV) {
  242. MS_LOG(INFO) << "Doing MindDataTestDeserialize-CSV.";
  243. std::string data_file = "./data/dataset/testCSV/1.csv";
  244. std::vector<std::string> dataset_files = {data_file};
  245. char field_delim = ',';
  246. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  247. std::vector<std::string> columns = {"col1", "col4", "col2"};
  248. std::vector<std::shared_ptr<CsvBase>> column_defaults = {};
  249. std::shared_ptr<DatasetCache> cache = nullptr;
  250. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  251. std::shared_ptr<DatasetNode> ds = std::make_shared<CSVNode>(dataset_files, field_delim, column_defaults, column_names,
  252. 3, ShuffleMode::kGlobal, 1, 0, cache);
  253. ds = std::make_shared<ProjectNode>(ds, columns);
  254. compare_dataset(ds);
  255. }
  256. TEST_F(MindDataTestDeserialize, TestDeserializeImageFolder) {
  257. MS_LOG(INFO) << "Doing MindDataTestDeserialize-ImageFolder.";
  258. std::string dataset_dir = "./data/dataset/testPK/data";
  259. std::shared_ptr<SamplerObj> child_sampler = std::make_shared<PKSamplerObj>(3, true, 1);
  260. std::vector<double> weights = {1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 1.1};
  261. std::set<std::string> extensions = {};
  262. std::shared_ptr<DatasetCache> cache = nullptr;
  263. std::map<std::string, int32_t> class_indexing = {};
  264. std::shared_ptr<SamplerObj> sampler = std::make_shared<WeightedRandomSamplerObj>(weights, 11);
  265. sampler->AddChildSampler(child_sampler);
  266. std::shared_ptr<DatasetNode> ds =
  267. std::make_shared<ImageFolderNode>(dataset_dir, false, sampler, false, extensions, class_indexing, cache);
  268. ds = std::make_shared<RepeatNode>(ds, 1);
  269. std::vector<int32_t> size = {224, 224};
  270. std::vector<float> scale = {0.5, 0.5};
  271. std::vector<float> ratio = {0.5, 0.5};
  272. std::vector<float> center = {50.0, 50.0};
  273. std::vector<uint8_t> fill_value = {150, 150, 150};
  274. InterpolationMode interpolation = InterpolationMode::kLinear;
  275. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::SoftDvppDecodeResizeJpegOperation>(size);
  276. std::vector<std::shared_ptr<TensorOperation>> ops = {operation1};
  277. ds = std::make_shared<MapNode>(ds, ops);
  278. std::vector<std::shared_ptr<TensorOperation>> operations;
  279. std::shared_ptr<TensorOperation> operation2 =
  280. std::make_shared<vision::SoftDvppDecodeRandomCropResizeJpegOperation>(size, scale, ratio, 2);
  281. std::shared_ptr<TensorOperation> operation3 =
  282. std::make_shared<vision::RotateOperation>(0.5, interpolation, true, center, fill_value);
  283. operations.push_back(operation2);
  284. operations.push_back(operation3);
  285. ds = std::make_shared<MapNode>(ds, operations);
  286. ds = std::make_shared<BatchNode>(ds, 2, true);
  287. compare_dataset(ds);
  288. }
  289. TEST_F(MindDataTestDeserialize, TestDeserializeManifest) {
  290. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Manifest.";
  291. std::string data_file = "./data/dataset/testManifestData/cpp.json";
  292. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  293. std::map<std::string, int32_t> class_indexing = {};
  294. std::shared_ptr<DatasetCache> cache = nullptr;
  295. std::shared_ptr<DatasetNode> ds =
  296. std::make_shared<ManifestNode>(data_file, "train", sampler, class_indexing, false, cache);
  297. std::vector<int32_t> coordinates = {50, 50};
  298. std::vector<int32_t> size = {224, 224};
  299. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::CropOperation>(coordinates, size);
  300. std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::RgbToBgrOperation>();
  301. std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RgbToGrayOperation>();
  302. std::shared_ptr<TensorOperation> operation4 =
  303. std::make_shared<vision::SlicePatchesOperation>(5, 5, SliceMode::kDrop, 1);
  304. std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::VerticalFlipOperation>();
  305. std::vector<std::shared_ptr<TensorOperation>> operations;
  306. operations.push_back(operation1);
  307. operations.push_back(operation2);
  308. operations.push_back(operation3);
  309. operations.push_back(operation4);
  310. operations.push_back(operation5);
  311. ds = std::make_shared<MapNode>(ds, operations);
  312. ds = std::make_shared<BatchNode>(ds, 2, false);
  313. compare_dataset(ds);
  314. }
  315. TEST_F(MindDataTestDeserialize, TestDeserializeVOC) {
  316. MS_LOG(INFO) << "Doing MindDataTestDeserialize-VOC.";
  317. std::string dataset_dir = "./data/dataset/testVOC2012";
  318. std::vector<int64_t> indices = {0, 1};
  319. std::shared_ptr<SamplerObj> sampler = std::make_shared<SubsetRandomSamplerObj>(indices, 3);
  320. std::string task = "Detection";
  321. std::string usage = "train";
  322. std::map<std::string, int32_t> class_indexing = {};
  323. std::shared_ptr<DatasetCache> cache = nullptr;
  324. std::shared_ptr<DatasetNode> ds =
  325. std::make_shared<VOCNode>(dataset_dir, task, usage, class_indexing, true, sampler, cache);
  326. std::vector<float> brightness = {0.5, 0.5};
  327. std::vector<float> contrast = {1.0, 1.0};
  328. std::vector<float> hue = {0.0, 0.0};
  329. std::vector<float> saturation = {1.0, 1.0};
  330. std::shared_ptr<TensorOperation> operation =
  331. std::make_shared<vision::RandomColorAdjustOperation>(brightness, contrast, saturation, hue);
  332. std::vector<std::shared_ptr<TensorOperation>> ops = {operation};
  333. ds = std::make_shared<MapNode>(ds, ops);
  334. ds = std::make_shared<SkipNode>(ds, 2);
  335. compare_dataset(ds);
  336. }
  337. TEST_F(MindDataTestDeserialize, TestDeserializeCLUE) {
  338. MS_LOG(INFO) << "Doing MindDataTestDeserialize-CLUE.";
  339. std::string train_file = "./data/dataset/testCLUE/afqmc/train.json";
  340. std::string task = "AFQMC";
  341. std::string usage = "train";
  342. std::vector<std::string> files = {train_file};
  343. std::shared_ptr<DatasetCache> cache = nullptr;
  344. std::shared_ptr<DatasetNode> ds = std::make_shared<CLUENode>(files, task, usage, 1, ShuffleMode::kFalse, 1, 0, cache);
  345. ds = std::make_shared<RepeatNode>(ds, 1);
  346. std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::DecodeOperation>(true);
  347. std::vector<std::shared_ptr<TensorOperation>> ops = {operation1};
  348. ds = std::make_shared<MapNode>(ds, ops);
  349. compare_dataset(ds);
  350. }
  351. TEST_F(MindDataTestDeserialize, TestDeserializeCoco) {
  352. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Coco.";
  353. std::string folder_path = "./data/dataset/testCOCO/train";
  354. std::string annotation_file = "./data/dataset/testCOCO/annotations/train.json";
  355. std::string task = "Detection";
  356. std::vector<int64_t> indices = {0, 1};
  357. std::shared_ptr<SamplerObj> sampler = std::make_shared<SubsetRandomSamplerObj>(indices, 3);
  358. std::shared_ptr<DatasetCache> cache = nullptr;
  359. std::shared_ptr<DatasetNode> ds =
  360. std::make_shared<CocoNode>(folder_path, annotation_file, task, true, sampler, cache, false);
  361. std::vector<uint8_t> fill_value = {150, 150, 150};
  362. std::vector<float> degrees = {0.0, 0.0};
  363. std::vector<float> scale = {0.5, 0.5};
  364. std::vector<float> ratio = {0.5, 0.5};
  365. std::vector<int32_t> size = {224, 224};
  366. std::vector<int32_t> padding = {20, 20, 20, 20};
  367. InterpolationMode interpolation = InterpolationMode::kLinear;
  368. std::shared_ptr<TensorOperation> operation1 =
  369. std::make_shared<vision::RandomCropDecodeResizeOperation>(size, scale, ratio, interpolation, 2);
  370. std::shared_ptr<TensorOperation> operation2 =
  371. std::make_shared<vision::RandomCropWithBBoxOperation>(size, padding, true, fill_value, BorderType::kConstant);
  372. std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RandomHorizontalFlipOperation>(0.1);
  373. std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::RandomHorizontalFlipWithBBoxOperation>(0.1);
  374. std::vector<std::shared_ptr<TensorOperation>> operations;
  375. operations.push_back(operation1);
  376. operations.push_back(operation2);
  377. operations.push_back(operation3);
  378. operations.push_back(operation4);
  379. ds = std::make_shared<MapNode>(ds, operations);
  380. compare_dataset(ds);
  381. }
  382. TEST_F(MindDataTestDeserialize, TestDeserializeTFRecord) {
  383. MS_LOG(INFO) << "Doing MindDataTestDeserialize-TFRecord.";
  384. int num_samples = 12;
  385. int32_t num_shards = 1;
  386. int32_t shard_id = 0;
  387. bool shard_equal_rows = false;
  388. std::shared_ptr<DatasetCache> cache = nullptr;
  389. std::vector<std::string> columns_list = {};
  390. std::vector<std::string> dataset_files = {"./data/dataset/testTFTestAllTypes/test.data"};
  391. std::shared_ptr<SchemaObj> schema = Schema();
  392. ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {4}));
  393. ASSERT_OK(schema->add_column("col2", mindspore::DataType::kNumberTypeInt64, {4}));
  394. std::shared_ptr<DatasetNode> ds =
  395. std::make_shared<TFRecordNode>(dataset_files, schema, columns_list, num_samples, ShuffleMode::kFiles, num_shards,
  396. shard_id, shard_equal_rows, cache);
  397. ds = std::make_shared<ShuffleNode>(ds, 10000, true);
  398. std::vector<std::string> input_columns = {"col_sint16", "col_sint32", "col_sint64", "col_float",
  399. "col_1d", "col_2d", "col_3d", "col_binary"};
  400. std::vector<std::string> output_columns = {"column_sint16", "column_sint32", "column_sint64", "column_float",
  401. "column_1d", "column_2d", "column_3d", "column_binary"};
  402. std::shared_ptr<TensorOperation> operation = std::make_shared<vision::InvertOperation>();
  403. std::vector<std::shared_ptr<TensorOperation>> ops = {operation};
  404. ds = std::make_shared<MapNode>(ds, ops, input_columns, output_columns);
  405. std::string train_file = "./data/dataset/testCLUE/afqmc/train.json";
  406. std::string task1 = "AFQMC";
  407. std::string usage = "train";
  408. std::vector<std::string> files = {train_file};
  409. std::shared_ptr<DatasetNode> ds_child1 =
  410. std::make_shared<CLUENode>(files, task1, usage, 0, ShuffleMode::kFalse, 1, 0, cache);
  411. std::vector<std::string> dataset_files2 = {"./data/dataset/testTextFileDataset/1.txt"};
  412. std::shared_ptr<DatasetNode> ds_child2 =
  413. std::make_shared<TextFileNode>(dataset_files2, 2, ShuffleMode::kFiles, 1, 0, cache);
  414. std::vector<std::shared_ptr<DatasetNode>> datasets = {ds, ds_child1, ds_child2};
  415. ds = std::make_shared<ZipNode>(datasets);
  416. compare_dataset(ds);
  417. }
  418. TEST_F(MindDataTestDeserialize, TestDeserializeTextfile) {
  419. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Textfile.";
  420. std::vector<std::string> dataset_files = {"./data/dataset/testTextFileDataset/1.txt"};
  421. std::shared_ptr<DatasetCache> cache = nullptr;
  422. std::shared_ptr<DatasetNode> ds = std::make_shared<TextFileNode>(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache);
  423. std::shared_ptr<TensorOperation> operation = std::make_shared<vision::InvertOperation>();
  424. std::vector<std::shared_ptr<TensorOperation>> ops = {operation};
  425. ds = std::make_shared<MapNode>(ds, ops);
  426. ds = std::make_shared<BatchNode>(ds, 10, true);
  427. compare_dataset(ds);
  428. }
  429. TEST_F(MindDataTestDeserialize, TestDeserializeInvalidJson) {
  430. std::shared_ptr<DatasetNode> ds;
  431. // check the invalid json path would return error
  432. ASSERT_ERROR(Serdes::Deserialize("invalid_dataset.json", &ds));
  433. // check the invalid json object would return error
  434. ASSERT_ERROR(Serdes::Deserialize("./data/dataset/testDataset1/datasetTestInvalidJson.json", &ds));
  435. EXPECT_EQ(ds, nullptr);
  436. }
  437. TEST_F(MindDataTestDeserialize, TestDeserializeFill) {
  438. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Fill.";
  439. std::vector<std::string> dataset_files = {"./data/dataset/testTextFileDataset/1.txt"};
  440. std::shared_ptr<DatasetCache> cache = nullptr;
  441. std::shared_ptr<DatasetNode> ds = std::make_shared<TextFileNode>(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache);
  442. std::shared_ptr<Tensor> fill_value;
  443. ASSERT_OK(Tensor::CreateScalar(true, &fill_value));
  444. std::shared_ptr<TensorOperation> operation1 = std::make_shared<transforms::FillOperation>(fill_value);
  445. std::shared_ptr<TensorOperation> operation2 = std::make_shared<text::ToNumberOperation>("int32_t");
  446. std::vector<std::shared_ptr<TensorOperation>> ops = {operation1, operation2};
  447. ds = std::make_shared<MapNode>(ds, ops);
  448. ds = std::make_shared<TransferNode>(ds, "queue", "type", 1, true, 10, true);
  449. compare_dataset(ds);
  450. }
  451. TEST_F(MindDataTestDeserialize, TestDeserializeTensor) {
  452. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Tensor.";
  453. std::shared_ptr<Tensor> test_tensor;
  454. std::vector<float> input = {1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.2, 0.7, 0.8, 0.9, 1.0, 2.0, 1.3, 3.0, 4.0};
  455. ASSERT_OK(Tensor::CreateFromVector(input, TensorShape{3, 5}, &test_tensor));
  456. nlohmann::json json_obj;
  457. ASSERT_OK(test_tensor->to_json(&json_obj));
  458. std::shared_ptr<Tensor> test_tensor1;
  459. ASSERT_OK(Tensor::from_json(json_obj, &test_tensor1));
  460. nlohmann::json json_obj1;
  461. ASSERT_OK(test_tensor1->to_json(&json_obj1));
  462. std::stringstream json_ss;
  463. json_ss << json_obj;
  464. std::stringstream json_ss1;
  465. json_ss1 << json_obj1;
  466. EXPECT_EQ(json_ss.str(), json_ss1.str());
  467. }
  468. // Helper function to get the session id from SESSION_ID env variable
  469. Status GetSessionFromEnv(session_id_type *session_id);
  470. TEST_F(MindDataTestDeserialize, DISABLED_TestDeserializeCache) {
  471. MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cache.";
  472. std::string data_dir = "./data/dataset/testCache";
  473. std::string usage = "all";
  474. session_id_type env_session;
  475. ASSERT_TRUE(GetSessionFromEnv(&env_session));
  476. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50052, 1, 1);
  477. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  478. std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar10Node>(data_dir, usage, sampler, some_cache);
  479. compare_dataset(ds);
  480. }
  481. TEST_F(MindDataTestDeserialize, TestDeserializeConcatAlbumFlickr) {
  482. MS_LOG(INFO) << "Doing MindDataTestDeserialize-ConcatAlbumFlickr.";
  483. std::string dataset_dir = "./data/dataset/testAlbum";
  484. std::vector<std::string> column_names = {"col1", "col2", "col3"};
  485. bool decode = false;
  486. std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
  487. std::string data_schema = "./data/dataset/testAlbum/datasetSchema.json";
  488. std::shared_ptr<DatasetNode> ds =
  489. std::make_shared<AlbumNode>(dataset_dir, data_schema, column_names, decode, sampler, nullptr);
  490. std::shared_ptr<TensorOperation> operation = std::make_shared<vision::AdjustGammaOperation>(0.5, 0.5);
  491. std::vector<std::shared_ptr<TensorOperation>> ops = {operation};
  492. ds = std::make_shared<MapNode>(ds, ops);
  493. std::string dataset_path = "./data/dataset/testFlickrData/flickr30k/flickr30k-images";
  494. std::string annotation_file = "./data/dataset/testFlickrData/flickr30k/test1.token";
  495. std::shared_ptr<DatasetNode> ds_child1 =
  496. std::make_shared<FlickrNode>(dataset_path, annotation_file, decode, sampler, nullptr);
  497. std::vector<std::shared_ptr<DatasetNode>> datasets = {ds, ds_child1};
  498. std::pair<int, int> pair = std::make_pair(1, 1);
  499. std::vector<std::pair<int, int>> children_flag_and_nums = {pair};
  500. std::vector<std::pair<int, int>> children_start_end_index = {pair};
  501. ds = std::make_shared<ConcatNode>(datasets, sampler, children_flag_and_nums, children_start_end_index);
  502. compare_dataset(ds);
  503. }
  504. TEST_F(MindDataTestDeserialize, TestDeserializePyFunc) {
  505. MS_LOG(INFO) << "Doing MindDataTestDeserialize-PyFunc.";
  506. if (Py_IsInitialized() != 0) {
  507. std::shared_ptr<DatasetNode> ds1;
  508. ASSERT_OK(Serdes::Deserialize("./data/dataset/tf_file_dataset/pyvision_dataset_pipeline.json", &ds1));
  509. EXPECT_NE(ds1, nullptr);
  510. }
  511. }