You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_cache_test.cc 24 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
  19. using namespace mindspore::dataset;
  20. // Helper function to get the session id from SESSION_ID env variable
  21. Status GetSessionFromEnv(session_id_type *session_id);
  22. class MindDataTestCacheOp : public UT::DatasetOpTesting {
  23. public:
  24. void SetUp() override {
  25. DatasetOpTesting::SetUp();
  26. GlobalInit();
  27. }
  28. };
  29. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  30. session_id_type env_session;
  31. Status s = GetSessionFromEnv(&env_session);
  32. EXPECT_EQ(s, Status::OK());
  33. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true, "127.0.0.1", 50053, 1, 1);
  34. EXPECT_NE(some_cache, nullptr);
  35. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  36. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  37. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  38. EXPECT_NE(ds, nullptr);
  39. // Create an iterator over the result of the above dataset
  40. // This will trigger the creation of the Execution Tree and launch it.
  41. // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  42. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  43. EXPECT_EQ(iter, nullptr);
  44. }
  45. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  46. session_id_type env_session;
  47. Status s = GetSessionFromEnv(&env_session);
  48. EXPECT_EQ(s, Status::OK());
  49. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  50. EXPECT_NE(some_cache, nullptr);
  51. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  52. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  53. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  54. EXPECT_NE(ds, nullptr);
  55. // Create a Repeat operation on ds
  56. int32_t repeat_num = 2;
  57. ds = ds->Repeat(repeat_num);
  58. EXPECT_NE(ds, nullptr);
  59. // Create an iterator over the result of the above dataset
  60. // This will trigger the creation of the Execution Tree and launch it.
  61. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  62. EXPECT_NE(iter, nullptr);
  63. // Iterate the dataset and get each row
  64. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  65. iter->GetNextRow(&row);
  66. uint64_t i = 0;
  67. while (row.size() != 0) {
  68. i++;
  69. auto image = row["image"];
  70. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  71. iter->GetNextRow(&row);
  72. }
  73. EXPECT_EQ(i, 4);
  74. // Manually terminate the pipeline
  75. iter->Stop();
  76. }
  77. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  78. session_id_type env_session;
  79. Status s = GetSessionFromEnv(&env_session);
  80. EXPECT_EQ(s, Status::OK());
  81. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  82. EXPECT_NE(some_cache, nullptr);
  83. // Create a Coco Dataset, this folder_path has 6 images in it
  84. std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  85. std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  86. std::shared_ptr<Dataset> ds =
  87. Coco(folder_path, annotation_file_path, "Detection", false, RandomSampler(), some_cache);
  88. EXPECT_NE(ds, nullptr);
  89. // Create a Repeat operation on ds
  90. int32_t repeat_num = 2;
  91. ds = ds->Repeat(repeat_num);
  92. EXPECT_NE(ds, nullptr);
  93. // Create an iterator over the result of the above dataset
  94. // This will trigger the creation of the Execution Tree and launch it.
  95. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  96. EXPECT_NE(iter, nullptr);
  97. // Iterate the dataset and get each row
  98. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  99. iter->GetNextRow(&row);
  100. uint64_t i = 0;
  101. while (row.size() != 0) {
  102. i++;
  103. auto image = row["image"];
  104. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  105. iter->GetNextRow(&row);
  106. }
  107. EXPECT_EQ(i, 12);
  108. // Manually terminate the pipeline
  109. iter->Stop();
  110. }
  111. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  112. session_id_type env_session;
  113. Status s = GetSessionFromEnv(&env_session);
  114. EXPECT_EQ(s, Status::OK());
  115. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  116. EXPECT_NE(some_cache, nullptr);
  117. // Create a Mnist Dataset
  118. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  119. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", RandomSampler(false, 10), some_cache);
  120. EXPECT_NE(ds, nullptr);
  121. // Create a Repeat operation on ds
  122. int32_t repeat_num = 2;
  123. ds = ds->Repeat(repeat_num);
  124. EXPECT_NE(ds, nullptr);
  125. // Create an iterator over the result of the above dataset
  126. // This will trigger the creation of the Execution Tree and launch it.
  127. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  128. EXPECT_NE(iter, nullptr);
  129. // Iterate the dataset and get each row
  130. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  131. iter->GetNextRow(&row);
  132. uint64_t i = 0;
  133. while (row.size() != 0) {
  134. i++;
  135. auto image = row["image"];
  136. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  137. iter->GetNextRow(&row);
  138. }
  139. EXPECT_EQ(i, 20);
  140. // Manually terminate the pipeline
  141. iter->Stop();
  142. }
  143. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  144. session_id_type env_session;
  145. Status s = GetSessionFromEnv(&env_session);
  146. EXPECT_EQ(s, Status::OK());
  147. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  148. EXPECT_NE(some_cache, nullptr);
  149. // Create a CelebA Dataset, this folder_path has 4 records in it
  150. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  151. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", RandomSampler(false, 10), false, {}, some_cache);
  152. EXPECT_NE(ds, nullptr);
  153. // Create a Repeat operation on ds
  154. int32_t repeat_num = 2;
  155. ds = ds->Repeat(repeat_num);
  156. EXPECT_NE(ds, nullptr);
  157. // Create an iterator over the result of the above dataset
  158. // This will trigger the creation of the Execution Tree and launch it.
  159. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  160. EXPECT_NE(iter, nullptr);
  161. // Iterate the dataset and get each row
  162. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  163. iter->GetNextRow(&row);
  164. uint64_t i = 0;
  165. while (row.size() != 0) {
  166. i++;
  167. auto image = row["image"];
  168. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  169. iter->GetNextRow(&row);
  170. }
  171. EXPECT_EQ(i, 8);
  172. // Manually terminate the pipeline
  173. iter->Stop();
  174. }
  175. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  176. session_id_type env_session;
  177. Status s = GetSessionFromEnv(&env_session);
  178. EXPECT_EQ(s, Status::OK());
  179. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  180. EXPECT_NE(some_cache, nullptr);
  181. // Create a Manifest Dataset, this file_path has 2 records in it
  182. std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  183. std::shared_ptr<Dataset> ds = Manifest(file_path, "train", RandomSampler(), {}, false, some_cache);
  184. EXPECT_NE(ds, nullptr);
  185. // Create a Repeat operation on ds
  186. int32_t repeat_num = 2;
  187. ds = ds->Repeat(repeat_num);
  188. EXPECT_NE(ds, nullptr);
  189. // Create an iterator over the result of the above dataset
  190. // This will trigger the creation of the Execution Tree and launch it.
  191. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  192. EXPECT_NE(iter, nullptr);
  193. // Iterate the dataset and get each row
  194. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  195. iter->GetNextRow(&row);
  196. uint64_t i = 0;
  197. while (row.size() != 0) {
  198. i++;
  199. auto image = row["image"];
  200. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  201. iter->GetNextRow(&row);
  202. }
  203. EXPECT_EQ(i, 4);
  204. // Manually terminate the pipeline
  205. iter->Stop();
  206. }
  207. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  208. session_id_type env_session;
  209. Status s = GetSessionFromEnv(&env_session);
  210. EXPECT_EQ(s, Status::OK());
  211. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  212. EXPECT_NE(some_cache, nullptr);
  213. // Create a Cifar10 Dataset
  214. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  215. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", RandomSampler(false, 10), some_cache);
  216. EXPECT_NE(ds, nullptr);
  217. // Create a Repeat operation on ds
  218. int32_t repeat_num = 2;
  219. ds = ds->Repeat(repeat_num);
  220. EXPECT_NE(ds, nullptr);
  221. // Create an iterator over the result of the above dataset
  222. // This will trigger the creation of the Execution Tree and launch it.
  223. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  224. EXPECT_NE(iter, nullptr);
  225. // Iterate the dataset and get each row
  226. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  227. iter->GetNextRow(&row);
  228. uint64_t i = 0;
  229. while (row.size() != 0) {
  230. i++;
  231. auto image = row["image"];
  232. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  233. iter->GetNextRow(&row);
  234. }
  235. EXPECT_EQ(i, 20);
  236. // Manually terminate the pipeline
  237. iter->Stop();
  238. }
  239. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  240. session_id_type env_session;
  241. Status s = GetSessionFromEnv(&env_session);
  242. EXPECT_EQ(s, Status::OK());
  243. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  244. EXPECT_NE(some_cache, nullptr);
  245. // Create a Cifar100 Dataset
  246. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  247. std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", RandomSampler(false, 10), some_cache);
  248. EXPECT_NE(ds, nullptr);
  249. // Create a Repeat operation on ds
  250. int32_t repeat_num = 2;
  251. ds = ds->Repeat(repeat_num);
  252. EXPECT_NE(ds, nullptr);
  253. // Create an iterator over the result of the above dataset
  254. // This will trigger the creation of the Execution Tree and launch it.
  255. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  256. EXPECT_NE(iter, nullptr);
  257. // Iterate the dataset and get each row
  258. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  259. iter->GetNextRow(&row);
  260. uint64_t i = 0;
  261. while (row.size() != 0) {
  262. i++;
  263. auto image = row["image"];
  264. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  265. iter->GetNextRow(&row);
  266. }
  267. EXPECT_EQ(i, 20);
  268. // Manually terminate the pipeline
  269. iter->Stop();
  270. }
  271. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  272. session_id_type env_session;
  273. Status s = GetSessionFromEnv(&env_session);
  274. EXPECT_EQ(s, Status::OK());
  275. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  276. EXPECT_NE(some_cache, nullptr);
  277. // Create a VOC Dataset, this folder_path has 9 records in it
  278. std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  279. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, RandomSampler(), some_cache);
  280. EXPECT_NE(ds, nullptr);
  281. // Create a Repeat operation on ds
  282. int32_t repeat_num = 2;
  283. ds = ds->Repeat(repeat_num);
  284. EXPECT_NE(ds, nullptr);
  285. // Create an iterator over the result of the above dataset
  286. // This will trigger the creation of the Execution Tree and launch it.
  287. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  288. EXPECT_NE(iter, nullptr);
  289. // Iterate the dataset and get each row
  290. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  291. iter->GetNextRow(&row);
  292. uint64_t i = 0;
  293. while (row.size() != 0) {
  294. i++;
  295. auto image = row["image"];
  296. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  297. iter->GetNextRow(&row);
  298. }
  299. EXPECT_EQ(i, 18);
  300. // Manually terminate the pipeline
  301. iter->Stop();
  302. }
  303. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  304. session_id_type env_session;
  305. Status s = GetSessionFromEnv(&env_session);
  306. EXPECT_EQ(s, Status::OK());
  307. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  308. EXPECT_NE(some_cache, nullptr);
  309. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  310. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  311. std::vector<std::string> column_names = {"image", "label", "id"};
  312. // Create a Album Dataset, 7 records in it
  313. std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, false, RandomSampler(), some_cache);
  314. EXPECT_NE(ds, nullptr);
  315. // Create a Repeat operation on ds
  316. int32_t repeat_num = 2;
  317. ds = ds->Repeat(repeat_num);
  318. EXPECT_NE(ds, nullptr);
  319. // Create an iterator over the result of the above dataset
  320. // This will trigger the creation of the Execution Tree and launch it.
  321. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  322. EXPECT_NE(iter, nullptr);
  323. // Iterate the dataset and get each row
  324. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  325. iter->GetNextRow(&row);
  326. uint64_t i = 0;
  327. while (row.size() != 0) {
  328. i++;
  329. iter->GetNextRow(&row);
  330. }
  331. EXPECT_EQ(i, 14);
  332. // Manually terminate the pipeline
  333. iter->Stop();
  334. }
  335. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  336. session_id_type env_session;
  337. Status s = GetSessionFromEnv(&env_session);
  338. EXPECT_EQ(s, Status::OK());
  339. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  340. EXPECT_NE(some_cache, nullptr);
  341. // Create a RandomDataset
  342. std::shared_ptr<SchemaObj> schema = Schema();
  343. schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
  344. schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
  345. std::shared_ptr<Dataset> ds = RandomData(4, schema, {}, some_cache);
  346. EXPECT_NE(ds, nullptr);
  347. // Create a Repeat operation on ds
  348. int32_t repeat_num = 2;
  349. ds = ds->Repeat(repeat_num);
  350. EXPECT_NE(ds, nullptr);
  351. // Create an iterator over the result of the above dataset
  352. // This will trigger the creation of the Execution Tree and launch it.
  353. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  354. EXPECT_NE(iter, nullptr);
  355. // Iterate the dataset and get each row
  356. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  357. iter->GetNextRow(&row);
  358. uint64_t i = 0;
  359. while (row.size() != 0) {
  360. i++;
  361. iter->GetNextRow(&row);
  362. }
  363. EXPECT_EQ(i, 8);
  364. // Manually terminate the pipeline
  365. iter->Stop();
  366. }
  367. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  368. session_id_type env_session;
  369. Status s = GetSessionFromEnv(&env_session);
  370. EXPECT_EQ(s, Status::OK());
  371. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  372. EXPECT_NE(some_cache, nullptr);
  373. // Create a TFRecord Dataset, this file_path has 3 records in it
  374. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  375. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  376. std::shared_ptr<Dataset> ds =
  377. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  378. EXPECT_NE(ds, nullptr);
  379. // Create a Repeat operation on ds
  380. int32_t repeat_num = 2;
  381. ds = ds->Repeat(repeat_num);
  382. EXPECT_NE(ds, nullptr);
  383. // Create an iterator over the result of the above dataset
  384. // This will trigger the creation of the Execution Tree and launch it.
  385. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  386. EXPECT_NE(iter, nullptr);
  387. // Iterate the dataset and get each row
  388. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  389. iter->GetNextRow(&row);
  390. uint64_t i = 0;
  391. while (row.size() != 0) {
  392. i++;
  393. auto image = row["image"];
  394. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  395. iter->GetNextRow(&row);
  396. }
  397. EXPECT_EQ(i, 6);
  398. // Manually terminate the pipeline
  399. iter->Stop();
  400. }
  401. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  402. session_id_type env_session;
  403. Status s = GetSessionFromEnv(&env_session);
  404. EXPECT_EQ(s, Status::OK());
  405. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  406. EXPECT_NE(some_cache, nullptr);
  407. // Create a TFRecord Dataset, this file_path has 3 records in it
  408. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  409. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  410. // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  411. // used, the tree prepare should undo the sharding configuration and instead, a distributed
  412. // sampler will be chosen with the same shard config.
  413. // With only 3 records shard into 3, we expect only 1 record returned for this shard
  414. // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  415. // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  416. // there was not any cache.
  417. std::shared_ptr<Dataset> ds =
  418. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  419. EXPECT_NE(ds, nullptr);
  420. // Create a Repeat operation on ds
  421. int32_t repeat_num = 2;
  422. ds = ds->Repeat(repeat_num);
  423. EXPECT_NE(ds, nullptr);
  424. // Create an iterator over the result of the above dataset
  425. // This will trigger the creation of the Execution Tree and launch it.
  426. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  427. EXPECT_NE(iter, nullptr);
  428. // Iterate the dataset and get each row
  429. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  430. iter->GetNextRow(&row);
  431. uint64_t i = 0;
  432. while (row.size() != 0) {
  433. i++;
  434. auto image = row["image"];
  435. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  436. iter->GetNextRow(&row);
  437. }
  438. EXPECT_EQ(i, 2);
  439. // Manually terminate the pipeline
  440. iter->Stop();
  441. }
  442. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  443. session_id_type env_session;
  444. Status s = GetSessionFromEnv(&env_session);
  445. EXPECT_EQ(s, Status::OK());
  446. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  447. EXPECT_NE(some_cache, nullptr);
  448. // Create a TFRecord Dataset, this file_path has 3 records in it
  449. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  450. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  451. // In this one, a num_samples argument is given.
  452. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  453. // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  454. std::shared_ptr<Dataset> ds =
  455. TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  456. EXPECT_NE(ds, nullptr);
  457. // Create a Repeat operation on ds
  458. int32_t repeat_num = 2;
  459. ds = ds->Repeat(repeat_num);
  460. EXPECT_NE(ds, nullptr);
  461. // Create an iterator over the result of the above dataset
  462. // This will trigger the creation of the Execution Tree and launch it.
  463. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  464. EXPECT_NE(iter, nullptr);
  465. // Iterate the dataset and get each row
  466. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  467. iter->GetNextRow(&row);
  468. uint64_t i = 0;
  469. while (row.size() != 0) {
  470. i++;
  471. auto image = row["image"];
  472. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  473. iter->GetNextRow(&row);
  474. }
  475. EXPECT_EQ(i, 4);
  476. // Manually terminate the pipeline
  477. iter->Stop();
  478. }
  479. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  480. session_id_type env_session;
  481. Status s = GetSessionFromEnv(&env_session);
  482. EXPECT_EQ(s, Status::OK());
  483. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  484. EXPECT_NE(some_cache, nullptr);
  485. // Create a TextFile Dataset, this file_path has 3 records in it
  486. std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  487. // In this one, a num_samples=2 argument is given.
  488. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  489. // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  490. std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  491. EXPECT_NE(ds, nullptr);
  492. // Create a Repeat operation on ds
  493. int32_t repeat_num = 2;
  494. ds = ds->Repeat(repeat_num);
  495. EXPECT_NE(ds, nullptr);
  496. // Create an iterator over the result of the above dataset
  497. // This will trigger the creation of the Execution Tree and launch it.
  498. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  499. EXPECT_NE(iter, nullptr);
  500. // Iterate the dataset and get each row
  501. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  502. iter->GetNextRow(&row);
  503. uint64_t i = 0;
  504. while (row.size() != 0) {
  505. i++;
  506. iter->GetNextRow(&row);
  507. }
  508. EXPECT_EQ(i, 4);
  509. // Manually terminate the pipeline
  510. iter->Stop();
  511. }
  512. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  513. session_id_type env_session;
  514. Status s = GetSessionFromEnv(&env_session);
  515. EXPECT_EQ(s, Status::OK());
  516. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  517. EXPECT_NE(some_cache, nullptr);
  518. // Create a CSV Dataset, this file_path has 3 records in it
  519. std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  520. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  521. // In this one, a num_samples=2 argument is given.
  522. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  523. // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  524. std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  525. EXPECT_NE(ds, nullptr);
  526. // Create a Repeat operation on ds
  527. int32_t repeat_num = 2;
  528. ds = ds->Repeat(repeat_num);
  529. EXPECT_NE(ds, nullptr);
  530. // Create an iterator over the result of the above dataset
  531. // This will trigger the creation of the Execution Tree and launch it.
  532. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  533. EXPECT_NE(iter, nullptr);
  534. // Iterate the dataset and get each row
  535. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  536. iter->GetNextRow(&row);
  537. uint64_t i = 0;
  538. while (row.size() != 0) {
  539. i++;
  540. iter->GetNextRow(&row);
  541. }
  542. EXPECT_EQ(i, 4);
  543. // Manually terminate the pipeline
  544. iter->Stop();
  545. }
  546. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  547. session_id_type env_session;
  548. Status s = GetSessionFromEnv(&env_session);
  549. EXPECT_EQ(s, Status::OK());
  550. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  551. EXPECT_NE(some_cache, nullptr);
  552. // Create a CLUE Dataset, this file_path has 3 records in it
  553. std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  554. std::string task = "AFQMC";
  555. std::string usage = "train";
  556. // In this one, a num_samples=2 argument is given.
  557. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  558. // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  559. std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  560. EXPECT_NE(ds, nullptr);
  561. // Create a Repeat operation on ds
  562. int32_t repeat_num = 2;
  563. ds = ds->Repeat(repeat_num);
  564. EXPECT_NE(ds, nullptr);
  565. // Create an iterator over the result of the above dataset
  566. // This will trigger the creation of the Execution Tree and launch it.
  567. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  568. EXPECT_NE(iter, nullptr);
  569. // Iterate the dataset and get each row
  570. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  571. iter->GetNextRow(&row);
  572. uint64_t i = 0;
  573. while (row.size() != 0) {
  574. i++;
  575. iter->GetNextRow(&row);
  576. }
  577. EXPECT_EQ(i, 4);
  578. // Manually terminate the pipeline
  579. iter->Stop();
  580. }