You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_cache_test.cc 30 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. #include "minddata/dataset/include/vision.h"
  19. #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
  20. using namespace mindspore::dataset;
  21. // Helper function to get the session id from SESSION_ID env variable
  22. Status GetSessionFromEnv(session_id_type *session_id);
  23. class MindDataTestCacheOp : public UT::DatasetOpTesting {
  24. public:
  25. void SetUp() override {
  26. DatasetOpTesting::SetUp();
  27. GlobalInit();
  28. }
  29. };
  30. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  31. session_id_type env_session;
  32. Status s = GetSessionFromEnv(&env_session);
  33. EXPECT_EQ(s, Status::OK());
  34. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true, "127.0.0.1", 50053, 1, 1);
  35. EXPECT_NE(some_cache, nullptr);
  36. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  37. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  38. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  39. EXPECT_NE(ds, nullptr);
  40. // Create an iterator over the result of the above dataset
  41. // This will trigger the creation of the Execution Tree and launch it.
  42. // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  43. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  44. EXPECT_EQ(iter, nullptr);
  45. }
  46. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
  47. session_id_type env_session;
  48. Status s = GetSessionFromEnv(&env_session);
  49. EXPECT_EQ(s, Status::OK());
  50. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  51. EXPECT_NE(some_cache, nullptr);
  52. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  53. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  54. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  55. EXPECT_NE(ds, nullptr);
  56. // Create objects for the tensor ops
  57. std::shared_ptr<TensorOperation> decode_op = vision::Decode();
  58. EXPECT_NE(decode_op, nullptr);
  59. // Create a Map operation on ds
  60. ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
  61. EXPECT_NE(ds, nullptr);
  62. // Create an iterator over the result of the above dataset
  63. // This will trigger the creation of the Execution Tree and launch it.
  64. // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
  65. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  66. EXPECT_EQ(iter, nullptr);
  67. }
  68. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  69. session_id_type env_session;
  70. Status s = GetSessionFromEnv(&env_session);
  71. EXPECT_EQ(s, Status::OK());
  72. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  73. EXPECT_NE(some_cache, nullptr);
  74. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  75. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  76. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  77. EXPECT_NE(ds, nullptr);
  78. // Create a Repeat operation on ds
  79. int32_t repeat_num = 2;
  80. ds = ds->Repeat(repeat_num);
  81. EXPECT_NE(ds, nullptr);
  82. // Create an iterator over the result of the above dataset
  83. // This will trigger the creation of the Execution Tree and launch it.
  84. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  85. EXPECT_NE(iter, nullptr);
  86. // Iterate the dataset and get each row
  87. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  88. iter->GetNextRow(&row);
  89. uint64_t i = 0;
  90. while (row.size() != 0) {
  91. i++;
  92. auto image = row["image"];
  93. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  94. iter->GetNextRow(&row);
  95. }
  96. EXPECT_EQ(i, 4);
  97. // Manually terminate the pipeline
  98. iter->Stop();
  99. }
  100. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  101. session_id_type env_session;
  102. Status s = GetSessionFromEnv(&env_session);
  103. EXPECT_EQ(s, Status::OK());
  104. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  105. EXPECT_NE(some_cache, nullptr);
  106. // Create a Coco Dataset, this folder_path has 6 images in it
  107. std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  108. std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  109. std::shared_ptr<Dataset> ds =
  110. Coco(folder_path, annotation_file_path, "Detection", false, RandomSampler(), some_cache);
  111. EXPECT_NE(ds, nullptr);
  112. // Create a Repeat operation on ds
  113. int32_t repeat_num = 2;
  114. ds = ds->Repeat(repeat_num);
  115. EXPECT_NE(ds, nullptr);
  116. // Create an iterator over the result of the above dataset
  117. // This will trigger the creation of the Execution Tree and launch it.
  118. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  119. EXPECT_NE(iter, nullptr);
  120. // Iterate the dataset and get each row
  121. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  122. iter->GetNextRow(&row);
  123. uint64_t i = 0;
  124. while (row.size() != 0) {
  125. i++;
  126. auto image = row["image"];
  127. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  128. iter->GetNextRow(&row);
  129. }
  130. EXPECT_EQ(i, 12);
  131. // Manually terminate the pipeline
  132. iter->Stop();
  133. }
  134. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  135. session_id_type env_session;
  136. Status s = GetSessionFromEnv(&env_session);
  137. EXPECT_EQ(s, Status::OK());
  138. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  139. EXPECT_NE(some_cache, nullptr);
  140. // Create a Mnist Dataset
  141. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  142. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", RandomSampler(false, 10), some_cache);
  143. EXPECT_NE(ds, nullptr);
  144. // Create a Repeat operation on ds
  145. int32_t repeat_num = 2;
  146. ds = ds->Repeat(repeat_num);
  147. EXPECT_NE(ds, nullptr);
  148. // Create an iterator over the result of the above dataset
  149. // This will trigger the creation of the Execution Tree and launch it.
  150. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  151. EXPECT_NE(iter, nullptr);
  152. // Iterate the dataset and get each row
  153. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  154. iter->GetNextRow(&row);
  155. uint64_t i = 0;
  156. while (row.size() != 0) {
  157. i++;
  158. auto image = row["image"];
  159. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  160. iter->GetNextRow(&row);
  161. }
  162. EXPECT_EQ(i, 20);
  163. // Manually terminate the pipeline
  164. iter->Stop();
  165. }
  166. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  167. session_id_type env_session;
  168. Status s = GetSessionFromEnv(&env_session);
  169. EXPECT_EQ(s, Status::OK());
  170. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  171. EXPECT_NE(some_cache, nullptr);
  172. // Create a CelebA Dataset, this folder_path has 4 records in it
  173. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  174. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", RandomSampler(false, 10), false, {}, some_cache);
  175. EXPECT_NE(ds, nullptr);
  176. // Create a Repeat operation on ds
  177. int32_t repeat_num = 2;
  178. ds = ds->Repeat(repeat_num);
  179. EXPECT_NE(ds, nullptr);
  180. // Create an iterator over the result of the above dataset
  181. // This will trigger the creation of the Execution Tree and launch it.
  182. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  183. EXPECT_NE(iter, nullptr);
  184. // Iterate the dataset and get each row
  185. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  186. iter->GetNextRow(&row);
  187. uint64_t i = 0;
  188. while (row.size() != 0) {
  189. i++;
  190. auto image = row["image"];
  191. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  192. iter->GetNextRow(&row);
  193. }
  194. EXPECT_EQ(i, 8);
  195. // Manually terminate the pipeline
  196. iter->Stop();
  197. }
  198. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  199. session_id_type env_session;
  200. Status s = GetSessionFromEnv(&env_session);
  201. EXPECT_EQ(s, Status::OK());
  202. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  203. EXPECT_NE(some_cache, nullptr);
  204. // Create a Manifest Dataset, this file_path has 2 records in it
  205. std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  206. std::shared_ptr<Dataset> ds = Manifest(file_path, "train", RandomSampler(), {}, false, some_cache);
  207. EXPECT_NE(ds, nullptr);
  208. // Create a Repeat operation on ds
  209. int32_t repeat_num = 2;
  210. ds = ds->Repeat(repeat_num);
  211. EXPECT_NE(ds, nullptr);
  212. // Create an iterator over the result of the above dataset
  213. // This will trigger the creation of the Execution Tree and launch it.
  214. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  215. EXPECT_NE(iter, nullptr);
  216. // Iterate the dataset and get each row
  217. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  218. iter->GetNextRow(&row);
  219. uint64_t i = 0;
  220. while (row.size() != 0) {
  221. i++;
  222. auto image = row["image"];
  223. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  224. iter->GetNextRow(&row);
  225. }
  226. EXPECT_EQ(i, 4);
  227. // Manually terminate the pipeline
  228. iter->Stop();
  229. }
  230. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  231. session_id_type env_session;
  232. Status s = GetSessionFromEnv(&env_session);
  233. EXPECT_EQ(s, Status::OK());
  234. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  235. EXPECT_NE(some_cache, nullptr);
  236. // Create a Cifar10 Dataset
  237. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  238. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", RandomSampler(false, 10), some_cache);
  239. EXPECT_NE(ds, nullptr);
  240. // Create a Repeat operation on ds
  241. int32_t repeat_num = 2;
  242. ds = ds->Repeat(repeat_num);
  243. EXPECT_NE(ds, nullptr);
  244. // Create an iterator over the result of the above dataset
  245. // This will trigger the creation of the Execution Tree and launch it.
  246. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  247. EXPECT_NE(iter, nullptr);
  248. // Iterate the dataset and get each row
  249. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  250. iter->GetNextRow(&row);
  251. uint64_t i = 0;
  252. while (row.size() != 0) {
  253. i++;
  254. auto image = row["image"];
  255. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  256. iter->GetNextRow(&row);
  257. }
  258. EXPECT_EQ(i, 20);
  259. // Manually terminate the pipeline
  260. iter->Stop();
  261. }
  262. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  263. session_id_type env_session;
  264. Status s = GetSessionFromEnv(&env_session);
  265. EXPECT_EQ(s, Status::OK());
  266. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  267. EXPECT_NE(some_cache, nullptr);
  268. // Create a Cifar100 Dataset
  269. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  270. std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", RandomSampler(false, 10), some_cache);
  271. EXPECT_NE(ds, nullptr);
  272. // Create a Repeat operation on ds
  273. int32_t repeat_num = 2;
  274. ds = ds->Repeat(repeat_num);
  275. EXPECT_NE(ds, nullptr);
  276. // Create an iterator over the result of the above dataset
  277. // This will trigger the creation of the Execution Tree and launch it.
  278. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  279. EXPECT_NE(iter, nullptr);
  280. // Iterate the dataset and get each row
  281. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  282. iter->GetNextRow(&row);
  283. uint64_t i = 0;
  284. while (row.size() != 0) {
  285. i++;
  286. auto image = row["image"];
  287. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  288. iter->GetNextRow(&row);
  289. }
  290. EXPECT_EQ(i, 20);
  291. // Manually terminate the pipeline
  292. iter->Stop();
  293. }
  294. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  295. session_id_type env_session;
  296. Status s = GetSessionFromEnv(&env_session);
  297. EXPECT_EQ(s, Status::OK());
  298. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  299. EXPECT_NE(some_cache, nullptr);
  300. // Create a VOC Dataset, this folder_path has 9 records in it
  301. std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  302. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, RandomSampler(), some_cache);
  303. EXPECT_NE(ds, nullptr);
  304. // Create a Repeat operation on ds
  305. int32_t repeat_num = 2;
  306. ds = ds->Repeat(repeat_num);
  307. EXPECT_NE(ds, nullptr);
  308. // Create an iterator over the result of the above dataset
  309. // This will trigger the creation of the Execution Tree and launch it.
  310. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  311. EXPECT_NE(iter, nullptr);
  312. // Iterate the dataset and get each row
  313. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  314. iter->GetNextRow(&row);
  315. uint64_t i = 0;
  316. while (row.size() != 0) {
  317. i++;
  318. auto image = row["image"];
  319. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  320. iter->GetNextRow(&row);
  321. }
  322. EXPECT_EQ(i, 18);
  323. // Manually terminate the pipeline
  324. iter->Stop();
  325. }
  326. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  327. session_id_type env_session;
  328. Status s = GetSessionFromEnv(&env_session);
  329. EXPECT_EQ(s, Status::OK());
  330. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  331. EXPECT_NE(some_cache, nullptr);
  332. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  333. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  334. std::vector<std::string> column_names = {"image", "label", "id"};
  335. // Create a Album Dataset, 7 records in it
  336. std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, false, RandomSampler(), some_cache);
  337. EXPECT_NE(ds, nullptr);
  338. // Create a Repeat operation on ds
  339. int32_t repeat_num = 2;
  340. ds = ds->Repeat(repeat_num);
  341. EXPECT_NE(ds, nullptr);
  342. // Create an iterator over the result of the above dataset
  343. // This will trigger the creation of the Execution Tree and launch it.
  344. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  345. EXPECT_NE(iter, nullptr);
  346. // Iterate the dataset and get each row
  347. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  348. iter->GetNextRow(&row);
  349. uint64_t i = 0;
  350. while (row.size() != 0) {
  351. i++;
  352. iter->GetNextRow(&row);
  353. }
  354. EXPECT_EQ(i, 14);
  355. // Manually terminate the pipeline
  356. iter->Stop();
  357. }
  358. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  359. session_id_type env_session;
  360. Status s = GetSessionFromEnv(&env_session);
  361. EXPECT_EQ(s, Status::OK());
  362. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  363. EXPECT_NE(some_cache, nullptr);
  364. // Create a RandomDataset
  365. std::shared_ptr<SchemaObj> schema = Schema();
  366. schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
  367. schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
  368. std::shared_ptr<Dataset> ds = RandomData(4, schema, {}, some_cache);
  369. EXPECT_NE(ds, nullptr);
  370. // Create a Repeat operation on ds
  371. int32_t repeat_num = 2;
  372. ds = ds->Repeat(repeat_num);
  373. EXPECT_NE(ds, nullptr);
  374. // Create an iterator over the result of the above dataset
  375. // This will trigger the creation of the Execution Tree and launch it.
  376. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  377. EXPECT_NE(iter, nullptr);
  378. // Iterate the dataset and get each row
  379. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  380. iter->GetNextRow(&row);
  381. uint64_t i = 0;
  382. while (row.size() != 0) {
  383. i++;
  384. iter->GetNextRow(&row);
  385. }
  386. EXPECT_EQ(i, 8);
  387. // Manually terminate the pipeline
  388. iter->Stop();
  389. }
  390. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  391. session_id_type env_session;
  392. Status s = GetSessionFromEnv(&env_session);
  393. EXPECT_EQ(s, Status::OK());
  394. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  395. EXPECT_NE(some_cache, nullptr);
  396. // Create a TFRecord Dataset, this file_path has 3 records in it
  397. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  398. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  399. std::shared_ptr<Dataset> ds =
  400. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  401. EXPECT_NE(ds, nullptr);
  402. // Create a Repeat operation on ds
  403. int32_t repeat_num = 2;
  404. ds = ds->Repeat(repeat_num);
  405. EXPECT_NE(ds, nullptr);
  406. // Create an iterator over the result of the above dataset
  407. // This will trigger the creation of the Execution Tree and launch it.
  408. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  409. EXPECT_NE(iter, nullptr);
  410. // Iterate the dataset and get each row
  411. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  412. iter->GetNextRow(&row);
  413. uint64_t i = 0;
  414. while (row.size() != 0) {
  415. i++;
  416. auto image = row["image"];
  417. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  418. iter->GetNextRow(&row);
  419. }
  420. EXPECT_EQ(i, 6);
  421. // Manually terminate the pipeline
  422. iter->Stop();
  423. }
  424. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  425. session_id_type env_session;
  426. Status s = GetSessionFromEnv(&env_session);
  427. EXPECT_EQ(s, Status::OK());
  428. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  429. EXPECT_NE(some_cache, nullptr);
  430. // Create a TFRecord Dataset, this file_path has 3 records in it
  431. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  432. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  433. // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  434. // used, the tree prepare should undo the sharding configuration and instead, a distributed
  435. // sampler will be chosen with the same shard config.
  436. // With only 3 records shard into 3, we expect only 1 record returned for this shard
  437. // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  438. // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  439. // there was not any cache.
  440. std::shared_ptr<Dataset> ds =
  441. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  442. EXPECT_NE(ds, nullptr);
  443. // Create a Repeat operation on ds
  444. int32_t repeat_num = 2;
  445. ds = ds->Repeat(repeat_num);
  446. EXPECT_NE(ds, nullptr);
  447. // Create an iterator over the result of the above dataset
  448. // This will trigger the creation of the Execution Tree and launch it.
  449. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  450. EXPECT_NE(iter, nullptr);
  451. // Iterate the dataset and get each row
  452. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  453. iter->GetNextRow(&row);
  454. uint64_t i = 0;
  455. while (row.size() != 0) {
  456. i++;
  457. auto image = row["image"];
  458. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  459. iter->GetNextRow(&row);
  460. }
  461. EXPECT_EQ(i, 2);
  462. // Manually terminate the pipeline
  463. iter->Stop();
  464. }
  465. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  466. session_id_type env_session;
  467. Status s = GetSessionFromEnv(&env_session);
  468. EXPECT_EQ(s, Status::OK());
  469. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  470. EXPECT_NE(some_cache, nullptr);
  471. // Create a TFRecord Dataset, this file_path has 3 records in it
  472. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  473. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  474. // In this one, a num_samples argument is given.
  475. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  476. // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  477. std::shared_ptr<Dataset> ds =
  478. TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  479. EXPECT_NE(ds, nullptr);
  480. // Create a Repeat operation on ds
  481. int32_t repeat_num = 2;
  482. ds = ds->Repeat(repeat_num);
  483. EXPECT_NE(ds, nullptr);
  484. // Create an iterator over the result of the above dataset
  485. // This will trigger the creation of the Execution Tree and launch it.
  486. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  487. EXPECT_NE(iter, nullptr);
  488. // Iterate the dataset and get each row
  489. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  490. iter->GetNextRow(&row);
  491. uint64_t i = 0;
  492. while (row.size() != 0) {
  493. i++;
  494. auto image = row["image"];
  495. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  496. iter->GetNextRow(&row);
  497. }
  498. EXPECT_EQ(i, 4);
  499. // Manually terminate the pipeline
  500. iter->Stop();
  501. }
  502. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  503. session_id_type env_session;
  504. Status s = GetSessionFromEnv(&env_session);
  505. EXPECT_EQ(s, Status::OK());
  506. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  507. EXPECT_NE(some_cache, nullptr);
  508. // Create a TextFile Dataset, this file_path has 3 records in it
  509. std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  510. // In this one, a num_samples=2 argument is given.
  511. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  512. // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  513. std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  514. EXPECT_NE(ds, nullptr);
  515. // Create a Repeat operation on ds
  516. int32_t repeat_num = 2;
  517. ds = ds->Repeat(repeat_num);
  518. EXPECT_NE(ds, nullptr);
  519. // Create an iterator over the result of the above dataset
  520. // This will trigger the creation of the Execution Tree and launch it.
  521. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  522. EXPECT_NE(iter, nullptr);
  523. // Iterate the dataset and get each row
  524. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  525. iter->GetNextRow(&row);
  526. uint64_t i = 0;
  527. while (row.size() != 0) {
  528. i++;
  529. iter->GetNextRow(&row);
  530. }
  531. EXPECT_EQ(i, 4);
  532. // Manually terminate the pipeline
  533. iter->Stop();
  534. }
  535. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  536. session_id_type env_session;
  537. Status s = GetSessionFromEnv(&env_session);
  538. EXPECT_EQ(s, Status::OK());
  539. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  540. EXPECT_NE(some_cache, nullptr);
  541. // Create a CSV Dataset, this file_path has 3 records in it
  542. std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  543. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  544. // In this one, a num_samples=2 argument is given.
  545. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  546. // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  547. std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  548. EXPECT_NE(ds, nullptr);
  549. // Create a Repeat operation on ds
  550. int32_t repeat_num = 2;
  551. ds = ds->Repeat(repeat_num);
  552. EXPECT_NE(ds, nullptr);
  553. // Create an iterator over the result of the above dataset
  554. // This will trigger the creation of the Execution Tree and launch it.
  555. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  556. EXPECT_NE(iter, nullptr);
  557. // Iterate the dataset and get each row
  558. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  559. iter->GetNextRow(&row);
  560. uint64_t i = 0;
  561. while (row.size() != 0) {
  562. i++;
  563. iter->GetNextRow(&row);
  564. }
  565. EXPECT_EQ(i, 4);
  566. // Manually terminate the pipeline
  567. iter->Stop();
  568. }
  569. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  570. session_id_type env_session;
  571. Status s = GetSessionFromEnv(&env_session);
  572. EXPECT_EQ(s, Status::OK());
  573. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  574. EXPECT_NE(some_cache, nullptr);
  575. // Create a CLUE Dataset, this file_path has 3 records in it
  576. std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  577. std::string task = "AFQMC";
  578. std::string usage = "train";
  579. // In this one, a num_samples=2 argument is given.
  580. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  581. // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  582. std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  583. EXPECT_NE(ds, nullptr);
  584. // Create a Repeat operation on ds
  585. int32_t repeat_num = 2;
  586. ds = ds->Repeat(repeat_num);
  587. EXPECT_NE(ds, nullptr);
  588. // Create an iterator over the result of the above dataset
  589. // This will trigger the creation of the Execution Tree and launch it.
  590. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  591. EXPECT_NE(iter, nullptr);
  592. // Iterate the dataset and get each row
  593. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  594. iter->GetNextRow(&row);
  595. uint64_t i = 0;
  596. while (row.size() != 0) {
  597. i++;
  598. iter->GetNextRow(&row);
  599. }
  600. EXPECT_EQ(i, 4);
  601. // Manually terminate the pipeline
  602. iter->Stop();
  603. }
  604. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
  605. session_id_type env_session;
  606. Status s = GetSessionFromEnv(&env_session);
  607. EXPECT_EQ(s, Status::OK());
  608. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  609. EXPECT_NE(some_cache, nullptr);
  610. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  611. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  612. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  613. EXPECT_NE(ds1, nullptr);
  614. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  615. EXPECT_NE(ds2, nullptr);
  616. // Create and launch the Execution Tree for ds1
  617. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  618. EXPECT_NE(iter1, nullptr);
  619. // Iterate the dataset and get each row
  620. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  621. iter1->GetNextRow(&row);
  622. uint64_t i = 0;
  623. while (row.size() != 0) {
  624. i++;
  625. auto image = row["image"];
  626. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  627. iter1->GetNextRow(&row);
  628. }
  629. EXPECT_EQ(i, 2);
  630. // Manually terminate the pipeline
  631. iter1->Stop();
  632. // Create and launch the Execution Tree for ds2
  633. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  634. EXPECT_NE(iter2, nullptr);
  635. // Iterate the dataset and get each row
  636. iter2->GetNextRow(&row);
  637. i = 0;
  638. while (row.size() != 0) {
  639. i++;
  640. auto image = row["image"];
  641. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  642. iter2->GetNextRow(&row);
  643. }
  644. EXPECT_EQ(i, 2);
  645. // Manually terminate the pipeline
  646. iter2->Stop();
  647. }
  648. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
  649. session_id_type env_session;
  650. Status s = GetSessionFromEnv(&env_session);
  651. EXPECT_EQ(s, Status::OK());
  652. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  653. EXPECT_NE(some_cache, nullptr);
  654. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  655. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  656. // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
  657. // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
  658. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, RandomSampler(), {}, {}, some_cache);
  659. EXPECT_NE(ds1, nullptr);
  660. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, SequentialSampler(), {}, {}, some_cache);
  661. EXPECT_NE(ds2, nullptr);
  662. // Create and launch the Execution Tree for ds1
  663. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  664. EXPECT_NE(iter1, nullptr);
  665. // Iterate the dataset and get each row
  666. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  667. iter1->GetNextRow(&row);
  668. uint64_t i = 0;
  669. while (row.size() != 0) {
  670. i++;
  671. auto image = row["image"];
  672. iter1->GetNextRow(&row);
  673. }
  674. EXPECT_EQ(i, 2);
  675. // Manually terminate the pipeline
  676. iter1->Stop();
  677. // Create and launch the Execution Tree for ds2
  678. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  679. EXPECT_NE(iter2, nullptr);
  680. // Iterate the dataset and get each row
  681. iter2->GetNextRow(&row);
  682. i = 0;
  683. while (row.size() != 0) {
  684. i++;
  685. auto image = row["image"];
  686. iter2->GetNextRow(&row);
  687. }
  688. EXPECT_EQ(i, 2);
  689. // Manually terminate the pipeline
  690. iter2->Stop();
  691. }
  692. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
  693. session_id_type env_session;
  694. Status s = GetSessionFromEnv(&env_session);
  695. EXPECT_EQ(s, Status::OK());
  696. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  697. EXPECT_NE(some_cache, nullptr);
  698. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  699. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  700. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, RandomSampler(), {}, {}, some_cache);
  701. EXPECT_NE(ds1, nullptr);
  702. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  703. EXPECT_NE(ds2, nullptr);
  704. // Create and launch the Execution Tree for ds1
  705. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  706. EXPECT_NE(iter1, nullptr);
  707. // Iterate the dataset and get each row
  708. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  709. iter1->GetNextRow(&row);
  710. uint64_t i = 0;
  711. while (row.size() != 0) {
  712. i++;
  713. auto image = row["image"];
  714. iter1->GetNextRow(&row);
  715. }
  716. EXPECT_EQ(i, 2);
  717. // Manually terminate the pipeline
  718. iter1->Stop();
  719. // Re-use a cache for the second pipeline would fail
  720. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  721. EXPECT_EQ(iter2, nullptr);
  722. }