You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_cache_test.cc 30 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. #include "minddata/dataset/include/vision.h"
  19. using namespace mindspore::dataset;
  20. // Helper function to get the session id from SESSION_ID env variable
  21. Status GetSessionFromEnv(session_id_type *session_id);
  22. class MindDataTestCacheOp : public UT::DatasetOpTesting {
  23. public:
  24. void SetUp() override {
  25. DatasetOpTesting::SetUp();
  26. }
  27. };
  28. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  29. session_id_type env_session;
  30. Status s = GetSessionFromEnv(&env_session);
  31. EXPECT_EQ(s, Status::OK());
  32. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true, "127.0.0.1", 50053, 1, 1);
  33. EXPECT_NE(some_cache, nullptr);
  34. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  35. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  36. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  37. EXPECT_NE(ds, nullptr);
  38. // Create an iterator over the result of the above dataset
  39. // This will trigger the creation of the Execution Tree and launch it.
  40. // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  41. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  42. EXPECT_EQ(iter, nullptr);
  43. }
  44. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
  45. session_id_type env_session;
  46. Status s = GetSessionFromEnv(&env_session);
  47. EXPECT_EQ(s, Status::OK());
  48. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  49. EXPECT_NE(some_cache, nullptr);
  50. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  51. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  52. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  53. EXPECT_NE(ds, nullptr);
  54. // Create objects for the tensor ops
  55. std::shared_ptr<TensorOperation> decode_op = vision::Decode();
  56. EXPECT_NE(decode_op, nullptr);
  57. // Create a Map operation on ds
  58. ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
  59. EXPECT_NE(ds, nullptr);
  60. // Create an iterator over the result of the above dataset
  61. // This will trigger the creation of the Execution Tree and launch it.
  62. // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
  63. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  64. EXPECT_EQ(iter, nullptr);
  65. }
  66. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  67. session_id_type env_session;
  68. Status s = GetSessionFromEnv(&env_session);
  69. EXPECT_EQ(s, Status::OK());
  70. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  71. EXPECT_NE(some_cache, nullptr);
  72. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  73. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  74. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  75. EXPECT_NE(ds, nullptr);
  76. // Create a Repeat operation on ds
  77. int32_t repeat_num = 2;
  78. ds = ds->Repeat(repeat_num);
  79. EXPECT_NE(ds, nullptr);
  80. // Create an iterator over the result of the above dataset
  81. // This will trigger the creation of the Execution Tree and launch it.
  82. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  83. EXPECT_NE(iter, nullptr);
  84. // Iterate the dataset and get each row
  85. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  86. iter->GetNextRow(&row);
  87. uint64_t i = 0;
  88. while (row.size() != 0) {
  89. i++;
  90. auto image = row["image"];
  91. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  92. iter->GetNextRow(&row);
  93. }
  94. EXPECT_EQ(i, 4);
  95. // Manually terminate the pipeline
  96. iter->Stop();
  97. }
  98. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  99. session_id_type env_session;
  100. Status s = GetSessionFromEnv(&env_session);
  101. EXPECT_EQ(s, Status::OK());
  102. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  103. EXPECT_NE(some_cache, nullptr);
  104. // Create a Coco Dataset, this folder_path has 6 images in it
  105. std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  106. std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  107. std::shared_ptr<Dataset> ds =
  108. Coco(folder_path, annotation_file_path, "Detection", false, RandomSampler(), some_cache);
  109. EXPECT_NE(ds, nullptr);
  110. // Create a Repeat operation on ds
  111. int32_t repeat_num = 2;
  112. ds = ds->Repeat(repeat_num);
  113. EXPECT_NE(ds, nullptr);
  114. // Create an iterator over the result of the above dataset
  115. // This will trigger the creation of the Execution Tree and launch it.
  116. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  117. EXPECT_NE(iter, nullptr);
  118. // Iterate the dataset and get each row
  119. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  120. iter->GetNextRow(&row);
  121. uint64_t i = 0;
  122. while (row.size() != 0) {
  123. i++;
  124. auto image = row["image"];
  125. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  126. iter->GetNextRow(&row);
  127. }
  128. EXPECT_EQ(i, 12);
  129. // Manually terminate the pipeline
  130. iter->Stop();
  131. }
  132. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  133. session_id_type env_session;
  134. Status s = GetSessionFromEnv(&env_session);
  135. EXPECT_EQ(s, Status::OK());
  136. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  137. EXPECT_NE(some_cache, nullptr);
  138. // Create a Mnist Dataset
  139. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  140. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", RandomSampler(false, 10), some_cache);
  141. EXPECT_NE(ds, nullptr);
  142. // Create a Repeat operation on ds
  143. int32_t repeat_num = 2;
  144. ds = ds->Repeat(repeat_num);
  145. EXPECT_NE(ds, nullptr);
  146. // Create an iterator over the result of the above dataset
  147. // This will trigger the creation of the Execution Tree and launch it.
  148. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  149. EXPECT_NE(iter, nullptr);
  150. // Iterate the dataset and get each row
  151. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  152. iter->GetNextRow(&row);
  153. uint64_t i = 0;
  154. while (row.size() != 0) {
  155. i++;
  156. auto image = row["image"];
  157. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  158. iter->GetNextRow(&row);
  159. }
  160. EXPECT_EQ(i, 20);
  161. // Manually terminate the pipeline
  162. iter->Stop();
  163. }
  164. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  165. session_id_type env_session;
  166. Status s = GetSessionFromEnv(&env_session);
  167. EXPECT_EQ(s, Status::OK());
  168. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  169. EXPECT_NE(some_cache, nullptr);
  170. // Create a CelebA Dataset, this folder_path has 4 records in it
  171. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  172. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", RandomSampler(false, 10), false, {}, some_cache);
  173. EXPECT_NE(ds, nullptr);
  174. // Create a Repeat operation on ds
  175. int32_t repeat_num = 2;
  176. ds = ds->Repeat(repeat_num);
  177. EXPECT_NE(ds, nullptr);
  178. // Create an iterator over the result of the above dataset
  179. // This will trigger the creation of the Execution Tree and launch it.
  180. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  181. EXPECT_NE(iter, nullptr);
  182. // Iterate the dataset and get each row
  183. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  184. iter->GetNextRow(&row);
  185. uint64_t i = 0;
  186. while (row.size() != 0) {
  187. i++;
  188. auto image = row["image"];
  189. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  190. iter->GetNextRow(&row);
  191. }
  192. EXPECT_EQ(i, 8);
  193. // Manually terminate the pipeline
  194. iter->Stop();
  195. }
  196. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  197. session_id_type env_session;
  198. Status s = GetSessionFromEnv(&env_session);
  199. EXPECT_EQ(s, Status::OK());
  200. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  201. EXPECT_NE(some_cache, nullptr);
  202. // Create a Manifest Dataset, this file_path has 2 records in it
  203. std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  204. std::shared_ptr<Dataset> ds = Manifest(file_path, "train", RandomSampler(), {}, false, some_cache);
  205. EXPECT_NE(ds, nullptr);
  206. // Create a Repeat operation on ds
  207. int32_t repeat_num = 2;
  208. ds = ds->Repeat(repeat_num);
  209. EXPECT_NE(ds, nullptr);
  210. // Create an iterator over the result of the above dataset
  211. // This will trigger the creation of the Execution Tree and launch it.
  212. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  213. EXPECT_NE(iter, nullptr);
  214. // Iterate the dataset and get each row
  215. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  216. iter->GetNextRow(&row);
  217. uint64_t i = 0;
  218. while (row.size() != 0) {
  219. i++;
  220. auto image = row["image"];
  221. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  222. iter->GetNextRow(&row);
  223. }
  224. EXPECT_EQ(i, 4);
  225. // Manually terminate the pipeline
  226. iter->Stop();
  227. }
  228. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  229. session_id_type env_session;
  230. Status s = GetSessionFromEnv(&env_session);
  231. EXPECT_EQ(s, Status::OK());
  232. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  233. EXPECT_NE(some_cache, nullptr);
  234. // Create a Cifar10 Dataset
  235. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  236. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", RandomSampler(false, 10), some_cache);
  237. EXPECT_NE(ds, nullptr);
  238. // Create a Repeat operation on ds
  239. int32_t repeat_num = 2;
  240. ds = ds->Repeat(repeat_num);
  241. EXPECT_NE(ds, nullptr);
  242. // Create an iterator over the result of the above dataset
  243. // This will trigger the creation of the Execution Tree and launch it.
  244. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  245. EXPECT_NE(iter, nullptr);
  246. // Iterate the dataset and get each row
  247. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  248. iter->GetNextRow(&row);
  249. uint64_t i = 0;
  250. while (row.size() != 0) {
  251. i++;
  252. auto image = row["image"];
  253. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  254. iter->GetNextRow(&row);
  255. }
  256. EXPECT_EQ(i, 20);
  257. // Manually terminate the pipeline
  258. iter->Stop();
  259. }
  260. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  261. session_id_type env_session;
  262. Status s = GetSessionFromEnv(&env_session);
  263. EXPECT_EQ(s, Status::OK());
  264. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  265. EXPECT_NE(some_cache, nullptr);
  266. // Create a Cifar100 Dataset
  267. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  268. std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", RandomSampler(false, 10), some_cache);
  269. EXPECT_NE(ds, nullptr);
  270. // Create a Repeat operation on ds
  271. int32_t repeat_num = 2;
  272. ds = ds->Repeat(repeat_num);
  273. EXPECT_NE(ds, nullptr);
  274. // Create an iterator over the result of the above dataset
  275. // This will trigger the creation of the Execution Tree and launch it.
  276. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  277. EXPECT_NE(iter, nullptr);
  278. // Iterate the dataset and get each row
  279. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  280. iter->GetNextRow(&row);
  281. uint64_t i = 0;
  282. while (row.size() != 0) {
  283. i++;
  284. auto image = row["image"];
  285. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  286. iter->GetNextRow(&row);
  287. }
  288. EXPECT_EQ(i, 20);
  289. // Manually terminate the pipeline
  290. iter->Stop();
  291. }
  292. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  293. session_id_type env_session;
  294. Status s = GetSessionFromEnv(&env_session);
  295. EXPECT_EQ(s, Status::OK());
  296. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  297. EXPECT_NE(some_cache, nullptr);
  298. // Create a VOC Dataset, this folder_path has 9 records in it
  299. std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  300. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, RandomSampler(), some_cache);
  301. EXPECT_NE(ds, nullptr);
  302. // Create a Repeat operation on ds
  303. int32_t repeat_num = 2;
  304. ds = ds->Repeat(repeat_num);
  305. EXPECT_NE(ds, nullptr);
  306. // Create an iterator over the result of the above dataset
  307. // This will trigger the creation of the Execution Tree and launch it.
  308. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  309. EXPECT_NE(iter, nullptr);
  310. // Iterate the dataset and get each row
  311. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  312. iter->GetNextRow(&row);
  313. uint64_t i = 0;
  314. while (row.size() != 0) {
  315. i++;
  316. auto image = row["image"];
  317. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  318. iter->GetNextRow(&row);
  319. }
  320. EXPECT_EQ(i, 18);
  321. // Manually terminate the pipeline
  322. iter->Stop();
  323. }
  324. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  325. session_id_type env_session;
  326. Status s = GetSessionFromEnv(&env_session);
  327. EXPECT_EQ(s, Status::OK());
  328. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  329. EXPECT_NE(some_cache, nullptr);
  330. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  331. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  332. std::vector<std::string> column_names = {"image", "label", "id"};
  333. // Create a Album Dataset, 7 records in it
  334. std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, false, RandomSampler(), some_cache);
  335. EXPECT_NE(ds, nullptr);
  336. // Create a Repeat operation on ds
  337. int32_t repeat_num = 2;
  338. ds = ds->Repeat(repeat_num);
  339. EXPECT_NE(ds, nullptr);
  340. // Create an iterator over the result of the above dataset
  341. // This will trigger the creation of the Execution Tree and launch it.
  342. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  343. EXPECT_NE(iter, nullptr);
  344. // Iterate the dataset and get each row
  345. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  346. iter->GetNextRow(&row);
  347. uint64_t i = 0;
  348. while (row.size() != 0) {
  349. i++;
  350. iter->GetNextRow(&row);
  351. }
  352. EXPECT_EQ(i, 14);
  353. // Manually terminate the pipeline
  354. iter->Stop();
  355. }
  356. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  357. session_id_type env_session;
  358. Status s = GetSessionFromEnv(&env_session);
  359. EXPECT_EQ(s, Status::OK());
  360. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  361. EXPECT_NE(some_cache, nullptr);
  362. // Create a RandomDataset
  363. std::shared_ptr<SchemaObj> schema = Schema();
  364. schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
  365. schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
  366. std::shared_ptr<Dataset> ds = RandomData(4, schema, {}, some_cache);
  367. EXPECT_NE(ds, nullptr);
  368. // Create a Repeat operation on ds
  369. int32_t repeat_num = 2;
  370. ds = ds->Repeat(repeat_num);
  371. EXPECT_NE(ds, nullptr);
  372. // Create an iterator over the result of the above dataset
  373. // This will trigger the creation of the Execution Tree and launch it.
  374. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  375. EXPECT_NE(iter, nullptr);
  376. // Iterate the dataset and get each row
  377. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  378. iter->GetNextRow(&row);
  379. uint64_t i = 0;
  380. while (row.size() != 0) {
  381. i++;
  382. iter->GetNextRow(&row);
  383. }
  384. EXPECT_EQ(i, 8);
  385. // Manually terminate the pipeline
  386. iter->Stop();
  387. }
  388. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  389. session_id_type env_session;
  390. Status s = GetSessionFromEnv(&env_session);
  391. EXPECT_EQ(s, Status::OK());
  392. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  393. EXPECT_NE(some_cache, nullptr);
  394. // Create a TFRecord Dataset, this file_path has 3 records in it
  395. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  396. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  397. std::shared_ptr<Dataset> ds =
  398. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  399. EXPECT_NE(ds, nullptr);
  400. // Create a Repeat operation on ds
  401. int32_t repeat_num = 2;
  402. ds = ds->Repeat(repeat_num);
  403. EXPECT_NE(ds, nullptr);
  404. // Create an iterator over the result of the above dataset
  405. // This will trigger the creation of the Execution Tree and launch it.
  406. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  407. EXPECT_NE(iter, nullptr);
  408. // Iterate the dataset and get each row
  409. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  410. iter->GetNextRow(&row);
  411. uint64_t i = 0;
  412. while (row.size() != 0) {
  413. i++;
  414. auto image = row["image"];
  415. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  416. iter->GetNextRow(&row);
  417. }
  418. EXPECT_EQ(i, 6);
  419. // Manually terminate the pipeline
  420. iter->Stop();
  421. }
  422. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  423. session_id_type env_session;
  424. Status s = GetSessionFromEnv(&env_session);
  425. EXPECT_EQ(s, Status::OK());
  426. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  427. EXPECT_NE(some_cache, nullptr);
  428. // Create a TFRecord Dataset, this file_path has 3 records in it
  429. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  430. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  431. // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  432. // used, the tree prepare should undo the sharding configuration and instead, a distributed
  433. // sampler will be chosen with the same shard config.
  434. // With only 3 records shard into 3, we expect only 1 record returned for this shard
  435. // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  436. // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  437. // there was not any cache.
  438. std::shared_ptr<Dataset> ds =
  439. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  440. EXPECT_NE(ds, nullptr);
  441. // Create a Repeat operation on ds
  442. int32_t repeat_num = 2;
  443. ds = ds->Repeat(repeat_num);
  444. EXPECT_NE(ds, nullptr);
  445. // Create an iterator over the result of the above dataset
  446. // This will trigger the creation of the Execution Tree and launch it.
  447. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  448. EXPECT_NE(iter, nullptr);
  449. // Iterate the dataset and get each row
  450. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  451. iter->GetNextRow(&row);
  452. uint64_t i = 0;
  453. while (row.size() != 0) {
  454. i++;
  455. auto image = row["image"];
  456. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  457. iter->GetNextRow(&row);
  458. }
  459. EXPECT_EQ(i, 2);
  460. // Manually terminate the pipeline
  461. iter->Stop();
  462. }
  463. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  464. session_id_type env_session;
  465. Status s = GetSessionFromEnv(&env_session);
  466. EXPECT_EQ(s, Status::OK());
  467. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  468. EXPECT_NE(some_cache, nullptr);
  469. // Create a TFRecord Dataset, this file_path has 3 records in it
  470. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  471. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  472. // In this one, a num_samples argument is given.
  473. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  474. // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  475. std::shared_ptr<Dataset> ds =
  476. TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  477. EXPECT_NE(ds, nullptr);
  478. // Create a Repeat operation on ds
  479. int32_t repeat_num = 2;
  480. ds = ds->Repeat(repeat_num);
  481. EXPECT_NE(ds, nullptr);
  482. // Create an iterator over the result of the above dataset
  483. // This will trigger the creation of the Execution Tree and launch it.
  484. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  485. EXPECT_NE(iter, nullptr);
  486. // Iterate the dataset and get each row
  487. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  488. iter->GetNextRow(&row);
  489. uint64_t i = 0;
  490. while (row.size() != 0) {
  491. i++;
  492. auto image = row["image"];
  493. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  494. iter->GetNextRow(&row);
  495. }
  496. EXPECT_EQ(i, 4);
  497. // Manually terminate the pipeline
  498. iter->Stop();
  499. }
  500. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  501. session_id_type env_session;
  502. Status s = GetSessionFromEnv(&env_session);
  503. EXPECT_EQ(s, Status::OK());
  504. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  505. EXPECT_NE(some_cache, nullptr);
  506. // Create a TextFile Dataset, this file_path has 3 records in it
  507. std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  508. // In this one, a num_samples=2 argument is given.
  509. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  510. // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  511. std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  512. EXPECT_NE(ds, nullptr);
  513. // Create a Repeat operation on ds
  514. int32_t repeat_num = 2;
  515. ds = ds->Repeat(repeat_num);
  516. EXPECT_NE(ds, nullptr);
  517. // Create an iterator over the result of the above dataset
  518. // This will trigger the creation of the Execution Tree and launch it.
  519. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  520. EXPECT_NE(iter, nullptr);
  521. // Iterate the dataset and get each row
  522. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  523. iter->GetNextRow(&row);
  524. uint64_t i = 0;
  525. while (row.size() != 0) {
  526. i++;
  527. iter->GetNextRow(&row);
  528. }
  529. EXPECT_EQ(i, 4);
  530. // Manually terminate the pipeline
  531. iter->Stop();
  532. }
  533. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  534. session_id_type env_session;
  535. Status s = GetSessionFromEnv(&env_session);
  536. EXPECT_EQ(s, Status::OK());
  537. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  538. EXPECT_NE(some_cache, nullptr);
  539. // Create a CSV Dataset, this file_path has 3 records in it
  540. std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  541. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  542. // In this one, a num_samples=2 argument is given.
  543. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  544. // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  545. std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  546. EXPECT_NE(ds, nullptr);
  547. // Create a Repeat operation on ds
  548. int32_t repeat_num = 2;
  549. ds = ds->Repeat(repeat_num);
  550. EXPECT_NE(ds, nullptr);
  551. // Create an iterator over the result of the above dataset
  552. // This will trigger the creation of the Execution Tree and launch it.
  553. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  554. EXPECT_NE(iter, nullptr);
  555. // Iterate the dataset and get each row
  556. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  557. iter->GetNextRow(&row);
  558. uint64_t i = 0;
  559. while (row.size() != 0) {
  560. i++;
  561. iter->GetNextRow(&row);
  562. }
  563. EXPECT_EQ(i, 4);
  564. // Manually terminate the pipeline
  565. iter->Stop();
  566. }
  567. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  568. session_id_type env_session;
  569. Status s = GetSessionFromEnv(&env_session);
  570. EXPECT_EQ(s, Status::OK());
  571. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  572. EXPECT_NE(some_cache, nullptr);
  573. // Create a CLUE Dataset, this file_path has 3 records in it
  574. std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  575. std::string task = "AFQMC";
  576. std::string usage = "train";
  577. // In this one, a num_samples=2 argument is given.
  578. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  579. // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  580. std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  581. EXPECT_NE(ds, nullptr);
  582. // Create a Repeat operation on ds
  583. int32_t repeat_num = 2;
  584. ds = ds->Repeat(repeat_num);
  585. EXPECT_NE(ds, nullptr);
  586. // Create an iterator over the result of the above dataset
  587. // This will trigger the creation of the Execution Tree and launch it.
  588. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  589. EXPECT_NE(iter, nullptr);
  590. // Iterate the dataset and get each row
  591. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  592. iter->GetNextRow(&row);
  593. uint64_t i = 0;
  594. while (row.size() != 0) {
  595. i++;
  596. iter->GetNextRow(&row);
  597. }
  598. EXPECT_EQ(i, 4);
  599. // Manually terminate the pipeline
  600. iter->Stop();
  601. }
  602. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
  603. session_id_type env_session;
  604. Status s = GetSessionFromEnv(&env_session);
  605. EXPECT_EQ(s, Status::OK());
  606. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  607. EXPECT_NE(some_cache, nullptr);
  608. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  609. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  610. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  611. EXPECT_NE(ds1, nullptr);
  612. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  613. EXPECT_NE(ds2, nullptr);
  614. // Create and launch the Execution Tree for ds1
  615. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  616. EXPECT_NE(iter1, nullptr);
  617. // Iterate the dataset and get each row
  618. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  619. iter1->GetNextRow(&row);
  620. uint64_t i = 0;
  621. while (row.size() != 0) {
  622. i++;
  623. auto image = row["image"];
  624. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  625. iter1->GetNextRow(&row);
  626. }
  627. EXPECT_EQ(i, 2);
  628. // Manually terminate the pipeline
  629. iter1->Stop();
  630. // Create and launch the Execution Tree for ds2
  631. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  632. EXPECT_NE(iter2, nullptr);
  633. // Iterate the dataset and get each row
  634. iter2->GetNextRow(&row);
  635. i = 0;
  636. while (row.size() != 0) {
  637. i++;
  638. auto image = row["image"];
  639. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  640. iter2->GetNextRow(&row);
  641. }
  642. EXPECT_EQ(i, 2);
  643. // Manually terminate the pipeline
  644. iter2->Stop();
  645. }
  646. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
  647. session_id_type env_session;
  648. Status s = GetSessionFromEnv(&env_session);
  649. EXPECT_EQ(s, Status::OK());
  650. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  651. EXPECT_NE(some_cache, nullptr);
  652. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  653. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  654. // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
  655. // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
  656. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, RandomSampler(), {}, {}, some_cache);
  657. EXPECT_NE(ds1, nullptr);
  658. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, SequentialSampler(), {}, {}, some_cache);
  659. EXPECT_NE(ds2, nullptr);
  660. // Create and launch the Execution Tree for ds1
  661. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  662. EXPECT_NE(iter1, nullptr);
  663. // Iterate the dataset and get each row
  664. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  665. iter1->GetNextRow(&row);
  666. uint64_t i = 0;
  667. while (row.size() != 0) {
  668. i++;
  669. auto image = row["image"];
  670. iter1->GetNextRow(&row);
  671. }
  672. EXPECT_EQ(i, 2);
  673. // Manually terminate the pipeline
  674. iter1->Stop();
  675. // Create and launch the Execution Tree for ds2
  676. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  677. EXPECT_NE(iter2, nullptr);
  678. // Iterate the dataset and get each row
  679. iter2->GetNextRow(&row);
  680. i = 0;
  681. while (row.size() != 0) {
  682. i++;
  683. auto image = row["image"];
  684. iter2->GetNextRow(&row);
  685. }
  686. EXPECT_EQ(i, 2);
  687. // Manually terminate the pipeline
  688. iter2->Stop();
  689. }
  690. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
  691. session_id_type env_session;
  692. Status s = GetSessionFromEnv(&env_session);
  693. EXPECT_EQ(s, Status::OK());
  694. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  695. EXPECT_NE(some_cache, nullptr);
  696. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  697. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  698. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, RandomSampler(), {}, {}, some_cache);
  699. EXPECT_NE(ds1, nullptr);
  700. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache);
  701. EXPECT_NE(ds2, nullptr);
  702. // Create and launch the Execution Tree for ds1
  703. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  704. EXPECT_NE(iter1, nullptr);
  705. // Iterate the dataset and get each row
  706. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  707. iter1->GetNextRow(&row);
  708. uint64_t i = 0;
  709. while (row.size() != 0) {
  710. i++;
  711. auto image = row["image"];
  712. iter1->GetNextRow(&row);
  713. }
  714. EXPECT_EQ(i, 2);
  715. // Manually terminate the pipeline
  716. iter1->Stop();
  717. // Re-use a cache for the second pipeline would fail
  718. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  719. EXPECT_EQ(iter2, nullptr);
  720. }