You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

c_api_cache_test.cc 33 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/dataset/datasets.h"
  18. #include "minddata/dataset/include/dataset/vision.h"
  19. using namespace mindspore::dataset;
  20. // Helper function to get the session id from SESSION_ID env variable
  21. Status GetSessionFromEnv(session_id_type *session_id);
  22. class MindDataTestCacheOp : public UT::DatasetOpTesting {
  23. public:
  24. void SetUp() override { DatasetOpTesting::SetUp(); }
  25. };
  26. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  27. session_id_type env_session;
  28. Status s = GetSessionFromEnv(&env_session);
  29. EXPECT_EQ(s, Status::OK());
  30. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50053, 1, 1);
  31. EXPECT_NE(some_cache, nullptr);
  32. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  33. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  34. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  35. EXPECT_NE(ds, nullptr);
  36. // Create an iterator over the result of the above dataset
  37. // This will trigger the creation of the Execution Tree and launch it.
  38. // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  39. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  40. EXPECT_EQ(iter, nullptr);
  41. }
  42. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
  43. session_id_type env_session;
  44. Status s = GetSessionFromEnv(&env_session);
  45. EXPECT_EQ(s, Status::OK());
  46. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  47. EXPECT_NE(some_cache, nullptr);
  48. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  49. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  50. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  51. EXPECT_NE(ds, nullptr);
  52. // Create objects for the tensor ops
  53. std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>();
  54. EXPECT_NE(decode_op, nullptr);
  55. // Create a Map operation on ds
  56. ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
  57. EXPECT_NE(ds, nullptr);
  58. // Create an iterator over the result of the above dataset
  59. // This will trigger the creation of the Execution Tree and launch it.
  60. // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
  61. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  62. EXPECT_EQ(iter, nullptr);
  63. }
  64. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  65. session_id_type env_session;
  66. Status s = GetSessionFromEnv(&env_session);
  67. EXPECT_EQ(s, Status::OK());
  68. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  69. EXPECT_NE(some_cache, nullptr);
  70. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  71. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  72. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  73. EXPECT_NE(ds, nullptr);
  74. // Create a Repeat operation on ds
  75. int32_t repeat_num = 2;
  76. ds = ds->Repeat(repeat_num);
  77. EXPECT_NE(ds, nullptr);
  78. // Create an iterator over the result of the above dataset
  79. // This will trigger the creation of the Execution Tree and launch it.
  80. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  81. EXPECT_NE(iter, nullptr);
  82. // Iterate the dataset and get each row
  83. std::unordered_map<std::string, mindspore::MSTensor> row;
  84. ASSERT_OK(iter->GetNextRow(&row));
  85. uint64_t i = 0;
  86. while (row.size() != 0) {
  87. i++;
  88. auto image = row["image"];
  89. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  90. ASSERT_OK(iter->GetNextRow(&row));
  91. }
  92. EXPECT_EQ(i, 4);
  93. // Manually terminate the pipeline
  94. iter->Stop();
  95. }
  96. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  97. session_id_type env_session;
  98. Status s = GetSessionFromEnv(&env_session);
  99. EXPECT_EQ(s, Status::OK());
  100. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  101. EXPECT_NE(some_cache, nullptr);
  102. // Create a Coco Dataset, this folder_path has 6 images in it
  103. std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  104. std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  105. std::shared_ptr<Dataset> ds =
  106. Coco(folder_path, annotation_file_path, "Detection", false, std::make_shared<RandomSampler>(), some_cache);
  107. EXPECT_NE(ds, nullptr);
  108. // Create a Repeat operation on ds
  109. int32_t repeat_num = 2;
  110. ds = ds->Repeat(repeat_num);
  111. EXPECT_NE(ds, nullptr);
  112. // Create an iterator over the result of the above dataset
  113. // This will trigger the creation of the Execution Tree and launch it.
  114. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  115. EXPECT_NE(iter, nullptr);
  116. // Iterate the dataset and get each row
  117. std::unordered_map<std::string, mindspore::MSTensor> row;
  118. ASSERT_OK(iter->GetNextRow(&row));
  119. uint64_t i = 0;
  120. while (row.size() != 0) {
  121. i++;
  122. auto image = row["image"];
  123. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  124. ASSERT_OK(iter->GetNextRow(&row));
  125. }
  126. EXPECT_EQ(i, 12);
  127. // Manually terminate the pipeline
  128. iter->Stop();
  129. }
  130. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  131. session_id_type env_session;
  132. Status s = GetSessionFromEnv(&env_session);
  133. EXPECT_EQ(s, Status::OK());
  134. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  135. EXPECT_NE(some_cache, nullptr);
  136. // Create a Mnist Dataset
  137. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  138. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  139. EXPECT_NE(ds, nullptr);
  140. // Create a Repeat operation on ds
  141. int32_t repeat_num = 2;
  142. ds = ds->Repeat(repeat_num);
  143. EXPECT_NE(ds, nullptr);
  144. // Create an iterator over the result of the above dataset
  145. // This will trigger the creation of the Execution Tree and launch it.
  146. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  147. EXPECT_NE(iter, nullptr);
  148. // Iterate the dataset and get each row
  149. std::unordered_map<std::string, mindspore::MSTensor> row;
  150. ASSERT_OK(iter->GetNextRow(&row));
  151. uint64_t i = 0;
  152. while (row.size() != 0) {
  153. i++;
  154. auto image = row["image"];
  155. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  156. ASSERT_OK(iter->GetNextRow(&row));
  157. }
  158. EXPECT_EQ(i, 20);
  159. // Manually terminate the pipeline
  160. iter->Stop();
  161. }
  162. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  163. session_id_type env_session;
  164. Status s = GetSessionFromEnv(&env_session);
  165. EXPECT_EQ(s, Status::OK());
  166. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  167. EXPECT_NE(some_cache, nullptr);
  168. // Create a CelebA Dataset, this folder_path has 4 records in it
  169. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  170. std::shared_ptr<Dataset> ds =
  171. CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
  172. EXPECT_NE(ds, nullptr);
  173. // Create a Repeat operation on ds
  174. int32_t repeat_num = 2;
  175. ds = ds->Repeat(repeat_num);
  176. EXPECT_NE(ds, nullptr);
  177. // Create an iterator over the result of the above dataset
  178. // This will trigger the creation of the Execution Tree and launch it.
  179. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  180. EXPECT_NE(iter, nullptr);
  181. // Iterate the dataset and get each row
  182. std::unordered_map<std::string, mindspore::MSTensor> row;
  183. ASSERT_OK(iter->GetNextRow(&row));
  184. uint64_t i = 0;
  185. while (row.size() != 0) {
  186. i++;
  187. auto image = row["image"];
  188. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  189. ASSERT_OK(iter->GetNextRow(&row));
  190. }
  191. EXPECT_EQ(i, 8);
  192. // Manually terminate the pipeline
  193. iter->Stop();
  194. }
  195. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  196. session_id_type env_session;
  197. Status s = GetSessionFromEnv(&env_session);
  198. EXPECT_EQ(s, Status::OK());
  199. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  200. EXPECT_NE(some_cache, nullptr);
  201. // Create a Manifest Dataset, this file_path has 2 records in it
  202. std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  203. std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, false, some_cache);
  204. EXPECT_NE(ds, nullptr);
  205. // Create a Repeat operation on ds
  206. int32_t repeat_num = 2;
  207. ds = ds->Repeat(repeat_num);
  208. EXPECT_NE(ds, nullptr);
  209. // Create an iterator over the result of the above dataset
  210. // This will trigger the creation of the Execution Tree and launch it.
  211. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  212. EXPECT_NE(iter, nullptr);
  213. // Iterate the dataset and get each row
  214. std::unordered_map<std::string, mindspore::MSTensor> row;
  215. ASSERT_OK(iter->GetNextRow(&row));
  216. uint64_t i = 0;
  217. while (row.size() != 0) {
  218. i++;
  219. auto image = row["image"];
  220. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  221. ASSERT_OK(iter->GetNextRow(&row));
  222. }
  223. EXPECT_EQ(i, 4);
  224. // Manually terminate the pipeline
  225. iter->Stop();
  226. }
  227. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  228. session_id_type env_session;
  229. Status s = GetSessionFromEnv(&env_session);
  230. EXPECT_EQ(s, Status::OK());
  231. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  232. EXPECT_NE(some_cache, nullptr);
  233. // Create a Cifar10 Dataset
  234. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  235. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  236. EXPECT_NE(ds, nullptr);
  237. // Create a Repeat operation on ds
  238. int32_t repeat_num = 2;
  239. ds = ds->Repeat(repeat_num);
  240. EXPECT_NE(ds, nullptr);
  241. // Create an iterator over the result of the above dataset
  242. // This will trigger the creation of the Execution Tree and launch it.
  243. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  244. EXPECT_NE(iter, nullptr);
  245. // Iterate the dataset and get each row
  246. std::unordered_map<std::string, mindspore::MSTensor> row;
  247. ASSERT_OK(iter->GetNextRow(&row));
  248. uint64_t i = 0;
  249. while (row.size() != 0) {
  250. i++;
  251. auto image = row["image"];
  252. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  253. ASSERT_OK(iter->GetNextRow(&row));
  254. }
  255. EXPECT_EQ(i, 20);
  256. // Manually terminate the pipeline
  257. iter->Stop();
  258. }
  259. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  260. session_id_type env_session;
  261. Status s = GetSessionFromEnv(&env_session);
  262. EXPECT_EQ(s, Status::OK());
  263. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  264. EXPECT_NE(some_cache, nullptr);
  265. // Create a Cifar100 Dataset
  266. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  267. std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  268. EXPECT_NE(ds, nullptr);
  269. // Create a Repeat operation on ds
  270. int32_t repeat_num = 2;
  271. ds = ds->Repeat(repeat_num);
  272. EXPECT_NE(ds, nullptr);
  273. // Create an iterator over the result of the above dataset
  274. // This will trigger the creation of the Execution Tree and launch it.
  275. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  276. EXPECT_NE(iter, nullptr);
  277. // Iterate the dataset and get each row
  278. std::unordered_map<std::string, mindspore::MSTensor> row;
  279. ASSERT_OK(iter->GetNextRow(&row));
  280. uint64_t i = 0;
  281. while (row.size() != 0) {
  282. i++;
  283. auto image = row["image"];
  284. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  285. ASSERT_OK(iter->GetNextRow(&row));
  286. }
  287. EXPECT_EQ(i, 20);
  288. // Manually terminate the pipeline
  289. iter->Stop();
  290. }
  291. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  292. session_id_type env_session;
  293. Status s = GetSessionFromEnv(&env_session);
  294. EXPECT_EQ(s, Status::OK());
  295. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  296. EXPECT_NE(some_cache, nullptr);
  297. // Create a VOC Dataset, this folder_path has 9 records in it
  298. std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  299. std::shared_ptr<Dataset> ds =
  300. VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
  301. EXPECT_NE(ds, nullptr);
  302. // Create a Repeat operation on ds
  303. int32_t repeat_num = 2;
  304. ds = ds->Repeat(repeat_num);
  305. EXPECT_NE(ds, nullptr);
  306. // Create an iterator over the result of the above dataset
  307. // This will trigger the creation of the Execution Tree and launch it.
  308. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  309. EXPECT_NE(iter, nullptr);
  310. // Iterate the dataset and get each row
  311. std::unordered_map<std::string, mindspore::MSTensor> row;
  312. ASSERT_OK(iter->GetNextRow(&row));
  313. uint64_t i = 0;
  314. while (row.size() != 0) {
  315. i++;
  316. auto image = row["image"];
  317. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  318. ASSERT_OK(iter->GetNextRow(&row));
  319. }
  320. EXPECT_EQ(i, 18);
  321. // Manually terminate the pipeline
  322. iter->Stop();
  323. }
  324. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  325. session_id_type env_session;
  326. Status s = GetSessionFromEnv(&env_session);
  327. EXPECT_EQ(s, Status::OK());
  328. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  329. EXPECT_NE(some_cache, nullptr);
  330. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  331. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  332. std::vector<std::string> column_names = {"image", "label", "id"};
  333. // Create a Album Dataset, 7 records in it
  334. std::shared_ptr<Dataset> ds =
  335. Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
  336. EXPECT_NE(ds, nullptr);
  337. // Create a Repeat operation on ds
  338. int32_t repeat_num = 2;
  339. ds = ds->Repeat(repeat_num);
  340. EXPECT_NE(ds, nullptr);
  341. // Create an iterator over the result of the above dataset
  342. // This will trigger the creation of the Execution Tree and launch it.
  343. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  344. EXPECT_NE(iter, nullptr);
  345. // Iterate the dataset and get each row
  346. std::unordered_map<std::string, mindspore::MSTensor> row;
  347. ASSERT_OK(iter->GetNextRow(&row));
  348. uint64_t i = 0;
  349. while (row.size() != 0) {
  350. i++;
  351. ASSERT_OK(iter->GetNextRow(&row));
  352. }
  353. EXPECT_EQ(i, 14);
  354. // Manually terminate the pipeline
  355. iter->Stop();
  356. }
  357. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMindRecordCApi) {
  358. session_id_type env_session;
  359. Status s = GetSessionFromEnv(&env_session);
  360. EXPECT_EQ(s, Status::OK());
  361. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  362. EXPECT_NE(some_cache, nullptr);
  363. // Create a MindData Dataset
  364. // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  365. // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  366. std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  367. // Create a MindRecord Dataset, 20 records in it
  368. std::shared_ptr<Dataset> ds = MindData(file_path, {}, std::make_shared<RandomSampler>(), nullptr, 0,
  369. ShuffleMode::kGlobal, some_cache);
  370. EXPECT_NE(ds, nullptr);
  371. // Create an iterator over the result of the above dataset
  372. // This will trigger the creation of the Execution Tree and launch it.
  373. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  374. EXPECT_NE(iter, nullptr);
  375. // Iterate the dataset and get each row
  376. std::unordered_map<std::string, mindspore::MSTensor> row;
  377. ASSERT_OK(iter->GetNextRow(&row));
  378. uint64_t i = 0;
  379. while (row.size() != 0) {
  380. i++;
  381. ASSERT_OK(iter->GetNextRow(&row));
  382. }
  383. EXPECT_EQ(i, 20);
  384. // Manually terminate the pipeline
  385. iter->Stop();
  386. }
  387. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  388. session_id_type env_session;
  389. Status s = GetSessionFromEnv(&env_session);
  390. EXPECT_EQ(s, Status::OK());
  391. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  392. EXPECT_NE(some_cache, nullptr);
  393. // Create a RandomDataset
  394. std::shared_ptr<SchemaObj> schema = Schema();
  395. ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
  396. ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1}));
  397. std::shared_ptr<Dataset> ds = RandomData(8, schema, {}, some_cache);
  398. EXPECT_NE(ds, nullptr);
  399. // Create a Repeat operation on ds
  400. int32_t repeat_num = 2;
  401. ds = ds->Repeat(repeat_num);
  402. EXPECT_NE(ds, nullptr);
  403. // Create an iterator over the result of the above dataset
  404. // This will trigger the creation of the Execution Tree and launch it.
  405. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  406. EXPECT_NE(iter, nullptr);
  407. // Iterate the dataset and get each row
  408. std::unordered_map<std::string, mindspore::MSTensor> row;
  409. ASSERT_OK(iter->GetNextRow(&row));
  410. uint64_t i = 0;
  411. while (row.size() != 0) {
  412. i++;
  413. ASSERT_OK(iter->GetNextRow(&row));
  414. }
  415. EXPECT_EQ(i, 16);
  416. // Manually terminate the pipeline
  417. iter->Stop();
  418. }
  419. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  420. session_id_type env_session;
  421. Status s = GetSessionFromEnv(&env_session);
  422. EXPECT_EQ(s, Status::OK());
  423. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  424. EXPECT_NE(some_cache, nullptr);
  425. // Create a TFRecord Dataset, this file_path has 3 records in it
  426. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  427. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  428. std::shared_ptr<Dataset> ds =
  429. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  430. EXPECT_NE(ds, nullptr);
  431. // Create a Repeat operation on ds
  432. int32_t repeat_num = 2;
  433. ds = ds->Repeat(repeat_num);
  434. EXPECT_NE(ds, nullptr);
  435. // Create an iterator over the result of the above dataset
  436. // This will trigger the creation of the Execution Tree and launch it.
  437. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  438. EXPECT_NE(iter, nullptr);
  439. // Iterate the dataset and get each row
  440. std::unordered_map<std::string, mindspore::MSTensor> row;
  441. ASSERT_OK(iter->GetNextRow(&row));
  442. uint64_t i = 0;
  443. while (row.size() != 0) {
  444. i++;
  445. auto image = row["image"];
  446. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  447. ASSERT_OK(iter->GetNextRow(&row));
  448. }
  449. EXPECT_EQ(i, 6);
  450. // Manually terminate the pipeline
  451. iter->Stop();
  452. }
  453. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  454. session_id_type env_session;
  455. Status s = GetSessionFromEnv(&env_session);
  456. EXPECT_EQ(s, Status::OK());
  457. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  458. EXPECT_NE(some_cache, nullptr);
  459. // Create a TFRecord Dataset, this file_path has 3 records in it
  460. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  461. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  462. // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  463. // used, the tree prepare should undo the sharding configuration and instead, a distributed
  464. // sampler will be chosen with the same shard config.
  465. // With only 3 records shard into 3, we expect only 1 record returned for this shard
  466. // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  467. // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  468. // there was not any cache.
  469. std::shared_ptr<Dataset> ds =
  470. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  471. EXPECT_NE(ds, nullptr);
  472. // Create a Repeat operation on ds
  473. int32_t repeat_num = 2;
  474. ds = ds->Repeat(repeat_num);
  475. EXPECT_NE(ds, nullptr);
  476. // Create an iterator over the result of the above dataset
  477. // This will trigger the creation of the Execution Tree and launch it.
  478. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  479. EXPECT_NE(iter, nullptr);
  480. // Iterate the dataset and get each row
  481. std::unordered_map<std::string, mindspore::MSTensor> row;
  482. ASSERT_OK(iter->GetNextRow(&row));
  483. uint64_t i = 0;
  484. while (row.size() != 0) {
  485. i++;
  486. auto image = row["image"];
  487. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  488. ASSERT_OK(iter->GetNextRow(&row));
  489. }
  490. EXPECT_EQ(i, 2);
  491. // Manually terminate the pipeline
  492. iter->Stop();
  493. }
  494. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  495. session_id_type env_session;
  496. Status s = GetSessionFromEnv(&env_session);
  497. EXPECT_EQ(s, Status::OK());
  498. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  499. EXPECT_NE(some_cache, nullptr);
  500. // Create a TFRecord Dataset, this file_path has 3 records in it
  501. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  502. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  503. // In this one, a num_samples argument is given.
  504. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  505. // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  506. std::shared_ptr<Dataset> ds =
  507. TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  508. EXPECT_NE(ds, nullptr);
  509. // Create a Repeat operation on ds
  510. int32_t repeat_num = 2;
  511. ds = ds->Repeat(repeat_num);
  512. EXPECT_NE(ds, nullptr);
  513. // Create an iterator over the result of the above dataset
  514. // This will trigger the creation of the Execution Tree and launch it.
  515. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  516. EXPECT_NE(iter, nullptr);
  517. // Iterate the dataset and get each row
  518. std::unordered_map<std::string, mindspore::MSTensor> row;
  519. ASSERT_OK(iter->GetNextRow(&row));
  520. uint64_t i = 0;
  521. while (row.size() != 0) {
  522. i++;
  523. auto image = row["image"];
  524. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  525. ASSERT_OK(iter->GetNextRow(&row));
  526. }
  527. EXPECT_EQ(i, 4);
  528. // Manually terminate the pipeline
  529. iter->Stop();
  530. }
  531. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  532. session_id_type env_session;
  533. Status s = GetSessionFromEnv(&env_session);
  534. EXPECT_EQ(s, Status::OK());
  535. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  536. EXPECT_NE(some_cache, nullptr);
  537. // Create a TextFile Dataset, this file_path has 3 records in it
  538. std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  539. // In this one, a num_samples=2 argument is given.
  540. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  541. // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  542. std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  543. EXPECT_NE(ds, nullptr);
  544. // Create a Repeat operation on ds
  545. int32_t repeat_num = 2;
  546. ds = ds->Repeat(repeat_num);
  547. EXPECT_NE(ds, nullptr);
  548. // Create an iterator over the result of the above dataset
  549. // This will trigger the creation of the Execution Tree and launch it.
  550. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  551. EXPECT_NE(iter, nullptr);
  552. // Iterate the dataset and get each row
  553. std::unordered_map<std::string, mindspore::MSTensor> row;
  554. ASSERT_OK(iter->GetNextRow(&row));
  555. uint64_t i = 0;
  556. while (row.size() != 0) {
  557. i++;
  558. ASSERT_OK(iter->GetNextRow(&row));
  559. }
  560. EXPECT_EQ(i, 4);
  561. // Manually terminate the pipeline
  562. iter->Stop();
  563. }
  564. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  565. session_id_type env_session;
  566. Status s = GetSessionFromEnv(&env_session);
  567. EXPECT_EQ(s, Status::OK());
  568. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  569. EXPECT_NE(some_cache, nullptr);
  570. // Create a CSV Dataset, this file_path has 3 records in it
  571. std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  572. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  573. // In this one, a num_samples=2 argument is given.
  574. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  575. // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  576. std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  577. EXPECT_NE(ds, nullptr);
  578. // Create a Repeat operation on ds
  579. int32_t repeat_num = 2;
  580. ds = ds->Repeat(repeat_num);
  581. EXPECT_NE(ds, nullptr);
  582. // Create an iterator over the result of the above dataset
  583. // This will trigger the creation of the Execution Tree and launch it.
  584. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  585. EXPECT_NE(iter, nullptr);
  586. // Iterate the dataset and get each row
  587. std::unordered_map<std::string, mindspore::MSTensor> row;
  588. ASSERT_OK(iter->GetNextRow(&row));
  589. uint64_t i = 0;
  590. while (row.size() != 0) {
  591. i++;
  592. ASSERT_OK(iter->GetNextRow(&row));
  593. }
  594. EXPECT_EQ(i, 4);
  595. // Manually terminate the pipeline
  596. iter->Stop();
  597. }
  598. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  599. session_id_type env_session;
  600. Status s = GetSessionFromEnv(&env_session);
  601. EXPECT_EQ(s, Status::OK());
  602. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  603. EXPECT_NE(some_cache, nullptr);
  604. // Create a CLUE Dataset, this file_path has 3 records in it
  605. std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  606. std::string task = "AFQMC";
  607. std::string usage = "train";
  608. // In this one, a num_samples=2 argument is given.
  609. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  610. // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  611. std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  612. EXPECT_NE(ds, nullptr);
  613. // Create a Repeat operation on ds
  614. int32_t repeat_num = 2;
  615. ds = ds->Repeat(repeat_num);
  616. EXPECT_NE(ds, nullptr);
  617. // Create an iterator over the result of the above dataset
  618. // This will trigger the creation of the Execution Tree and launch it.
  619. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  620. EXPECT_NE(iter, nullptr);
  621. // Iterate the dataset and get each row
  622. std::unordered_map<std::string, mindspore::MSTensor> row;
  623. ASSERT_OK(iter->GetNextRow(&row));
  624. uint64_t i = 0;
  625. while (row.size() != 0) {
  626. i++;
  627. ASSERT_OK(iter->GetNextRow(&row));
  628. }
  629. EXPECT_EQ(i, 4);
  630. // Manually terminate the pipeline
  631. iter->Stop();
  632. }
  633. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
  634. session_id_type env_session;
  635. Status s = GetSessionFromEnv(&env_session);
  636. EXPECT_EQ(s, Status::OK());
  637. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  638. EXPECT_NE(some_cache, nullptr);
  639. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  640. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  641. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  642. EXPECT_NE(ds1, nullptr);
  643. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  644. EXPECT_NE(ds2, nullptr);
  645. // Create and launch the Execution Tree for ds1
  646. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  647. EXPECT_NE(iter1, nullptr);
  648. // Iterate the dataset and get each row
  649. std::unordered_map<std::string, mindspore::MSTensor> row;
  650. ASSERT_OK(iter1->GetNextRow(&row));
  651. uint64_t i = 0;
  652. while (row.size() != 0) {
  653. i++;
  654. auto image = row["image"];
  655. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  656. ASSERT_OK(iter1->GetNextRow(&row));
  657. }
  658. EXPECT_EQ(i, 2);
  659. // Manually terminate the pipeline
  660. iter1->Stop();
  661. // Create and launch the Execution Tree for ds2
  662. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  663. EXPECT_NE(iter2, nullptr);
  664. // Iterate the dataset and get each row
  665. ASSERT_OK(iter2->GetNextRow(&row));
  666. i = 0;
  667. while (row.size() != 0) {
  668. i++;
  669. auto image = row["image"];
  670. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  671. ASSERT_OK(iter2->GetNextRow(&row));
  672. }
  673. EXPECT_EQ(i, 2);
  674. // Manually terminate the pipeline
  675. iter2->Stop();
  676. }
  677. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
  678. session_id_type env_session;
  679. Status s = GetSessionFromEnv(&env_session);
  680. EXPECT_EQ(s, Status::OK());
  681. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  682. EXPECT_NE(some_cache, nullptr);
  683. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  684. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  685. // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
  686. // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
  687. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  688. EXPECT_NE(ds1, nullptr);
  689. std::shared_ptr<Dataset> ds2 =
  690. ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
  691. EXPECT_NE(ds2, nullptr);
  692. // Create and launch the Execution Tree for ds1
  693. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  694. EXPECT_NE(iter1, nullptr);
  695. // Iterate the dataset and get each row
  696. std::unordered_map<std::string, mindspore::MSTensor> row;
  697. ASSERT_OK(iter1->GetNextRow(&row));
  698. uint64_t i = 0;
  699. while (row.size() != 0) {
  700. i++;
  701. auto image = row["image"];
  702. ASSERT_OK(iter1->GetNextRow(&row));
  703. }
  704. EXPECT_EQ(i, 2);
  705. // Manually terminate the pipeline
  706. iter1->Stop();
  707. // Create and launch the Execution Tree for ds2
  708. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  709. EXPECT_NE(iter2, nullptr);
  710. // Iterate the dataset and get each row
  711. ASSERT_OK(iter2->GetNextRow(&row));
  712. i = 0;
  713. while (row.size() != 0) {
  714. i++;
  715. auto image = row["image"];
  716. ASSERT_OK(iter2->GetNextRow(&row));
  717. }
  718. EXPECT_EQ(i, 2);
  719. // Manually terminate the pipeline
  720. iter2->Stop();
  721. }
  722. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
  723. session_id_type env_session;
  724. Status s = GetSessionFromEnv(&env_session);
  725. EXPECT_EQ(s, Status::OK());
  726. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  727. EXPECT_NE(some_cache, nullptr);
  728. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  729. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  730. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  731. EXPECT_NE(ds1, nullptr);
  732. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  733. EXPECT_NE(ds2, nullptr);
  734. // Create and launch the Execution Tree for ds1
  735. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  736. EXPECT_NE(iter1, nullptr);
  737. // Iterate the dataset and get each row
  738. std::unordered_map<std::string, mindspore::MSTensor> row;
  739. ASSERT_OK(iter1->GetNextRow(&row));
  740. uint64_t i = 0;
  741. while (row.size() != 0) {
  742. i++;
  743. auto image = row["image"];
  744. ASSERT_OK(iter1->GetNextRow(&row));
  745. }
  746. EXPECT_EQ(i, 2);
  747. // Manually terminate the pipeline
  748. iter1->Stop();
  749. // Re-use a cache for the second pipeline would fail
  750. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  751. EXPECT_EQ(iter2, nullptr);
  752. }