You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

c_api_cache_test.cc 31 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. #include "minddata/dataset/include/vision.h"
  19. using namespace mindspore::dataset;
  20. // Helper function to get the session id from SESSION_ID env variable
  21. Status GetSessionFromEnv(session_id_type *session_id);
  22. class MindDataTestCacheOp : public UT::DatasetOpTesting {
  23. public:
  24. void SetUp() override { DatasetOpTesting::SetUp(); }
  25. };
  26. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  27. session_id_type env_session;
  28. Status s = GetSessionFromEnv(&env_session);
  29. EXPECT_EQ(s, Status::OK());
  30. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true, "127.0.0.1", 50053, 1, 1);
  31. EXPECT_NE(some_cache, nullptr);
  32. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  33. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  34. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  35. EXPECT_NE(ds, nullptr);
  36. // Create an iterator over the result of the above dataset
  37. // This will trigger the creation of the Execution Tree and launch it.
  38. // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  39. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  40. EXPECT_EQ(iter, nullptr);
  41. }
  42. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
  43. session_id_type env_session;
  44. Status s = GetSessionFromEnv(&env_session);
  45. EXPECT_EQ(s, Status::OK());
  46. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  47. EXPECT_NE(some_cache, nullptr);
  48. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  49. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  50. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  51. EXPECT_NE(ds, nullptr);
  52. // Create objects for the tensor ops
  53. std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>();
  54. EXPECT_NE(decode_op, nullptr);
  55. // Create a Map operation on ds
  56. ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
  57. EXPECT_NE(ds, nullptr);
  58. // Create an iterator over the result of the above dataset
  59. // This will trigger the creation of the Execution Tree and launch it.
  60. // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
  61. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  62. EXPECT_EQ(iter, nullptr);
  63. }
  64. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  65. session_id_type env_session;
  66. Status s = GetSessionFromEnv(&env_session);
  67. EXPECT_EQ(s, Status::OK());
  68. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  69. EXPECT_NE(some_cache, nullptr);
  70. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  71. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  72. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  73. EXPECT_NE(ds, nullptr);
  74. // Create a Repeat operation on ds
  75. int32_t repeat_num = 2;
  76. ds = ds->Repeat(repeat_num);
  77. EXPECT_NE(ds, nullptr);
  78. // Create an iterator over the result of the above dataset
  79. // This will trigger the creation of the Execution Tree and launch it.
  80. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  81. EXPECT_NE(iter, nullptr);
  82. // Iterate the dataset and get each row
  83. std::unordered_map<std::string, mindspore::MSTensor> row;
  84. iter->GetNextRow(&row);
  85. uint64_t i = 0;
  86. while (row.size() != 0) {
  87. i++;
  88. // auto image = row["image"];
  89. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  90. iter->GetNextRow(&row);
  91. }
  92. EXPECT_EQ(i, 4);
  93. // Manually terminate the pipeline
  94. iter->Stop();
  95. }
  96. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  97. session_id_type env_session;
  98. Status s = GetSessionFromEnv(&env_session);
  99. EXPECT_EQ(s, Status::OK());
  100. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  101. EXPECT_NE(some_cache, nullptr);
  102. // Create a Coco Dataset, this folder_path has 6 images in it
  103. std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  104. std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  105. std::shared_ptr<Dataset> ds =
  106. Coco(folder_path, annotation_file_path, "Detection", false, std::make_shared<RandomSampler>(), some_cache);
  107. EXPECT_NE(ds, nullptr);
  108. // Create a Repeat operation on ds
  109. int32_t repeat_num = 2;
  110. ds = ds->Repeat(repeat_num);
  111. EXPECT_NE(ds, nullptr);
  112. // Create an iterator over the result of the above dataset
  113. // This will trigger the creation of the Execution Tree and launch it.
  114. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  115. EXPECT_NE(iter, nullptr);
  116. // Iterate the dataset and get each row
  117. std::unordered_map<std::string, mindspore::MSTensor> row;
  118. iter->GetNextRow(&row);
  119. uint64_t i = 0;
  120. while (row.size() != 0) {
  121. i++;
  122. // auto image = row["image"];
  123. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  124. iter->GetNextRow(&row);
  125. }
  126. EXPECT_EQ(i, 12);
  127. // Manually terminate the pipeline
  128. iter->Stop();
  129. }
  130. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  131. session_id_type env_session;
  132. Status s = GetSessionFromEnv(&env_session);
  133. EXPECT_EQ(s, Status::OK());
  134. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  135. EXPECT_NE(some_cache, nullptr);
  136. // Create a Mnist Dataset
  137. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  138. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  139. EXPECT_NE(ds, nullptr);
  140. // Create a Repeat operation on ds
  141. int32_t repeat_num = 2;
  142. ds = ds->Repeat(repeat_num);
  143. EXPECT_NE(ds, nullptr);
  144. // Create an iterator over the result of the above dataset
  145. // This will trigger the creation of the Execution Tree and launch it.
  146. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  147. EXPECT_NE(iter, nullptr);
  148. // Iterate the dataset and get each row
  149. std::unordered_map<std::string, mindspore::MSTensor> row;
  150. iter->GetNextRow(&row);
  151. uint64_t i = 0;
  152. while (row.size() != 0) {
  153. i++;
  154. // auto image = row["image"];
  155. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  156. iter->GetNextRow(&row);
  157. }
  158. EXPECT_EQ(i, 20);
  159. // Manually terminate the pipeline
  160. iter->Stop();
  161. }
  162. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  163. session_id_type env_session;
  164. Status s = GetSessionFromEnv(&env_session);
  165. EXPECT_EQ(s, Status::OK());
  166. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  167. EXPECT_NE(some_cache, nullptr);
  168. // Create a CelebA Dataset, this folder_path has 4 records in it
  169. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  170. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
  171. EXPECT_NE(ds, nullptr);
  172. // Create a Repeat operation on ds
  173. int32_t repeat_num = 2;
  174. ds = ds->Repeat(repeat_num);
  175. EXPECT_NE(ds, nullptr);
  176. // Create an iterator over the result of the above dataset
  177. // This will trigger the creation of the Execution Tree and launch it.
  178. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  179. EXPECT_NE(iter, nullptr);
  180. // Iterate the dataset and get each row
  181. std::unordered_map<std::string, mindspore::MSTensor> row;
  182. iter->GetNextRow(&row);
  183. uint64_t i = 0;
  184. while (row.size() != 0) {
  185. i++;
  186. // auto image = row["image"];
  187. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  188. iter->GetNextRow(&row);
  189. }
  190. EXPECT_EQ(i, 8);
  191. // Manually terminate the pipeline
  192. iter->Stop();
  193. }
  194. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  195. session_id_type env_session;
  196. Status s = GetSessionFromEnv(&env_session);
  197. EXPECT_EQ(s, Status::OK());
  198. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  199. EXPECT_NE(some_cache, nullptr);
  200. // Create a Manifest Dataset, this file_path has 2 records in it
  201. std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  202. std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, false, some_cache);
  203. EXPECT_NE(ds, nullptr);
  204. // Create a Repeat operation on ds
  205. int32_t repeat_num = 2;
  206. ds = ds->Repeat(repeat_num);
  207. EXPECT_NE(ds, nullptr);
  208. // Create an iterator over the result of the above dataset
  209. // This will trigger the creation of the Execution Tree and launch it.
  210. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  211. EXPECT_NE(iter, nullptr);
  212. // Iterate the dataset and get each row
  213. std::unordered_map<std::string, mindspore::MSTensor> row;
  214. iter->GetNextRow(&row);
  215. uint64_t i = 0;
  216. while (row.size() != 0) {
  217. i++;
  218. // auto image = row["image"];
  219. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  220. iter->GetNextRow(&row);
  221. }
  222. EXPECT_EQ(i, 4);
  223. // Manually terminate the pipeline
  224. iter->Stop();
  225. }
  226. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  227. session_id_type env_session;
  228. Status s = GetSessionFromEnv(&env_session);
  229. EXPECT_EQ(s, Status::OK());
  230. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  231. EXPECT_NE(some_cache, nullptr);
  232. // Create a Cifar10 Dataset
  233. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  234. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  235. EXPECT_NE(ds, nullptr);
  236. // Create a Repeat operation on ds
  237. int32_t repeat_num = 2;
  238. ds = ds->Repeat(repeat_num);
  239. EXPECT_NE(ds, nullptr);
  240. // Create an iterator over the result of the above dataset
  241. // This will trigger the creation of the Execution Tree and launch it.
  242. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  243. EXPECT_NE(iter, nullptr);
  244. // Iterate the dataset and get each row
  245. std::unordered_map<std::string, mindspore::MSTensor> row;
  246. iter->GetNextRow(&row);
  247. uint64_t i = 0;
  248. while (row.size() != 0) {
  249. i++;
  250. // auto image = row["image"];
  251. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  252. iter->GetNextRow(&row);
  253. }
  254. EXPECT_EQ(i, 20);
  255. // Manually terminate the pipeline
  256. iter->Stop();
  257. }
  258. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  259. session_id_type env_session;
  260. Status s = GetSessionFromEnv(&env_session);
  261. EXPECT_EQ(s, Status::OK());
  262. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  263. EXPECT_NE(some_cache, nullptr);
  264. // Create a Cifar100 Dataset
  265. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  266. std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  267. EXPECT_NE(ds, nullptr);
  268. // Create a Repeat operation on ds
  269. int32_t repeat_num = 2;
  270. ds = ds->Repeat(repeat_num);
  271. EXPECT_NE(ds, nullptr);
  272. // Create an iterator over the result of the above dataset
  273. // This will trigger the creation of the Execution Tree and launch it.
  274. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  275. EXPECT_NE(iter, nullptr);
  276. // Iterate the dataset and get each row
  277. std::unordered_map<std::string, mindspore::MSTensor> row;
  278. iter->GetNextRow(&row);
  279. uint64_t i = 0;
  280. while (row.size() != 0) {
  281. i++;
  282. // auto image = row["image"];
  283. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  284. iter->GetNextRow(&row);
  285. }
  286. EXPECT_EQ(i, 20);
  287. // Manually terminate the pipeline
  288. iter->Stop();
  289. }
  290. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  291. session_id_type env_session;
  292. Status s = GetSessionFromEnv(&env_session);
  293. EXPECT_EQ(s, Status::OK());
  294. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  295. EXPECT_NE(some_cache, nullptr);
  296. // Create a VOC Dataset, this folder_path has 9 records in it
  297. std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  298. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
  299. EXPECT_NE(ds, nullptr);
  300. // Create a Repeat operation on ds
  301. int32_t repeat_num = 2;
  302. ds = ds->Repeat(repeat_num);
  303. EXPECT_NE(ds, nullptr);
  304. // Create an iterator over the result of the above dataset
  305. // This will trigger the creation of the Execution Tree and launch it.
  306. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  307. EXPECT_NE(iter, nullptr);
  308. // Iterate the dataset and get each row
  309. std::unordered_map<std::string, mindspore::MSTensor> row;
  310. iter->GetNextRow(&row);
  311. uint64_t i = 0;
  312. while (row.size() != 0) {
  313. i++;
  314. // auto image = row["image"];
  315. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  316. iter->GetNextRow(&row);
  317. }
  318. EXPECT_EQ(i, 18);
  319. // Manually terminate the pipeline
  320. iter->Stop();
  321. }
  322. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  323. session_id_type env_session;
  324. Status s = GetSessionFromEnv(&env_session);
  325. EXPECT_EQ(s, Status::OK());
  326. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  327. EXPECT_NE(some_cache, nullptr);
  328. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  329. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  330. std::vector<std::string> column_names = {"image", "label", "id"};
  331. // Create a Album Dataset, 7 records in it
  332. std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
  333. EXPECT_NE(ds, nullptr);
  334. // Create a Repeat operation on ds
  335. int32_t repeat_num = 2;
  336. ds = ds->Repeat(repeat_num);
  337. EXPECT_NE(ds, nullptr);
  338. // Create an iterator over the result of the above dataset
  339. // This will trigger the creation of the Execution Tree and launch it.
  340. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  341. EXPECT_NE(iter, nullptr);
  342. // Iterate the dataset and get each row
  343. std::unordered_map<std::string, mindspore::MSTensor> row;
  344. iter->GetNextRow(&row);
  345. uint64_t i = 0;
  346. while (row.size() != 0) {
  347. i++;
  348. iter->GetNextRow(&row);
  349. }
  350. EXPECT_EQ(i, 14);
  351. // Manually terminate the pipeline
  352. iter->Stop();
  353. }
  354. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  355. session_id_type env_session;
  356. Status s = GetSessionFromEnv(&env_session);
  357. EXPECT_EQ(s, Status::OK());
  358. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  359. EXPECT_NE(some_cache, nullptr);
  360. // Create a RandomDataset
  361. std::shared_ptr<SchemaObj> schema = Schema();
  362. schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
  363. schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
  364. std::shared_ptr<Dataset> ds = RandomData(4, schema, {}, some_cache);
  365. EXPECT_NE(ds, nullptr);
  366. // Create a Repeat operation on ds
  367. int32_t repeat_num = 2;
  368. ds = ds->Repeat(repeat_num);
  369. EXPECT_NE(ds, nullptr);
  370. // Create an iterator over the result of the above dataset
  371. // This will trigger the creation of the Execution Tree and launch it.
  372. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  373. EXPECT_NE(iter, nullptr);
  374. // Iterate the dataset and get each row
  375. std::unordered_map<std::string, mindspore::MSTensor> row;
  376. iter->GetNextRow(&row);
  377. uint64_t i = 0;
  378. while (row.size() != 0) {
  379. i++;
  380. iter->GetNextRow(&row);
  381. }
  382. EXPECT_EQ(i, 8);
  383. // Manually terminate the pipeline
  384. iter->Stop();
  385. }
  386. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  387. session_id_type env_session;
  388. Status s = GetSessionFromEnv(&env_session);
  389. EXPECT_EQ(s, Status::OK());
  390. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  391. EXPECT_NE(some_cache, nullptr);
  392. // Create a TFRecord Dataset, this file_path has 3 records in it
  393. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  394. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  395. std::shared_ptr<Dataset> ds =
  396. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  397. EXPECT_NE(ds, nullptr);
  398. // Create a Repeat operation on ds
  399. int32_t repeat_num = 2;
  400. ds = ds->Repeat(repeat_num);
  401. EXPECT_NE(ds, nullptr);
  402. // Create an iterator over the result of the above dataset
  403. // This will trigger the creation of the Execution Tree and launch it.
  404. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  405. EXPECT_NE(iter, nullptr);
  406. // Iterate the dataset and get each row
  407. std::unordered_map<std::string, mindspore::MSTensor> row;
  408. iter->GetNextRow(&row);
  409. uint64_t i = 0;
  410. while (row.size() != 0) {
  411. i++;
  412. // auto image = row["image"];
  413. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  414. iter->GetNextRow(&row);
  415. }
  416. EXPECT_EQ(i, 6);
  417. // Manually terminate the pipeline
  418. iter->Stop();
  419. }
  420. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  421. session_id_type env_session;
  422. Status s = GetSessionFromEnv(&env_session);
  423. EXPECT_EQ(s, Status::OK());
  424. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  425. EXPECT_NE(some_cache, nullptr);
  426. // Create a TFRecord Dataset, this file_path has 3 records in it
  427. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  428. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  429. // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  430. // used, the tree prepare should undo the sharding configuration and instead, a distributed
  431. // sampler will be chosen with the same shard config.
  432. // With only 3 records shard into 3, we expect only 1 record returned for this shard
  433. // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  434. // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  435. // there was not any cache.
  436. std::shared_ptr<Dataset> ds =
  437. TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  438. EXPECT_NE(ds, nullptr);
  439. // Create a Repeat operation on ds
  440. int32_t repeat_num = 2;
  441. ds = ds->Repeat(repeat_num);
  442. EXPECT_NE(ds, nullptr);
  443. // Create an iterator over the result of the above dataset
  444. // This will trigger the creation of the Execution Tree and launch it.
  445. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  446. EXPECT_NE(iter, nullptr);
  447. // Iterate the dataset and get each row
  448. std::unordered_map<std::string, mindspore::MSTensor> row;
  449. iter->GetNextRow(&row);
  450. uint64_t i = 0;
  451. while (row.size() != 0) {
  452. i++;
  453. // auto image = row["image"];
  454. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  455. iter->GetNextRow(&row);
  456. }
  457. EXPECT_EQ(i, 2);
  458. // Manually terminate the pipeline
  459. iter->Stop();
  460. }
  461. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  462. session_id_type env_session;
  463. Status s = GetSessionFromEnv(&env_session);
  464. EXPECT_EQ(s, Status::OK());
  465. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  466. EXPECT_NE(some_cache, nullptr);
  467. // Create a TFRecord Dataset, this file_path has 3 records in it
  468. std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  469. std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  470. // In this one, a num_samples argument is given.
  471. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  472. // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  473. std::shared_ptr<Dataset> ds =
  474. TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  475. EXPECT_NE(ds, nullptr);
  476. // Create a Repeat operation on ds
  477. int32_t repeat_num = 2;
  478. ds = ds->Repeat(repeat_num);
  479. EXPECT_NE(ds, nullptr);
  480. // Create an iterator over the result of the above dataset
  481. // This will trigger the creation of the Execution Tree and launch it.
  482. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  483. EXPECT_NE(iter, nullptr);
  484. // Iterate the dataset and get each row
  485. std::unordered_map<std::string, mindspore::MSTensor> row;
  486. iter->GetNextRow(&row);
  487. uint64_t i = 0;
  488. while (row.size() != 0) {
  489. i++;
  490. // auto image = row["image"];
  491. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  492. iter->GetNextRow(&row);
  493. }
  494. EXPECT_EQ(i, 4);
  495. // Manually terminate the pipeline
  496. iter->Stop();
  497. }
  498. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  499. session_id_type env_session;
  500. Status s = GetSessionFromEnv(&env_session);
  501. EXPECT_EQ(s, Status::OK());
  502. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  503. EXPECT_NE(some_cache, nullptr);
  504. // Create a TextFile Dataset, this file_path has 3 records in it
  505. std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  506. // In this one, a num_samples=2 argument is given.
  507. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  508. // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  509. std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  510. EXPECT_NE(ds, nullptr);
  511. // Create a Repeat operation on ds
  512. int32_t repeat_num = 2;
  513. ds = ds->Repeat(repeat_num);
  514. EXPECT_NE(ds, nullptr);
  515. // Create an iterator over the result of the above dataset
  516. // This will trigger the creation of the Execution Tree and launch it.
  517. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  518. EXPECT_NE(iter, nullptr);
  519. // Iterate the dataset and get each row
  520. std::unordered_map<std::string, mindspore::MSTensor> row;
  521. iter->GetNextRow(&row);
  522. uint64_t i = 0;
  523. while (row.size() != 0) {
  524. i++;
  525. iter->GetNextRow(&row);
  526. }
  527. EXPECT_EQ(i, 4);
  528. // Manually terminate the pipeline
  529. iter->Stop();
  530. }
  531. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  532. session_id_type env_session;
  533. Status s = GetSessionFromEnv(&env_session);
  534. EXPECT_EQ(s, Status::OK());
  535. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  536. EXPECT_NE(some_cache, nullptr);
  537. // Create a CSV Dataset, this file_path has 3 records in it
  538. std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  539. std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
  540. // In this one, a num_samples=2 argument is given.
  541. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  542. // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  543. std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  544. EXPECT_NE(ds, nullptr);
  545. // Create a Repeat operation on ds
  546. int32_t repeat_num = 2;
  547. ds = ds->Repeat(repeat_num);
  548. EXPECT_NE(ds, nullptr);
  549. // Create an iterator over the result of the above dataset
  550. // This will trigger the creation of the Execution Tree and launch it.
  551. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  552. EXPECT_NE(iter, nullptr);
  553. // Iterate the dataset and get each row
  554. std::unordered_map<std::string, mindspore::MSTensor> row;
  555. iter->GetNextRow(&row);
  556. uint64_t i = 0;
  557. while (row.size() != 0) {
  558. i++;
  559. iter->GetNextRow(&row);
  560. }
  561. EXPECT_EQ(i, 4);
  562. // Manually terminate the pipeline
  563. iter->Stop();
  564. }
  565. TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  566. session_id_type env_session;
  567. Status s = GetSessionFromEnv(&env_session);
  568. EXPECT_EQ(s, Status::OK());
  569. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  570. EXPECT_NE(some_cache, nullptr);
  571. // Create a CLUE Dataset, this file_path has 3 records in it
  572. std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  573. std::string task = "AFQMC";
  574. std::string usage = "train";
  575. // In this one, a num_samples=2 argument is given.
  576. // In this case, a sequential sampler would be chosen with the same num_samples argument.
  577. // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  578. std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  579. EXPECT_NE(ds, nullptr);
  580. // Create a Repeat operation on ds
  581. int32_t repeat_num = 2;
  582. ds = ds->Repeat(repeat_num);
  583. EXPECT_NE(ds, nullptr);
  584. // Create an iterator over the result of the above dataset
  585. // This will trigger the creation of the Execution Tree and launch it.
  586. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  587. EXPECT_NE(iter, nullptr);
  588. // Iterate the dataset and get each row
  589. std::unordered_map<std::string, mindspore::MSTensor> row;
  590. iter->GetNextRow(&row);
  591. uint64_t i = 0;
  592. while (row.size() != 0) {
  593. i++;
  594. iter->GetNextRow(&row);
  595. }
  596. EXPECT_EQ(i, 4);
  597. // Manually terminate the pipeline
  598. iter->Stop();
  599. }
  600. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
  601. session_id_type env_session;
  602. Status s = GetSessionFromEnv(&env_session);
  603. EXPECT_EQ(s, Status::OK());
  604. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  605. EXPECT_NE(some_cache, nullptr);
  606. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  607. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  608. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  609. EXPECT_NE(ds1, nullptr);
  610. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  611. EXPECT_NE(ds2, nullptr);
  612. // Create and launch the Execution Tree for ds1
  613. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  614. EXPECT_NE(iter1, nullptr);
  615. // Iterate the dataset and get each row
  616. std::unordered_map<std::string, mindspore::MSTensor> row;
  617. iter1->GetNextRow(&row);
  618. uint64_t i = 0;
  619. while (row.size() != 0) {
  620. i++;
  621. // auto image = row["image"];
  622. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  623. iter1->GetNextRow(&row);
  624. }
  625. EXPECT_EQ(i, 2);
  626. // Manually terminate the pipeline
  627. iter1->Stop();
  628. // Create and launch the Execution Tree for ds2
  629. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  630. EXPECT_NE(iter2, nullptr);
  631. // Iterate the dataset and get each row
  632. iter2->GetNextRow(&row);
  633. i = 0;
  634. while (row.size() != 0) {
  635. i++;
  636. // auto image = row["image"];
  637. // MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  638. iter2->GetNextRow(&row);
  639. }
  640. EXPECT_EQ(i, 2);
  641. // Manually terminate the pipeline
  642. iter2->Stop();
  643. }
  644. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
  645. session_id_type env_session;
  646. Status s = GetSessionFromEnv(&env_session);
  647. EXPECT_EQ(s, Status::OK());
  648. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  649. EXPECT_NE(some_cache, nullptr);
  650. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  651. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  652. // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
  653. // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
  654. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  655. EXPECT_NE(ds1, nullptr);
  656. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
  657. EXPECT_NE(ds2, nullptr);
  658. // Create and launch the Execution Tree for ds1
  659. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  660. EXPECT_NE(iter1, nullptr);
  661. // Iterate the dataset and get each row
  662. std::unordered_map<std::string, mindspore::MSTensor> row;
  663. iter1->GetNextRow(&row);
  664. uint64_t i = 0;
  665. while (row.size() != 0) {
  666. i++;
  667. // auto image = row["image"];
  668. iter1->GetNextRow(&row);
  669. }
  670. EXPECT_EQ(i, 2);
  671. // Manually terminate the pipeline
  672. iter1->Stop();
  673. // Create and launch the Execution Tree for ds2
  674. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  675. EXPECT_NE(iter2, nullptr);
  676. // Iterate the dataset and get each row
  677. iter2->GetNextRow(&row);
  678. i = 0;
  679. while (row.size() != 0) {
  680. i++;
  681. // auto image = row["image"];
  682. iter2->GetNextRow(&row);
  683. }
  684. EXPECT_EQ(i, 2);
  685. // Manually terminate the pipeline
  686. iter2->Stop();
  687. }
  688. TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
  689. session_id_type env_session;
  690. Status s = GetSessionFromEnv(&env_session);
  691. EXPECT_EQ(s, Status::OK());
  692. std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, true);
  693. EXPECT_NE(some_cache, nullptr);
  694. // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  695. std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  696. std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  697. EXPECT_NE(ds1, nullptr);
  698. std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  699. EXPECT_NE(ds2, nullptr);
  700. // Create and launch the Execution Tree for ds1
  701. std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  702. EXPECT_NE(iter1, nullptr);
  703. // Iterate the dataset and get each row
  704. std::unordered_map<std::string, mindspore::MSTensor> row;
  705. iter1->GetNextRow(&row);
  706. uint64_t i = 0;
  707. while (row.size() != 0) {
  708. i++;
  709. // auto image = row["image"];
  710. iter1->GetNextRow(&row);
  711. }
  712. EXPECT_EQ(i, 2);
  713. // Manually terminate the pipeline
  714. iter1->Stop();
  715. // Re-use a cache for the second pipeline would fail
  716. std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  717. EXPECT_EQ(iter2, nullptr);
  718. }