You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_samplers_test.cc 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  18. #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
  19. #include "minddata/dataset/include/dataset/datasets.h"
  20. #include <functional>
  21. using namespace mindspore::dataset;
  22. using mindspore::dataset::Tensor;
  23. class MindDataTestPipeline : public UT::DatasetOpTesting {
  24. protected:
  25. };
  26. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  27. std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  28. EXPECT_NE(sampl, nullptr);
  29. sampl = std::make_shared<PKSampler>(3);
  30. EXPECT_NE(sampl, nullptr);
  31. sampl = std::make_shared<RandomSampler>(false, 12);
  32. EXPECT_NE(sampl, nullptr);
  33. sampl = std::make_shared<SequentialSampler>(0, 12);
  34. EXPECT_NE(sampl, nullptr);
  35. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  36. sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  37. EXPECT_NE(sampl, nullptr);
  38. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  39. sampl = std::make_shared<SubsetSampler>(indices);
  40. EXPECT_NE(sampl, nullptr);
  41. sampl = std::make_shared<SubsetRandomSampler>(indices);
  42. EXPECT_NE(sampl, nullptr);
  43. // Create an ImageFolder Dataset
  44. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  46. EXPECT_NE(ds, nullptr);
  47. // Create a Repeat operation on ds
  48. int32_t repeat_num = 2;
  49. ds = ds->Repeat(repeat_num);
  50. EXPECT_NE(ds, nullptr);
  51. // Create a Batch operation on ds
  52. int32_t batch_size = 2;
  53. ds = ds->Batch(batch_size);
  54. EXPECT_NE(ds, nullptr);
  55. // Create an iterator over the result of the above dataset
  56. // This will trigger the creation of the Execution Tree and launch it.
  57. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  58. EXPECT_NE(iter, nullptr);
  59. // Iterate the dataset and get each row
  60. std::unordered_map<std::string, mindspore::MSTensor> row;
  61. ASSERT_OK(iter->GetNextRow(&row));
  62. uint64_t i = 0;
  63. while (row.size() != 0) {
  64. i++;
  65. auto image = row["image"];
  66. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  67. ASSERT_OK(iter->GetNextRow(&row));
  68. }
  69. EXPECT_EQ(i, 12);
  70. // Manually terminate the pipeline
  71. iter->Stop();
  72. }
  73. // Feature: Test ImageFolder with WeightedRandomSampler
  74. // Description: Create ImageFolder dataset with WeightedRandomRampler given num_samples=12,
  75. // iterate through dataset and count rows
  76. // Expectation: There should be 12 rows
  77. TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerImageFolder) {
  78. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  79. std::shared_ptr<Sampler> sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  80. EXPECT_NE(sampl, nullptr);
  81. // Create an ImageFolder Dataset
  82. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  83. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  84. EXPECT_NE(ds, nullptr);
  85. // Create an iterator over the result of the above dataset
  86. // This will trigger the creation of the Execution Tree and launch it.
  87. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  88. EXPECT_NE(iter, nullptr);
  89. // Iterate the dataset and get each row
  90. std::unordered_map<std::string, mindspore::MSTensor> row;
  91. ASSERT_OK(iter->GetNextRow(&row));
  92. uint64_t i = 0;
  93. while (row.size() != 0) {
  94. i++;
  95. auto image = row["image"];
  96. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  97. ASSERT_OK(iter->GetNextRow(&row));
  98. }
  99. EXPECT_EQ(i, 12);
  100. // Manually terminate the pipeline
  101. iter->Stop();
  102. }
  103. TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  104. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  105. // Test building a dataset with no sampler provided (defaults to random sampler
  106. // Create an ImageFolder Dataset
  107. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  108. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  109. EXPECT_NE(ds, nullptr);
  110. // Iterate the dataset and get each row
  111. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  112. EXPECT_NE(iter, nullptr);
  113. std::unordered_map<std::string, mindspore::MSTensor> row;
  114. ASSERT_OK(iter->GetNextRow(&row));
  115. uint64_t i = 0;
  116. while (row.size() != 0) {
  117. i++;
  118. auto label = row["label"];
  119. ASSERT_OK(iter->GetNextRow(&row));
  120. }
  121. EXPECT_EQ(i, ds->GetDatasetSize());
  122. iter->Stop();
  123. }
  124. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  125. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  126. // Test basic setting of distributed_sampler
  127. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  128. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  129. EXPECT_NE(sampler, nullptr);
  130. // Create an ImageFolder Dataset
  131. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  132. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  133. EXPECT_NE(ds, nullptr);
  134. // Iterate the dataset and get each row
  135. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  136. EXPECT_NE(iter, nullptr);
  137. std::unordered_map<std::string, mindspore::MSTensor> row;
  138. ASSERT_OK(iter->GetNextRow(&row));
  139. uint64_t i = 0;
  140. while (row.size() != 0) {
  141. i++;
  142. auto label = row["label"];
  143. ASSERT_OK(iter->GetNextRow(&row));
  144. }
  145. EXPECT_EQ(i, 11);
  146. iter->Stop();
  147. }
  148. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  149. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  150. // Test basic setting of distributed_sampler
  151. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  152. auto sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true));
  153. // Note that with new, we have to explicitly delete the allocated object as shown below.
  154. // Note: No need to check for output after calling API class constructor
  155. // Create an ImageFolder Dataset
  156. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  157. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  158. EXPECT_NE(ds, nullptr);
  159. // Iterate the dataset and get each row
  160. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  161. EXPECT_NE(iter, nullptr);
  162. std::unordered_map<std::string, mindspore::MSTensor> row;
  163. ASSERT_OK(iter->GetNextRow(&row));
  164. uint64_t i = 0;
  165. while (row.size() != 0) {
  166. i++;
  167. auto label = row["label"];
  168. ASSERT_OK(iter->GetNextRow(&row));
  169. }
  170. EXPECT_EQ(i, 11);
  171. iter->Stop();
  172. // Delete allocated objects with raw pointers
  173. delete sampler;
  174. }
  175. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  176. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  177. // Test basic setting of distributed_sampler
  178. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  179. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  180. // Create an ImageFolder Dataset
  181. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  182. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  183. EXPECT_NE(ds, nullptr);
  184. // Iterate the dataset and get each row
  185. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  186. EXPECT_NE(iter, nullptr);
  187. std::unordered_map<std::string, mindspore::MSTensor> row;
  188. ASSERT_OK(iter->GetNextRow(&row));
  189. uint64_t i = 0;
  190. while (row.size() != 0) {
  191. i++;
  192. auto label = row["label"];
  193. ASSERT_OK(iter->GetNextRow(&row));
  194. }
  195. EXPECT_EQ(i, 11);
  196. iter->Stop();
  197. }
  198. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
  199. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4.";
  200. // Test pointer of distributed_sampler
  201. SequentialSampler sampler = SequentialSampler(0, 4);
  202. // Create an ImageFolder Dataset
  203. std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
  204. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler);
  205. EXPECT_NE(ds, nullptr);
  206. // Iterate the dataset and get each row
  207. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  208. EXPECT_NE(iter, nullptr);
  209. std::unordered_map<std::string, mindspore::MSTensor> row;
  210. ASSERT_OK(iter->GetNextRow(&row));
  211. uint64_t i = 0;
  212. while (row.size() != 0) {
  213. i++;
  214. auto label = row["label"];
  215. ASSERT_OK(iter->GetNextRow(&row));
  216. }
  217. EXPECT_EQ(i, 4);
  218. iter->Stop();
  219. }
  220. // Feature: Test ImageFolder with DistributedSampler
  221. // Description: Create ImageFolder dataset with DistributedSampler given num_shards=11 and shard_id=10,
  222. // count rows in dataset
  223. // Expectation: There should be 4 rows (44 rows in original data/11 = 4)
  224. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess5) {
  225. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess5.";
  226. // Test basic setting of distributed_sampler
  227. // num_shards=11, shard_id=10, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  228. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(11, 10, false, 0, 0, -1, true);
  229. EXPECT_NE(sampler, nullptr);
  230. // Create an ImageFolder Dataset
  231. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  232. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  233. EXPECT_NE(ds, nullptr);
  234. // Iterate the dataset and get each row
  235. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  236. EXPECT_NE(iter, nullptr);
  237. std::unordered_map<std::string, mindspore::MSTensor> row;
  238. ASSERT_OK(iter->GetNextRow(&row));
  239. uint64_t i = 0;
  240. while (row.size() != 0) {
  241. i++;
  242. auto label = row["label"];
  243. ASSERT_OK(iter->GetNextRow(&row));
  244. }
  245. EXPECT_EQ(i, 4);
  246. iter->Stop();
  247. }
  248. // Feature: Test ImageFolder with DistributedSampler
  249. // Description: Create ImageFolder dataset with DistributedSampler given num_shards=4 and shard_id=3,
  250. // count rows in dataset
  251. // Expectation: There should be 11 rows (44 rows in original data/4 = 11)
  252. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess6) {
  253. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess6.";
  254. // Test basic setting of distributed_sampler
  255. // num_shards=4, shard_id=3, shuffle=false, num_samplers=12, seed=0, offset=-1, even_dist=true
  256. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 3, false, 12, 0, -1, true);
  257. EXPECT_NE(sampler, nullptr);
  258. // Create an ImageFolder Dataset
  259. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  260. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  261. EXPECT_NE(ds, nullptr);
  262. // Iterate the dataset and get each row
  263. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  264. EXPECT_NE(iter, nullptr);
  265. std::unordered_map<std::string, mindspore::MSTensor> row;
  266. ASSERT_OK(iter->GetNextRow(&row));
  267. uint64_t i = 0;
  268. while (row.size() != 0) {
  269. i++;
  270. auto label = row["label"];
  271. ASSERT_OK(iter->GetNextRow(&row));
  272. }
  273. EXPECT_EQ(i, 11);
  274. iter->Stop();
  275. }
  276. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  277. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  278. // Test basic setting of distributed_sampler
  279. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  280. // offset=5 which is greater than num_shards=4 --> will fail later
  281. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  282. EXPECT_NE(sampler, nullptr);
  283. // Create an ImageFolder Dataset
  284. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  285. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  286. EXPECT_NE(ds, nullptr);
  287. // Iterate will fail because sampler is not initiated successfully.
  288. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  289. EXPECT_EQ(iter, nullptr);
  290. }
  291. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  292. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  293. // Test basic setting of distributed_sampler
  294. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  295. // offset=5 which is greater than num_shards=4 --> will fail later
  296. auto sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false));
  297. // Note that with new, we have to explicitly delete the allocated object as shown below.
  298. // Note: No need to check for output after calling API class constructor
  299. // Create an ImageFolder Dataset
  300. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  301. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  302. EXPECT_NE(ds, nullptr);
  303. // Iterate will fail because sampler is not initiated successfully.
  304. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  305. EXPECT_EQ(iter, nullptr);
  306. // Delete allocated objects with raw pointers
  307. delete sampler;
  308. }
  309. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  310. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  311. // Test basic setting of distributed_sampler
  312. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  313. // offset=5 which is greater than num_shards=4 --> will fail later
  314. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  315. // Create an ImageFolder Dataset
  316. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  317. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  318. EXPECT_NE(ds, nullptr);
  319. // Iterate will fail because sampler is not initiated successfully.
  320. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  321. EXPECT_EQ(iter, nullptr);
  322. }
  323. TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  324. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";
  325. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  326. EXPECT_NE(sampler, nullptr);
  327. auto child_sampler = std::make_shared<SequentialSampler>();
  328. EXPECT_NE(child_sampler, nullptr);
  329. sampler->AddChild(child_sampler);
  330. // Create an ImageFolder Dataset
  331. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  332. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  333. EXPECT_NE(ds, nullptr);
  334. // Iterate the dataset and get each row
  335. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  336. EXPECT_NE(iter, nullptr);
  337. std::unordered_map<std::string, mindspore::MSTensor> row;
  338. ASSERT_OK(iter->GetNextRow(&row));
  339. uint64_t i = 0;
  340. while (row.size() != 0) {
  341. i++;
  342. ASSERT_OK(iter->GetNextRow(&row));
  343. }
  344. EXPECT_EQ(ds->GetDatasetSize(), 5);
  345. iter->Stop();
  346. }
  347. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) {
  348. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1.";
  349. // Test basic setting of subset_sampler with default num_samples
  350. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  351. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  352. EXPECT_NE(sampl, nullptr);
  353. // Create an ImageFolder Dataset
  354. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  355. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  356. EXPECT_NE(ds, nullptr);
  357. // Iterate the dataset and get each row
  358. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  359. EXPECT_NE(iter, nullptr);
  360. std::unordered_map<std::string, mindspore::MSTensor> row;
  361. ASSERT_OK(iter->GetNextRow(&row));
  362. uint64_t i = 0;
  363. while (row.size() != 0) {
  364. i++;
  365. auto image = row["image"];
  366. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  367. ASSERT_OK(iter->GetNextRow(&row));
  368. }
  369. EXPECT_EQ(i, 6);
  370. iter->Stop();
  371. }
  372. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) {
  373. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2.";
  374. // Test subset_sampler with num_samples
  375. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  376. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3);
  377. EXPECT_NE(sampl, nullptr);
  378. // Create an ImageFolder Dataset
  379. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  380. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  381. EXPECT_NE(ds, nullptr);
  382. // Iterate the dataset and get each row
  383. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  384. EXPECT_NE(iter, nullptr);
  385. std::unordered_map<std::string, mindspore::MSTensor> row;
  386. ASSERT_OK(iter->GetNextRow(&row));
  387. uint64_t i = 0;
  388. while (row.size() != 0) {
  389. i++;
  390. auto image = row["image"];
  391. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  392. ASSERT_OK(iter->GetNextRow(&row));
  393. }
  394. EXPECT_EQ(i, 3);
  395. iter->Stop();
  396. }
  397. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) {
  398. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3.";
  399. // Test subset_sampler with num_samples larger than the indices size.
  400. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  401. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8);
  402. EXPECT_NE(sampl, nullptr);
  403. // Create an ImageFolder Dataset
  404. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  405. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  406. EXPECT_NE(ds, nullptr);
  407. // Iterate the dataset and get each row
  408. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  409. EXPECT_NE(iter, nullptr);
  410. std::unordered_map<std::string, mindspore::MSTensor> row;
  411. ASSERT_OK(iter->GetNextRow(&row));
  412. uint64_t i = 0;
  413. while (row.size() != 0) {
  414. i++;
  415. auto image = row["image"];
  416. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  417. ASSERT_OK(iter->GetNextRow(&row));
  418. }
  419. EXPECT_EQ(i, 6);
  420. iter->Stop();
  421. }
  422. TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {
  423. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail.";
  424. // Test subset_sampler with index out of bounds.
  425. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100}; // Sample ID (100) is out of bound
  426. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  427. EXPECT_NE(sampl, nullptr);
  428. // Create an ImageFolder Dataset
  429. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  430. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  431. EXPECT_NE(ds, nullptr);
  432. // Iterate the dataset and get each row
  433. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  434. EXPECT_NE(iter, nullptr);
  435. std::unordered_map<std::string, mindspore::MSTensor> row;
  436. // Expect failure: index 100 is out of dataset bounds
  437. EXPECT_ERROR(iter->GetNextRow(&row));
  438. iter->Stop();
  439. }
  440. // Feature: Test ImageFolder with PKSampler
  441. // Description: Create ImageFolder dataset with DistributedSampler given num_val=3 and count rows
  442. // Expectation: There should be 12 rows
  443. TEST_F(MindDataTestPipeline, TestPKSamplerImageFolder) {
  444. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPKSamplerImageFolder.";
  445. std::shared_ptr<Sampler> sampler = std::make_shared<PKSampler>(3, false);
  446. EXPECT_NE(sampler, nullptr);
  447. // Create an ImageFolder Dataset
  448. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  449. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  450. EXPECT_NE(ds, nullptr);
  451. // Iterate the dataset and get each row
  452. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  453. EXPECT_NE(iter, nullptr);
  454. std::unordered_map<std::string, mindspore::MSTensor> row;
  455. ASSERT_OK(iter->GetNextRow(&row));
  456. uint64_t i = 0;
  457. while (row.size() != 0) {
  458. i++;
  459. ASSERT_OK(iter->GetNextRow(&row));
  460. }
  461. EXPECT_EQ(i, 12);
  462. iter->Stop();
  463. }